2 net_packet.c -- Handles in- and outgoing VPN packets
3 Copyright (C) 2014 Guus Sliepen <guus@meshlink.io>
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License along
16 with this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 #include "connection.h"
31 #include "meshlink_internal.h"
// Forward declaration: send_udppacket() is defined further down in this file
// but is already called by the MTU probe code above its definition.
41 static void send_udppacket(meshlink_handle_t *mesh, node_t *, vpn_packet_t *);
// NOTE(review): presumably the replay window size; it is not referenced in the
// part of the file shown here -- confirm where it is consumed.
43 unsigned replaywin = 16;
// 1073741824 is 2^30. NOTE(review): not referenced in the part of the file
// shown here -- confirm it is still needed.
45 #define MAX_SEQNO 1073741824
47 /* mtuprobes == 1..30: initial discovery, send bursts with 1 second interval
48 mtuprobes == 31: sleep pinginterval seconds
49 mtuprobes == 32: send 1 burst, sleep pingtimeout seconds
50 mtuprobes == 33: no response from other side, restart PMTU discovery process
52 Probes are sent in batches of at least three, with random sizes between the
53 lower and upper boundaries for the MTU thus far discovered.
55 After the initial discovery, a fourth packet is added to each batch with a
56 size larger than the currently known PMTU, to test if the PMTU has increased.
58 In case local discovery is enabled, another packet is added to each batch,
59 which will be broadcast to the local network.
// Timer callback that drives path MTU discovery for one node.
// loop->data is the mesh handle; send_mtu_probe() registers this callback with
// the node to probe as its data argument.
// Depending on n->mtuprobes it either restarts or finalizes discovery, sends a
// burst of probe packets through send_udppacket(), refreshes the packet loss
// estimate, and finally re-arms n->mtutimeout for the next round.
63 static void send_mtu_probe_handler(event_loop_t *loop, void *data) {
64 meshlink_handle_t *mesh = loop->data;
// Probing requires a reachable node with a valid key.
70 if(!n->status.reachable || !n->status.validkey) {
71 logger(DEBUG_TRAFFIC, LOG_INFO, "Trying to send MTU probe to unreachable or rekeying node %s (%s)", n->name, n->hostname);
// Beyond state 32 the peer did not answer the UDP ping.
76 if(n->mtuprobes > 32) {
79 timeout = mesh->pinginterval;
83 logger(DEBUG_TRAFFIC, LOG_INFO, "%s (%s) did not respond to UDP ping, restarting PMTU discovery", n->name, n->hostname);
// The UDP address can no longer be trusted once pings go unanswered.
84 n->status.udp_confirmed = false;
// Ten or more discovery probes were sent and no lower MTU bound was ever established.
90 if(n->mtuprobes >= 10 && n->mtuprobes < 32 && !n->minmtu) {
91 logger(DEBUG_TRAFFIC, LOG_INFO, "No response to MTU probes from %s (%s)", n->name, n->hostname);
// Discovery ends after 30 probes, or earlier once the lower bound has reached the upper bound.
95 if(n->mtuprobes == 30 || (n->mtuprobes < 30 && n->minmtu >= n->maxmtu)) {
// Clamp so that both bounds end up equal.
96 if(n->minmtu > n->maxmtu)
97 n->minmtu = n->maxmtu;
99 n->maxmtu = n->minmtu;
101 logger(DEBUG_TRAFFIC, LOG_INFO, "Fixing MTU of %s (%s) to %d after %d probes", n->name, n->hostname, n->mtu, n->mtuprobes);
// State 31 waits pinginterval seconds; state 32 waits pingtimeout seconds.
105 if(n->mtuprobes == 31) {
106 timeout = mesh->pinginterval;
108 } else if(n->mtuprobes == 32) {
109 timeout = mesh->pingtimeout;
// One burst: four iterations plus mesh->localdiscovery additional ones.
112 for(int i = 0; i < 4 + mesh->localdiscovery; i++) {
116 if(n->mtuprobes < 30 || n->maxmtu + 8 >= MTU)
119 } else if(n->maxmtu <= n->minmtu) {
// Random length in the range minmtu + 1 .. maxmtu.
122 len = n->minmtu + 1 + rand() % (n->maxmtu - n->minmtu);
// The first 14 bytes are zero (mtu_probe_h() treats a zero first byte as a
// request); the remainder is random filler.
130 memset(packet.data, 0, 14);
131 randomize(packet.data + 14, len - 14);
// Only the additional iterations (i >= 4) within the first ten probes are
// flagged as broadcast, and only when a previous edge is known.
133 n->status.broadcast = i >= 4 && n->mtuprobes <= 10 && n->prevedge;
135 logger(DEBUG_TRAFFIC, LOG_INFO, "Sending MTU probe length %d to %s (%s)", len, n->name, n->hostname);
137 send_udppacket(mesh, n, &packet);
// Reset the state that mtu_probe_h() uses to time the replies to this burst.
140 n->status.broadcast = false;
141 n->probe_counter = 0;
142 gettimeofday(&n->probe_time, NULL);
144 /* Calculate the packet loss of incoming traffic by comparing the rate of
145 packets received to the rate with which the sequence number has increased.
// NOTE(review): if received_seqno equals prev_received_seqno while received has
// grown, the float division below has a zero denominator -- confirm this
// cannot happen.
148 if(n->received > n->prev_received)
149 n->packetloss = 1.0 - (n->received - n->prev_received) / (float)(n->received_seqno - n->prev_received_seqno);
// Nothing new received: loss is 1 if the sequence number did not advance, else 0.
151 n->packetloss = n->received_seqno <= n->prev_received_seqno;
153 n->prev_received_seqno = n->received_seqno;
154 n->prev_received = n->received;
// Re-arm the timer; tv_usec gets a random jitter below 100000 microseconds.
157 timeout_set(&mesh->loop, &n->mtutimeout, &(struct timeval){timeout, rand() % 100000});
// Start MTU probing for node n: register send_mtu_probe_handler() as the
// callback of n->mtutimeout on the mesh event loop (initial interval 1 second)
// and then run the handler once right away.
160 void send_mtu_probe(meshlink_handle_t *mesh, node_t *n) {
161 timeout_add(&mesh->loop, &n->mtutimeout, send_mtu_probe_handler, n, &(struct timeval){1, 0});
162 send_mtu_probe_handler(&mesh->loop, n);
// Handle an MTU probe received from node n.
// A zero first data byte marks a probe request, which is echoed back with
// send_udppacket(). Anything else is treated as a reply: it confirms the UDP
// path, feeds the MTU bounds and is used to estimate RTT and bandwidth.
// len is the probe length used for the MTU and bandwidth calculations; note
// that the log line below prints packet->len instead.
165 static void mtu_probe_h(meshlink_handle_t *mesh, node_t *n, vpn_packet_t *packet, uint16_t len) {
166 logger(DEBUG_TRAFFIC, LOG_INFO, "Got MTU probe length %d from %s (%s)", packet->len, n->name, n->hostname);
168 if(!packet->data[0]) {
169 /* It's a probe request, send back a reply */
173 /* Temporarily set udp_confirmed, so that the reply is sent
174 back exactly the way it came in. */
// Save and restore the flag around the send so the override stays local.
176 bool udp_confirmed = n->status.udp_confirmed;
177 n->status.udp_confirmed = true;
178 send_udppacket(mesh, n, packet);
179 n->status.udp_confirmed = udp_confirmed;
181 /* It's a valid reply: now we know bidirectional communication
182 is possible using the address and socket that the reply
185 n->status.udp_confirmed = true;
187 /* If we haven't established the PMTU yet, restart the discovery process. */
189 if(n->mtuprobes > 30) {
// A reply of length maxmtu + 8 means a probe larger than the current upper
// bound made the round trip.
190 if (len == n->maxmtu + 8) {
191 logger(DEBUG_TRAFFIC, LOG_INFO, "Increase in PMTU to %s (%s) detected, restarting PMTU discovery", n->name, n->hostname);
203 /* If applicable, raise the minimum supported MTU */
210 /* Calculate RTT and bandwidth.
211 The RTT is the time between the MTU probe burst was sent and the first
212 reply is received. The bandwidth is measured using the time between the
213 arrival of the first and third probe reply.
// diff is the time elapsed since n->probe_time, which
// send_mtu_probe_handler() sets after sending a burst.
216 struct timeval now, diff;
217 gettimeofday(&now, NULL);
218 timersub(&now, &n->probe_time, &diff);
// probe_counter 1 yields the RTT in seconds; probe_counter 3 yields the
// bandwidth as 2 * len divided by the elapsed seconds.
222 if(n->probe_counter == 1) {
223 n->rtt = diff.tv_sec + diff.tv_usec * 1e-6;
225 } else if(n->probe_counter == 3) {
226 n->bandwidth = 2.0 * len / (diff.tv_sec + diff.tv_usec * 1e-6);
// Logged as RTT in ms, bandwidth scaled by 8e-6 (bytes/s to Mbit/s, assuming
// len is in bytes) and packet loss as a percentage.
227 logger(DEBUG_TRAFFIC, LOG_DEBUG, "%s (%s) RTT %.2f ms, burst bandwidth %.3f Mbit/s, rx packet loss %.2f %%", n->name, n->hostname, n->rtt * 1e3, n->bandwidth * 8e-6, n->packetloss * 1e2);
// Compress len bytes from source into dest according to level.
// One branch copies the data unchanged with memcpy(), level 10 has a dedicated
// branch, and levels below 10 are passed to zlib compress2() as the
// compression level.
// NOTE(review): send_sptps_packet() treats the return value as the resulting
// length -- confirm the value returned on failure.
232 static uint16_t compress_packet(uint8_t *dest, const uint8_t *source, uint16_t len, int level) {
234 memcpy(dest, source, len);
236 } else if(level == 10) {
238 } else if(level < 10) {
// destlen tells zlib how much room dest has; dest is assumed to hold MAXSIZE bytes.
240 unsigned long destlen = MAXSIZE;
241 if(compress2(dest, &destlen, source, len, level) == Z_OK)
// Counterpart of compress_packet(): expand len bytes from source into dest
// according to level.
// One branch copies the data unchanged with memcpy(), levels above 9 have a
// dedicated branch, and the remaining levels go through zlib uncompress().
253 static uint16_t uncompress_packet(uint8_t *dest, const uint8_t *source, uint16_t len, int level) {
255 memcpy(dest, source, len);
257 } else if(level > 9) {
// destlen tells zlib how much room dest has; dest is assumed to hold MAXSIZE bytes.
262 unsigned long destlen = MAXSIZE;
263 if(uncompress(dest, &destlen, source, len) == Z_OK)
// Log a packet received from node n, add its size to the n->in_bytes counter
// and hand it to route().
275 static void receive_packet(meshlink_handle_t *mesh, node_t *n, vpn_packet_t *packet) {
276 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Received packet of %d bytes from %s (%s)",
277 packet->len, n->name, n->hostname);
280 n->in_bytes += packet->len;
282 route(mesh, n, packet);
// Return the result of sptps_verify_datagram() for inpkt against the SPTPS
// session of node n. The mesh argument is not used by the statement below.
285 static bool try_mac(meshlink_handle_t *mesh, node_t *n, const vpn_packet_t *inpkt) {
286 return sptps_verify_datagram(&n->sptps, inpkt->data, inpkt->len);
// Feed a UDP packet received from node n into its SPTPS session.
// When the session has no state yet, a key request is sent unless one is
// already pending; both cases are logged.
289 static void receive_udppacket(meshlink_handle_t *mesh, node_t *n, vpn_packet_t *inpkt) {
290 if(!n->sptps.state) {
291 if(!n->status.waitingforkey) {
292 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but we haven't exchanged keys yet", n->name, n->hostname);
293 send_req_key(mesh, n);
295 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but he hasn't got our key yet", n->name, n->hostname);
299 sptps_receive_data(&n->sptps, inpkt->data, inpkt->len);
// Accept len bytes of packet data that arrived on connection c: copy them
// into a local vpn_packet_t and pass that to receive_packet() on behalf of
// c->node.
302 void receive_tcppacket(meshlink_handle_t *mesh, connection_t *c, const char *buffer, int len) {
// len is a signed int compared against a size_t, so a negative len converts
// to a very large unsigned value and also takes this oversize branch.
305 if(len > sizeof outpkt.data)
310 memcpy(outpkt.data, buffer, len);
312 receive_packet(mesh, c->node, &outpkt);
// Send origpkt to node n as a record on its SPTPS session.
// Without a valid key a key request is issued instead; probes are sent as
// PKT_PROBE records without compression; other packets are optionally
// compressed with n->outcompression first.
315 static void send_sptps_packet(meshlink_handle_t *mesh, node_t *n, vpn_packet_t *origpkt) {
316 if(!n->status.validkey) {
317 logger(DEBUG_TRAFFIC, LOG_INFO, "No valid key known yet for %s (%s)", n->name, n->hostname);
318 if(!n->status.waitingforkey)
319 send_req_key(mesh, n);
// Already waiting: once the last request is more than 10 seconds old, stop
// the SPTPS session and request the key again.
320 else if(n->last_req_key + 10 < mesh->loop.now.tv_sec) {
321 logger(DEBUG_ALWAYS, LOG_DEBUG, "No key from %s after 10 seconds, restarting SPTPS", n->name);
322 sptps_stop(&n->sptps);
323 n->status.waitingforkey = false;
324 send_req_key(mesh, n);
331 // If it's a probe, send it immediately without trying to compress it.
333 sptps_send_record(&n->sptps, PKT_PROBE, origpkt->data, origpkt->len);
339 if(n->outcompression) {
340 int len = compress_packet(outpkt.data, origpkt->data, origpkt->len, n->outcompression);
342 logger(DEBUG_TRAFFIC, LOG_ERR, "Error while compressing packet to %s (%s)", n->name, n->hostname);
// The record is only flagged as compressed when compression made it smaller.
343 } else if(len < origpkt->len) {
346 type |= PKT_COMPRESSED;
// NOTE(review): this sends origpkt->data and origpkt->len; presumably origpkt
// is redirected to the compressed buffer when compression wins -- confirm.
350 sptps_send_record(&n->sptps, type, origpkt->data, origpkt->len);
// Choose the destination address (*sa) and the index of the listen socket
// (*sock) for sending a UDP packet to node n.
354 static void choose_udp_address(meshlink_handle_t *mesh, const node_t *n, const sockaddr_t **sa, int *sock) {
359 /* If the UDP address is confirmed, use it. */
360 if(n->status.udp_confirmed)
363 /* Send every third packet to n->address; that could be set
364 to the node's reflexive UDP address discovered during key
373 /* Otherwise, addresses are found in edges to this node.
374 So we pick a random edge and a random socket. */
// NOTE(review): the modulo is undefined if n->edge_tree->count is zero --
// confirm that an empty edge tree is excluded before this point.
377 int j = rand() % n->edge_tree->count;
378 edge_t *candidate = NULL;
380 for splay_each(edge_t, e, n->edge_tree) {
// The address is taken from the reverse edge of the selected edge.
382 candidate = e->reverse;
388 *sa = &candidate->address;
389 *sock = rand() % mesh->listen_sockets;
392 /* Make sure we have a suitable socket for the chosen address */
// If the chosen socket has a different address family, scan all listen
// sockets for one whose family matches the destination.
393 if(mesh->listen_socket[*sock].sa.sa.sa_family != (*sa)->sa.sa_family) {
394 for(int i = 0; i < mesh->listen_sockets; i++) {
395 if(mesh->listen_socket[i].sa.sa.sa_family == (*sa)->sa.sa_family) {
// Choose a broadcast destination (*sa) and a random listen socket (*sock) for
// a local discovery probe to node n.
// If mesh->localdiscovery_address has the same family as the chosen socket it
// is used; otherwise a built-in default address is used. In every case the
// port is copied from n->prevedge->address.
// The static templates and mesh->localdiscovery_address are modified in place
// on each call.
403 static void choose_broadcast_address(meshlink_handle_t *mesh, const node_t *n, const sockaddr_t **sa, int *sock) {
// IPv4 default: s_addr of -1 sets all bits, i.e. 255.255.255.255.
404 static sockaddr_t broadcast_ipv4 = {
406 .sin_family = AF_INET,
407 .sin_addr.s_addr = -1,
// IPv6 default: bytes ff 02 ... 01 with the rest zero, i.e. ff02::1.
411 static sockaddr_t broadcast_ipv6 = {
413 .sin6_family = AF_INET6,
414 .sin6_addr.s6_addr[0x0] = 0xff,
415 .sin6_addr.s6_addr[0x1] = 0x02,
416 .sin6_addr.s6_addr[0xf] = 0x01,
420 *sock = rand() % mesh->listen_sockets;
422 if(mesh->listen_socket[*sock].sa.sa.sa_family == AF_INET6) {
423 if(mesh->localdiscovery_address.sa.sa_family == AF_INET6) {
// NOTE(review): the port is read through the .in (IPv4) view of the union even
// on the IPv6 path; this presumably relies on sin_port and sin6_port sharing
// the same offset -- confirm.
424 mesh->localdiscovery_address.in6.sin6_port = n->prevedge->address.in.sin_port;
425 *sa = &mesh->localdiscovery_address;
427 broadcast_ipv6.in6.sin6_port = n->prevedge->address.in.sin_port;
// Reuse the scope id of the sending socket for the IPv6 default address.
428 broadcast_ipv6.in6.sin6_scope_id = mesh->listen_socket[*sock].sa.in6.sin6_scope_id;
429 *sa = &broadcast_ipv6;
432 if(mesh->localdiscovery_address.sa.sa_family == AF_INET) {
433 mesh->localdiscovery_address.in.sin_port = n->prevedge->address.in.sin_port;
434 *sa = &mesh->localdiscovery_address;
436 broadcast_ipv4.in.sin_port = n->prevedge->address.in.sin_port;
437 *sa = &broadcast_ipv4;
// Send origpkt to node n. An unreachable node is logged; otherwise the packet
// is handed to send_sptps_packet().
// NOTE(review): pkt1, pkt2, pkt, inpkt, outpkt and origlen are not used by any
// statement shown here -- they look like leftovers and may be removable.
442 static void send_udppacket(meshlink_handle_t *mesh, node_t *n, vpn_packet_t *origpkt) {
443 vpn_packet_t pkt1, pkt2;
444 vpn_packet_t *pkt[] = { &pkt1, &pkt2, &pkt1, &pkt2 };
445 vpn_packet_t *inpkt = origpkt;
447 vpn_packet_t *outpkt;
448 int origlen = origpkt->len;
451 if(!n->status.reachable) {
452 logger(DEBUG_TRAFFIC, LOG_INFO, "Trying to send UDP packet to unreachable node %s (%s)", n->name, n->hostname);
// NOTE(review): returning a void expression from a void function is not
// permitted by ISO C (compilers accept it as an extension); a plain call
// followed by return would be portable.
456 return send_sptps_packet(mesh, n, origpkt);
// Transmit len bytes of SPTPS data of the given record type to the node
// referred to as to below.
// The data goes out base64-encoded inside a request on the meta connection of
// to->nexthop when the TCP condition holds, and as a UDP datagram otherwise.
// In the TCP case the return value is the result of send_request().
459 bool send_sptps_data(void *handle, uint8_t type, const char *data, size_t len) {
461 meshlink_handle_t *mesh = to->mesh;
463 /* Send it via TCP if it is a handshake packet, TCPOnly is in use, or this packet is larger than the MTU. */
465 if(type >= SPTPS_HANDSHAKE || ((mesh->self->options | to->options) & OPTION_TCPONLY) || (type != PKT_PROBE && len > to->minmtu)) {
// Variable length array sized for the base64 expansion of len bytes.
// NOTE(review): stack usage grows with len -- confirm len is bounded by the caller.
466 char buf[len * 4 / 3 + 5];
467 b64encode(data, buf, len);
468 /* If no valid key is known yet, send the packets using ANS_KEY requests,
469 to ensure we get to learn the reflexive UDP address. */
470 if(!to->status.validkey) {
471 to->incompression = mesh->self->incompression;
472 return send_request(mesh, to->nexthop->connection, "%d %s %s %s -1 -1 -1 %d", ANS_KEY, mesh->self->name, to->name, buf, to->incompression);
474 return send_request(mesh, to->nexthop->connection, "%d %s %s %d %s", REQ_KEY, mesh->self->name, to->name, REQ_SPTPS, buf);
478 /* Otherwise, send the packet via UDP */
480 const sockaddr_t *sa;
// to->status.broadcast is set by the MTU probe code for local discovery probes.
483 if(to->status.broadcast)
484 choose_broadcast_address(mesh, to, &sa, &sock);
486 choose_udp_address(mesh, to, &sa, &sock);
// A would-block error is tolerated silently.
488 if(sendto(mesh->listen_socket[sock].udp.fd, data, len, 0, &sa->sa, SALEN(sa->sa)) < 0 && !sockwouldblock(sockerrno)) {
// When sockmsgsize() flags the error, a datagram of this size does not fit:
// lower the upper MTU bound to just below len.
489 if(sockmsgsize(sockerrno)) {
490 if(to->maxmtu >= len)
491 to->maxmtu = len - 1;
495 logger(DEBUG_TRAFFIC, LOG_WARNING, "Error sending UDP SPTPS packet to %s (%s): %s", to->name, to->hostname, sockstrerror(sockerrno));
// Handle one record delivered by the SPTPS session of the node passed as
// handle.
// A handshake record marks the key as valid. A PKT_PROBE record goes to
// mtu_probe_h(). Any other record may only carry the PKT_COMPRESSED flag; its
// payload is uncompressed or copied into a local packet and passed to
// receive_packet().
503 bool receive_sptps_record(void *handle, uint8_t type, const char *data, uint16_t len) {
504 node_t *from = handle;
505 meshlink_handle_t *mesh = from->mesh;
507 if(type == SPTPS_HANDSHAKE) {
// First completed handshake with this node: the key is now valid and we are
// no longer waiting for one.
508 if(!from->status.validkey) {
509 from->status.validkey = true;
510 from->status.waitingforkey = false;
511 logger(DEBUG_META, LOG_INFO, "SPTPS key exchange with %s (%s) succesful", from->name, from->hostname);
517 logger(DEBUG_ALWAYS, LOG_ERR, "Packet from %s (%s) larger than maximum supported size (%d > %d)", from->name, from->hostname, len, MTU);
523 if(type == PKT_PROBE) {
526 memcpy(inpkt.data, data, len);
527 mtu_probe_h(mesh, from, &inpkt, len);
// Reject any type bit other than PKT_COMPRESSED.
533 if(type & ~(PKT_COMPRESSED)) {
534 logger(DEBUG_ALWAYS, LOG_ERR, "Unexpected SPTPS record type %d len %d from %s (%s)", type, len, from->name, from->hostname);
538 if(type & PKT_COMPRESSED) {
// Expand with the compression level stored in from->incompression.
539 uint16_t ulen = uncompress_packet(inpkt.data, (const uint8_t *)data, len, from->incompression);
545 if(inpkt.len > MAXSIZE)
548 memcpy(inpkt.data, data, len);
552 receive_packet(mesh, from, &inpkt);
557 Send a packet to the given node.
// Send packet to node n and add its size to n->out_bytes.
// A packet addressed to ourself is only counted (delivery to the application
// is still a TODO); a packet for another node is logged, checked for
// reachability and handed to send_sptps_packet().
559 void send_packet(meshlink_handle_t *mesh, node_t *n, vpn_packet_t *packet) {
562 if(n == mesh->self) {
564 n->out_bytes += packet->len;
565 // TODO: send to application
// NOTE(review): this informational trace is logged with LOG_ERR, while the
// comparable messages in this file use LOG_INFO or LOG_DEBUG -- likely
// unintended.
569 logger(DEBUG_TRAFFIC, LOG_ERR, "Sending packet of %d bytes to %s (%s)",
570 packet->len, n->name, n->hostname);
572 if(!n->status.reachable) {
573 logger(DEBUG_TRAFFIC, LOG_INFO, "Node %s (%s) is not reachable",
574 n->name, n->hostname);
579 n->out_bytes += packet->len;
581 send_sptps_packet(mesh, n, packet);
585 /* Broadcast a packet using the minimum spanning tree */
// Broadcast packet, which originated at node from, to our neighbours.
// We take a copy for ourself unless we are the originator, then forward the
// packet over every active connection that is part of the minimum spanning
// tree, except the connection leading back towards from.
587 void broadcast_packet(meshlink_handle_t *mesh, const node_t *from, vpn_packet_t *packet) {
588 // Always give ourself a copy of the packet.
589 if(from != mesh->self)
590 send_packet(mesh, mesh->self, packet);
592 logger(DEBUG_TRAFFIC, LOG_INFO, "Broadcasting packet of %d bytes from %s (%s)",
593 packet->len, from->name, from->hostname);
595 for list_each(connection_t, c, mesh->connections)
// Skipping the connection via from->nexthop avoids sending the packet back
// the way it came.
596 if(c->status.active && c->status.mst && c != from->nexthop->connection)
597 send_packet(mesh, c->node, packet);
// Try to find the node that sent pkt from the address from by walking all
// edges and checking the packet against candidate nodes with try_mac().
// Edges to unreachable nodes and to ourself are skipped.
// NOTE(review): last_hard_try is function-static, so it is shared by every
// call and every mesh handle in the process; it is compared with the current
// second of the event loop, presumably to limit how often the expensive
// check runs -- confirm.
600 static node_t *try_harder(meshlink_handle_t *mesh, const sockaddr_t *from, const vpn_packet_t *pkt) {
603 static time_t last_hard_try = 0;
605 for splay_each(edge_t, e, mesh->edges) {
606 if(!e->to->status.reachable || e->to == mesh->self)
// Compare the addresses while ignoring the port.
609 if(sockaddrcmp_noport(from, &e->address)) {
610 if(last_hard_try == mesh->loop.now.tv_sec)
615 if(!try_mac(mesh, e->to, pkt))
623 last_hard_try = mesh->loop.now.tv_sec;
625 last_hard_try = mesh->loop.now.tv_sec;
// Event loop callback for incoming data on a UDP listen socket.
// loop->data is the mesh handle and data is the listen_socket_t to read from.
// One datagram is read, its sender is mapped to a node (by address first,
// then through try_harder()), and the packet is passed to receive_udppacket().
629 void handle_incoming_vpn_data(event_loop_t *loop, void *data, int flags) {
630 meshlink_handle_t *mesh = loop->data;
631 listen_socket_t *ls = data;
634 sockaddr_t from = {{0}};
635 socklen_t fromlen = sizeof from;
// Read at most MAXSIZE bytes and record the sender address in from.
639 len = recvfrom(ls->udp.fd, pkt.data, MAXSIZE, 0, &from.sa, &fromlen);
// A would-block condition is not reported as an error.
641 if(len <= 0 || len > MAXSIZE) {
642 if(!sockwouldblock(sockerrno))
643 logger(DEBUG_ALWAYS, LOG_ERR, "Receiving packet failed: %s", sockstrerror(sockerrno));
649 sockaddrunmap(&from); /* Some braindead IPv6 implementations do stupid things. */
651 n = lookup_node_udp(mesh, &from);
// Fallback when the address lookup did not produce a node.
654 n = try_harder(mesh, &from, &pkt);
// Remember the new address for the node that was found.
656 update_node_udp(mesh, n, &from);
// The hostname string is only built when the debug level would print it.
657 else if(mesh->debug_level >= DEBUG_PROTOCOL) {
658 hostname = sockaddr2hostname(&from);
659 logger(DEBUG_PROTOCOL, LOG_WARNING, "Received UDP packet from unknown source %s", hostname);
// Pointer difference gives the index of ls within mesh->listen_socket.
667 n->sock = ls - mesh->listen_socket;
669 receive_udppacket(mesh, n, &pkt);