2 net_packet.c -- Handles in- and outgoing VPN packets
3 Copyright (C) 2014 Guus Sliepen <guus@meshlink.io>
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License along
16 with this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
28 #include "connection.h"
33 #include "meshlink_internal.h"
// Forward declaration; the definition appears further down in this file.
43 static void send_udppacket(node_t *, vpn_packet_t *);
// NOTE(review): presumably the replay window size; nothing visible here reads it -- confirm.
45 unsigned replaywin = 16;
// When true, send_mtu_probe_handler() sends one extra probe per burst (its loop bound is 4 + localdiscovery).
46 bool localdiscovery = false;
// Used by choose_broadcast_address() as the destination when its address family matches the chosen socket.
47 sockaddr_t localdiscovery_address;
// 1073741824 == 2^30. NOTE(review): presumably a sequence number limit; no user is visible here -- confirm.
49 #define MAX_SEQNO 1073741824
51 /* mtuprobes == 1..30: initial discovery, send bursts with 1 second interval
52 mtuprobes == 31: sleep pinginterval seconds
53 mtuprobes == 32: send 1 burst, sleep pingtimeout seconds
54 mtuprobes == 33: no response from other side, restart PMTU discovery process
56 Probes are sent in batches of at least three, with random sizes between the
57 lower and upper boundaries for the MTU thus far discovered.
59 After the initial discovery, a fourth packet is added to each batch with a
60 size larger than the currently known PMTU, to test if the PMTU has increased.
62 In case local discovery is enabled, another packet is added to each batch,
63 which will be broadcast to the local network.
// Timer callback that runs one round of path MTU probing for a node.
//
// data is the callback argument registered by send_mtu_probe(), i.e. the
// node being probed. Going by the statements visible here, one round:
//  - logs when the node is unreachable or has no valid key;
//  - handles mtuprobes > 32 (the visible statements set the timeout to
//    pinginterval, log the missing UDP ping reply and clear
//    status.udp_confirmed);
//  - logs "No response" after 10 or more rounds without a known minmtu;
//  - reconciles minmtu and maxmtu and logs the fixed MTU at round 30, or
//    earlier once minmtu >= maxmtu;
//  - picks the next timeout: pinginterval at round 31, pingtimeout at 32;
//  - sends up to 4 + localdiscovery probes through send_udppacket();
//  - resets the probe bookkeeping, updates the packet loss estimate and
//    re-arms n->mtutimeout with the chosen timeout plus a random number of
//    microseconds below 100000.
//
// NOTE(review): the loss estimate divides by the sequence number delta; it
// is not evident here that the delta is non-zero whenever n->received has
// grown -- confirm.
67 static void send_mtu_probe_handler(void *data) {
73 if(!n->status.reachable || !n->status.validkey) {
74 logger(DEBUG_TRAFFIC, LOG_INFO, "Trying to send MTU probe to unreachable or rekeying node %s (%s)", n->name, n->hostname);
79 if(n->mtuprobes > 32) {
82 timeout = pinginterval;
86 logger(DEBUG_TRAFFIC, LOG_INFO, "%s (%s) did not respond to UDP ping, restarting PMTU discovery", n->name, n->hostname);
87 n->status.udp_confirmed = false;
93 if(n->mtuprobes >= 10 && n->mtuprobes < 32 && !n->minmtu) {
94 logger(DEBUG_TRAFFIC, LOG_INFO, "No response to MTU probes from %s (%s)", n->name, n->hostname);
98 if(n->mtuprobes == 30 || (n->mtuprobes < 30 && n->minmtu >= n->maxmtu)) {
99 if(n->minmtu > n->maxmtu)
100 n->minmtu = n->maxmtu;
102 n->maxmtu = n->minmtu;
104 logger(DEBUG_TRAFFIC, LOG_INFO, "Fixing MTU of %s (%s) to %d after %d probes", n->name, n->hostname, n->mtu, n->mtuprobes);
108 if(n->mtuprobes == 31) {
109 timeout = pinginterval;
111 } else if(n->mtuprobes == 32) {
112 timeout = pingtimeout;
// One burst; the probe with index 4 only exists when localdiscovery is enabled.
115 for(int i = 0; i < 4 + localdiscovery; i++) {
119 if(n->mtuprobes < 30 || n->maxmtu + 8 >= MTU)
122 } else if(n->maxmtu <= n->minmtu) {
// Random probe length in the range minmtu + 1 .. maxmtu.
125 len = n->minmtu + 1 + rand() % (n->maxmtu - n->minmtu);
// The first 14 bytes are zero (mtu_probe_h() treats data[0] == 0 as a request); the rest is random.
132 memset(packet.data, 0, 14);
133 randomize(packet.data + 14, len - 14);
// Only the extra probe (i >= 4) is broadcast, and only during the first 10 rounds when a previous edge is known.
135 n->status.broadcast = i >= 4 && n->mtuprobes <= 10 && n->prevedge;
137 logger(DEBUG_TRAFFIC, LOG_INFO, "Sending MTU probe length %d to %s (%s)", len, n->name, n->hostname);
139 send_udppacket(n, &packet);
142 n->status.broadcast = false;
143 n->probe_counter = 0;
144 gettimeofday(&n->probe_time, NULL);
146 /* Calculate the packet loss of incoming traffic by comparing the rate of
147 packets received to the rate with which the sequence number has increased.
150 if(n->received > n->prev_received)
151 n->packetloss = 1.0 - (n->received - n->prev_received) / (float)(n->received_seqno - n->prev_received_seqno);
153 n->packetloss = n->received_seqno <= n->prev_received_seqno;
155 n->prev_received_seqno = n->received_seqno;
156 n->prev_received = n->received;
159 timeout_set(&n->mtutimeout, &(struct timeval){timeout, rand() % 100000});
// Start MTU probing for node n: register send_mtu_probe_handler() on
// n->mtutimeout with an initial timeout of 1 second, then run the handler
// once right away.
162 void send_mtu_probe(node_t *n) {
163 timeout_add(&n->mtutimeout, send_mtu_probe_handler, n, &(struct timeval){1, 0});
164 send_mtu_probe_handler(n);
// Handle an MTU probe received from node n.
//
// packet: the probe; a zero first data byte marks it as a probe request.
// len:    probe length; compared against n->maxmtu + 8 and used for the
//         bandwidth estimate.
//
// Requests are echoed back through send_udppacket() with
// n->status.udp_confirmed temporarily forced to true and restored
// afterwards. For replies (per the in-body comments) udp_confirmed is set,
// a PMTU increase can be detected once mtuprobes is past 30, and the RTT
// (probe_counter == 1) and bandwidth (probe_counter == 3) measurements are
// updated relative to n->probe_time.
//
// NOTE(review): the first log line prints packet->len while the rest of the
// visible logic uses the len parameter -- confirm they always agree.
167 static void mtu_probe_h(node_t *n, vpn_packet_t *packet, uint16_t len) {
168 logger(DEBUG_TRAFFIC, LOG_INFO, "Got MTU probe length %d from %s (%s)", packet->len, n->name, n->hostname);
170 if(!packet->data[0]) {
171 /* It's a probe request, send back a reply */
175 /* Temporarily set udp_confirmed, so that the reply is sent
176 back exactly the way it came in. */
178 bool udp_confirmed = n->status.udp_confirmed;
179 n->status.udp_confirmed = true;
180 send_udppacket(n, packet);
181 n->status.udp_confirmed = udp_confirmed;
183 /* It's a valid reply: now we know bidirectional communication
184 is possible using the address and socket that the reply
187 n->status.udp_confirmed = true;
189 /* If we haven't established the PMTU yet, restart the discovery process. */
191 if(n->mtuprobes > 30) {
192 if (len == n->maxmtu + 8) {
193 logger(DEBUG_TRAFFIC, LOG_INFO, "Increase in PMTU to %s (%s) detected, restarting PMTU discovery", n->name, n->hostname);
205 /* If applicable, raise the minimum supported MTU */
212 /* Calculate RTT and bandwidth.
213 The RTT is the time between the MTU probe burst was sent and the first
214 reply is received. The bandwidth is measured using the time between the
215 arrival of the first and third probe reply.
218 struct timeval now, diff;
219 gettimeofday(&now, NULL);
220 timersub(&now, &n->probe_time, &diff);
224 if(n->probe_counter == 1) {
225 n->rtt = diff.tv_sec + diff.tv_usec * 1e-6;
227 } else if(n->probe_counter == 3) {
228 n->bandwidth = 2.0 * len / (diff.tv_sec + diff.tv_usec * 1e-6);
229 logger(DEBUG_TRAFFIC, LOG_DEBUG, "%s (%s) RTT %.2f ms, burst bandwidth %.3f Mbit/s, rx packet loss %.2f %%", n->name, n->hostname, n->rtt * 1e3, n->bandwidth * 8e-6, n->packetloss * 1e2);
// Compress len bytes from source into dest according to level.
//
// Visible branches: a plain memcpy() copy, a separate branch for
// level == 10, and zlib compress2() for level < 10 with MAXSIZE as the
// output capacity.
// NOTE(review): presumably returns the number of bytes written to dest;
// the return statements are not visible here -- confirm.
234 static uint16_t compress_packet(uint8_t *dest, const uint8_t *source, uint16_t len, int level) {
236 memcpy(dest, source, len);
238 } else if(level == 10) {
240 } else if(level < 10) {
242 unsigned long destlen = MAXSIZE;
243 if(compress2(dest, &destlen, source, len, level) == Z_OK)
// Expand len bytes from source into dest according to level.
//
// Visible branches: a plain memcpy() copy, a separate branch for
// level > 9, and zlib uncompress() with MAXSIZE as the output capacity.
// NOTE(review): presumably returns the number of bytes written to dest;
// the return statements are not visible here -- confirm.
255 static uint16_t uncompress_packet(uint8_t *dest, const uint8_t *source, uint16_t len, int level) {
257 memcpy(dest, source, len);
259 } else if(level > 9) {
264 unsigned long destlen = MAXSIZE;
265 if(uncompress(dest, &destlen, source, len) == Z_OK)
// Account for a packet received from node n: log its size and add
// packet->len to n->in_bytes.
277 static void receive_packet(node_t *n, vpn_packet_t *packet) {
278 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Received packet of %d bytes from %s (%s)",
279 packet->len, n->name, n->hostname);
282 n->in_bytes += packet->len;
// Returns whatever sptps_verify_datagram() reports for the data of inpkt
// when checked against node n's SPTPS state. Used by try_harder() to test
// whether a packet could have come from n.
287 static bool try_mac(node_t *n, const vpn_packet_t *inpkt) {
288 return sptps_verify_datagram(&n->sptps, inpkt->data, inpkt->len);
// Process a UDP datagram attributed to node n.
//
// When n->sptps.state is unset, one of two debug messages is logged
// depending on n->status.waitingforkey. The datagram data is passed to
// sptps_receive_data() for n's SPTPS session.
291 static void receive_udppacket(node_t *n, vpn_packet_t *inpkt) {
292 if(!n->sptps.state) {
293 if(!n->status.waitingforkey) {
294 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but we haven't exchanged keys yet", n->name, n->hostname);
297 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but he hasn't got our key yet", n->name, n->hostname);
301 sptps_receive_data(&n->sptps, inpkt->data, inpkt->len);
// Handle a VPN packet that arrived over the connection c.
//
// buffer/len: payload and its length. len is checked against the size of
// the local packet's data buffer; buffer is copied into that packet, which
// is then handed to receive_packet() for c->node.
// NOTE(review): len is an int compared with a sizeof expression; on
// platforms where size_t is at least as wide as int a negative len is
// converted to a large unsigned value and also satisfies the check.
304 void receive_tcppacket(connection_t *c, const char *buffer, int len) {
307 if(len > sizeof outpkt.data)
312 memcpy(outpkt.data, buffer, len);
314 receive_packet(c->node, &outpkt);
// Send origpkt to node n as an SPTPS record.
//
// When n has no valid key this is logged; if a key request has been
// outstanding for more than 10 seconds (n->last_req_key + 10 < now.tv_sec)
// the SPTPS session is stopped and waitingforkey is cleared.
// Probes are sent immediately as PKT_PROBE records without compression.
// Other packets are run through compress_packet() when n->outcompression
// is set; PKT_COMPRESSED is added to the record type only when the result
// is shorter than the original. The record is sent with
// sptps_send_record().
317 static void send_sptps_packet(node_t *n, vpn_packet_t *origpkt) {
318 if(!n->status.validkey) {
319 logger(DEBUG_TRAFFIC, LOG_INFO, "No valid key known yet for %s (%s)", n->name, n->hostname);
320 if(!n->status.waitingforkey)
322 else if(n->last_req_key + 10 < now.tv_sec) {
323 logger(DEBUG_ALWAYS, LOG_DEBUG, "No key from %s after 10 seconds, restarting SPTPS", n->name);
324 sptps_stop(&n->sptps);
325 n->status.waitingforkey = false;
333 // If it's a probe, send it immediately without trying to compress it.
335 sptps_send_record(&n->sptps, PKT_PROBE, origpkt->data, origpkt->len);
341 if(n->outcompression) {
342 int len = compress_packet(outpkt.data, origpkt->data, origpkt->len, n->outcompression);
344 logger(DEBUG_TRAFFIC, LOG_ERR, "Error while compressing packet to %s (%s)", n->name, n->hostname);
345 } else if(len < origpkt->len) {
348 type |= PKT_COMPRESSED;
352 sptps_send_record(&n->sptps, type, origpkt->data, origpkt->len);
// Pick the destination address (*sa) and listening socket index (*sock)
// for a UDP packet to node n.
//
// A confirmed UDP address is preferred. Otherwise, per the comments below,
// every third packet goes to n->address and the remaining ones to the
// address of a randomly chosen edge, on a random socket. Finally, if the
// chosen socket's address family differs from that of *sa, the listening
// sockets are searched for one with a matching family.
// NOTE(review): rand() % n->edge_tree->count and rand() % listen_sockets
// divide by zero if either count is 0; no guard is visible here -- confirm.
356 static void choose_udp_address(const node_t *n, const sockaddr_t **sa, int *sock) {
361 /* If the UDP address is confirmed, use it. */
362 if(n->status.udp_confirmed)
365 /* Send every third packet to n->address; that could be set
366 to the node's reflexive UDP address discovered during key
375 /* Otherwise, addresses are found in edges to this node.
376 So we pick a random edge and a random socket. */
379 int j = rand() % n->edge_tree->count;
380 edge_t *candidate = NULL;
382 for splay_each(edge_t, e, n->edge_tree) {
384 candidate = e->reverse;
390 *sa = &candidate->address;
391 *sock = rand() % listen_sockets;
394 /* Make sure we have a suitable socket for the chosen address */
395 if(listen_socket[*sock].sa.sa.sa_family != (*sa)->sa.sa_family) {
396 for(int i = 0; i < listen_sockets; i++) {
397 if(listen_socket[i].sa.sa.sa_family == (*sa)->sa.sa_family) {
// Pick the destination (*sa) and listening socket index (*sock) for a
// probe that is broadcast on the local network for node n.
//
// A random listening socket is chosen. For an AF_INET6 socket the target
// is localdiscovery_address when that is an IPv6 address, and the static
// broadcast_ipv6 target (ff02::1, with the socket's scope id) is also set
// up; the AF_INET statements mirror this with broadcast_ipv4, whose
// s_addr of -1 sets all address bits (255.255.255.255).
// The port is always copied from n->prevedge->address, so n->prevedge must
// be non-NULL when this is called.
//
// The broadcast targets are function-local statics that are modified on
// every call, and *sa may be left pointing at them or at the global
// localdiscovery_address.
// NOTE(review): the IPv6 ports are read through address.in.sin_port, which
// relies on sin_port and sin6_port sharing the same position -- confirm.
405 static void choose_broadcast_address(const node_t *n, const sockaddr_t **sa, int *sock) {
406 static sockaddr_t broadcast_ipv4 = {
408 .sin_family = AF_INET,
409 .sin_addr.s_addr = -1,
413 static sockaddr_t broadcast_ipv6 = {
415 .sin6_family = AF_INET6,
416 .sin6_addr.s6_addr[0x0] = 0xff,
417 .sin6_addr.s6_addr[0x1] = 0x02,
418 .sin6_addr.s6_addr[0xf] = 0x01,
422 *sock = rand() % listen_sockets;
424 if(listen_socket[*sock].sa.sa.sa_family == AF_INET6) {
425 if(localdiscovery_address.sa.sa_family == AF_INET6) {
426 localdiscovery_address.in6.sin6_port = n->prevedge->address.in.sin_port;
427 *sa = &localdiscovery_address;
429 broadcast_ipv6.in6.sin6_port = n->prevedge->address.in.sin_port;
430 broadcast_ipv6.in6.sin6_scope_id = listen_socket[*sock].sa.in6.sin6_scope_id;
431 *sa = &broadcast_ipv6;
434 if(localdiscovery_address.sa.sa_family == AF_INET) {
435 localdiscovery_address.in.sin_port = n->prevedge->address.in.sin_port;
436 *sa = &localdiscovery_address;
438 broadcast_ipv4.in.sin_port = n->prevedge->address.in.sin_port;
439 *sa = &broadcast_ipv4;
// Send origpkt to node n.
//
// Logs when n is not reachable; the packet is handed to
// send_sptps_packet().
// NOTE(review): pkt1, pkt2, pkt[], inpkt, outpkt and origlen are not used
// by any statement visible here; they look like leftovers -- confirm.
// NOTE(review): returning the result of a void call from a void function
// is accepted by GCC/Clang but is a constraint violation in ISO C (C11
// 6.8.6.4); a plain call followed by a bare return would be portable.
444 static void send_udppacket(node_t *n, vpn_packet_t *origpkt) {
445 vpn_packet_t pkt1, pkt2;
446 vpn_packet_t *pkt[] = { &pkt1, &pkt2, &pkt1, &pkt2 };
447 vpn_packet_t *inpkt = origpkt;
449 vpn_packet_t *outpkt;
450 int origlen = origpkt->len;
453 if(!n->status.reachable) {
454 logger(DEBUG_TRAFFIC, LOG_INFO, "Trying to send UDP packet to unreachable node %s (%s)", n->name, n->hostname);
458 return send_sptps_packet(n, origpkt);
// Transmit len bytes of SPTPS data of the given record type.
//
// handle: NOTE(review): presumably the destination node that the body
//         refers to as "to" -- its declaration is not visible here.
//
// Handshake records (type >= SPTPS_HANDSHAKE), traffic where either side
// has OPTION_TCPONLY set, and non-probe records longer than to->minmtu are
// base64-encoded and sent as a request over to->nexthop->connection: an
// ANS_KEY request while no valid key is known (which also copies our own
// incompression setting into to->incompression), otherwise a REQ_KEY
// request carrying REQ_SPTPS.
// Everything else is sent with sendto() on a socket and address chosen by
// choose_broadcast_address() (when to->status.broadcast is set) or
// choose_udp_address(). When sendto() fails with a message-size error and
// to->maxmtu is at least len, maxmtu is lowered to len - 1; a warning is
// logged for send failures.
//
// NOTE(review): buf is a variable-length array sized from len, so stack
// use grows with the record size.
461 bool send_sptps_data(void *handle, uint8_t type, const char *data, size_t len) {
464 /* Send it via TCP if it is a handshake packet, TCPOnly is in use, or this packet is larger than the MTU. */
466 if(type >= SPTPS_HANDSHAKE || ((mesh->self->options | to->options) & OPTION_TCPONLY) || (type != PKT_PROBE && len > to->minmtu)) {
467 char buf[len * 4 / 3 + 5];
468 b64encode(data, buf, len);
469 /* If no valid key is known yet, send the packets using ANS_KEY requests,
470 to ensure we get to learn the reflexive UDP address. */
471 if(!to->status.validkey) {
472 to->incompression = mesh->self->incompression;
473 return send_request(to->nexthop->connection, "%d %s %s %s -1 -1 -1 %d", ANS_KEY, mesh->self->name, to->name, buf, to->incompression);
475 return send_request(to->nexthop->connection, "%d %s %s %d %s", REQ_KEY, mesh->self->name, to->name, REQ_SPTPS, buf);
479 /* Otherwise, send the packet via UDP */
481 const sockaddr_t *sa;
484 if(to->status.broadcast)
485 choose_broadcast_address(to, &sa, &sock);
487 choose_udp_address(to, &sa, &sock);
489 if(sendto(listen_socket[sock].udp.fd, data, len, 0, &sa->sa, SALEN(sa->sa)) < 0 && !sockwouldblock(sockerrno)) {
490 if(sockmsgsize(sockerrno)) {
491 if(to->maxmtu >= len)
492 to->maxmtu = len - 1;
496 logger(DEBUG_TRAFFIC, LOG_WARNING, "Error sending UDP SPTPS packet to %s (%s): %s", to->name, to->hostname, sockstrerror(sockerrno));
// Handle one SPTPS record received from the node passed as handle.
//
// type == SPTPS_HANDSHAKE: if the node had no valid key yet, marks the key
//   as valid, clears waitingforkey and logs the completed key exchange.
// An over-size record is logged together with the MTU limit.
// type == PKT_PROBE: the data is copied into a local packet and passed to
//   mtu_probe_h().
// Any type bit other than PKT_COMPRESSED is logged as unexpected.
// Records flagged PKT_COMPRESSED are expanded with uncompress_packet()
// using from->incompression; a plain memcpy() of the data into the local
// packet is also visible. The packet is passed to receive_packet().
// NOTE(review): the return values for the individual cases are not visible
// here -- confirm before relying on them.
504 bool receive_sptps_record(void *handle, uint8_t type, const char *data, uint16_t len) {
505 node_t *from = handle;
507 if(type == SPTPS_HANDSHAKE) {
508 if(!from->status.validkey) {
509 from->status.validkey = true;
510 from->status.waitingforkey = false;
511 logger(DEBUG_META, LOG_INFO, "SPTPS key exchange with %s (%s) succesful", from->name, from->hostname);
517 logger(DEBUG_ALWAYS, LOG_ERR, "Packet from %s (%s) larger than maximum supported size (%d > %d)", from->name, from->hostname, len, MTU);
523 if(type == PKT_PROBE) {
526 memcpy(inpkt.data, data, len);
527 mtu_probe_h(from, &inpkt, len);
533 if(type & ~(PKT_COMPRESSED)) {
534 logger(DEBUG_ALWAYS, LOG_ERR, "Unexpected SPTPS record type %d len %d from %s (%s)", type, len, from->name, from->hostname);
538 if(type & PKT_COMPRESSED) {
539 uint16_t ulen = uncompress_packet(inpkt.data, (const uint8_t *)data, len, from->incompression);
545 if(inpkt.len > MAXSIZE)
548 memcpy(inpkt.data, data, len);
552 receive_packet(from, &inpkt);
557 Send a packet to the given node.
// Send packet to node n.
//
// For n == mesh->self the packet is only counted in n->out_bytes;
// delivery to the application is still a TODO. For other nodes the send is
// logged, an unreachable node is reported, and the packet is counted in
// n->out_bytes and handed to send_sptps_packet().
559 void send_packet(node_t *n, vpn_packet_t *packet) {
562 if(n == mesh->self) {
564 n->out_bytes += packet->len;
565 // TODO: send to application
// NOTE(review): this routine trace is logged at LOG_ERR while comparable
// messages in this file use LOG_INFO or LOG_DEBUG -- confirm the level.
569 logger(DEBUG_TRAFFIC, LOG_ERR, "Sending packet of %d bytes to %s (%s)",
570 packet->len, n->name, n->hostname);
572 if(!n->status.reachable) {
573 logger(DEBUG_TRAFFIC, LOG_INFO, "Node %s (%s) is not reachable",
574 n->name, n->hostname);
579 n->out_bytes += packet->len;
581 send_sptps_packet(n, packet);
585 /* Broadcast a packet using the minimum spanning tree */
// from:   the node the packet comes from. Unless that is mesh->self, a
//         copy is first delivered to ourself through send_packet().
// packet: the packet to forward.
//
// The packet is then sent to the node of every connection that is active
// and flagged c->status.mst, except the connection that leads back towards
// from (from->nexthop->connection).
587 void broadcast_packet(const node_t *from, vpn_packet_t *packet) {
588 // Always give ourself a copy of the packet.
589 if(from != mesh->self)
590 send_packet(mesh->self, packet);
592 logger(DEBUG_TRAFFIC, LOG_INFO, "Broadcasting packet of %d bytes from %s (%s)",
593 packet->len, from->name, from->hostname);
595 for list_each(connection_t, c, connection_list)
596 if(c->status.active && c->status.mst && c != from->nexthop->connection)
597 send_packet(c->node, packet);
// Try to find the node that sent pkt when its source address from is not
// known, by testing the packet against candidate nodes with try_mac().
//
// Walks mesh->edges and only considers edges whose target node is
// reachable and is not ourself. last_hard_try is a static that remembers
// now.tv_sec of an earlier attempt; it is consulted for edges on which
// sockaddrcmp_noport() returns non-zero.
// NOTE(review): this looks like a limit of one attempt per second for
// edges whose address does not match the source -- confirm.
// NOTE(review): presumably returns the matching node or NULL; the return
// statements are not visible here -- confirm.
600 static node_t *try_harder(const sockaddr_t *from, const vpn_packet_t *pkt) {
603 static time_t last_hard_try = 0;
605 for splay_each(edge_t, e, mesh->edges) {
606 if(!e->to->status.reachable || e->to == mesh->self)
609 if(sockaddrcmp_noport(from, &e->address)) {
610 if(last_hard_try == now.tv_sec)
615 if(!try_mac(e->to, pkt))
623 last_hard_try = now.tv_sec;
625 last_hard_try = now.tv_sec;
// Callback that reads and dispatches one UDP datagram from a listening
// socket.
//
// data:  the listen_socket_t to read from.
// flags: not used by any statement visible here.
//
// Reads up to MAXSIZE bytes with recvfrom(). A non-positive or over-size
// length is treated as a failure and logged unless the error is a
// would-block condition. The source address is passed through
// sockaddrunmap() and looked up with lookup_node_udp(); try_harder() is
// the fallback, followed by update_node_udp(). Unknown sources are
// reported when debug_level is at least DEBUG_PROTOCOL. The index of the
// receiving socket within listen_socket is stored in n->sock and the
// packet is handed to receive_udppacket().
629 void handle_incoming_vpn_data(void *data, int flags) {
630 listen_socket_t *ls = data;
633 sockaddr_t from = {{0}};
634 socklen_t fromlen = sizeof from;
638 len = recvfrom(ls->udp.fd, pkt.data, MAXSIZE, 0, &from.sa, &fromlen);
640 if(len <= 0 || len > MAXSIZE) {
641 if(!sockwouldblock(sockerrno))
642 logger(DEBUG_ALWAYS, LOG_ERR, "Receiving packet failed: %s", sockstrerror(sockerrno));
648 sockaddrunmap(&from); /* Some braindead IPv6 implementations do stupid things. */
650 n = lookup_node_udp(&from);
653 n = try_harder(&from, &pkt);
655 update_node_udp(n, &from);
656 else if(debug_level >= DEBUG_PROTOCOL) {
657 hostname = sockaddr2hostname(&from);
658 logger(DEBUG_PROTOCOL, LOG_WARNING, "Received UDP packet from unknown source %s", hostname);
666 n->sock = ls - listen_socket;
668 receive_udppacket(n, &pkt);