2 net_packet.c -- Handles in- and outgoing VPN packets
3 Copyright (C) 2014 Guus Sliepen <guus@meshlink.io>
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License along
16 with this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 #include "connection.h"
31 #include "meshlink_internal.h"
// Forward declaration: send_udppacket() is defined further down in this file
// but is already called by the MTU probe code that precedes its definition.
41 static void send_udppacket(meshlink_handle_t *mesh, node_t *, vpn_packet_t *);
// NOTE(review): presumably the size of the anti-replay window; the unit is not stated here -- confirm.
43 unsigned replaywin = 16;
// 1073741824 == 2^30.
// NOTE(review): presumably an upper bound for packet sequence numbers -- confirm against its users.
45 #define MAX_SEQNO 1073741824
47 /* mtuprobes == 1..30: initial discovery, send bursts with 1 second interval
48 mtuprobes == 31: sleep pinginterval seconds
49 mtuprobes == 32: send 1 burst, sleep pingtimeout seconds
50 mtuprobes == 33: no response from other side, restart PMTU discovery process
52 Probes are sent in batches of at least three, with random sizes between the
53 lower and upper boundaries for the MTU thus far discovered.
55 After the initial discovery, a fourth packet is added to each batch with a
56 size larger than the currently known PMTU, to test if the PMTU has increased.
58 In case local discovery is enabled, another packet is added to each batch,
59 which will be broadcast to the local network.
// Timer callback that drives path-MTU probing for a single node: it sends a
// burst of probe packets through send_udppacket(), refreshes the packet loss
// estimate and re-arms n->mtutimeout.
// loop: event loop; its data pointer is the mesh handle.
// data: callback argument; send_mtu_probe() registers the node_t here.
63 static void send_mtu_probe_handler(event_loop_t *loop, void *data) {
64 meshlink_handle_t *mesh = loop->data;
// Probing requires a reachable node with a valid key.
70 if(!n->status.reachable || !n->status.validkey) {
71 logger(DEBUG_TRAFFIC, LOG_INFO, "Trying to send MTU probe to unreachable or rekeying node %s (%s)", n->name, n->hostname);
76 if(n->mtuprobes > 32) {
79 timeout = mesh->pinginterval;
// No reply to the UDP ping: stop treating the UDP path as confirmed.
83 logger(DEBUG_TRAFFIC, LOG_INFO, "%s (%s) did not respond to UDP ping, restarting PMTU discovery", n->name, n->hostname);
84 n->status.udp_confirmed = false;
// Ten or more probes sent and not a single size confirmed yet (minmtu is still 0).
90 if(n->mtuprobes >= 10 && n->mtuprobes < 32 && !n->minmtu) {
91 logger(DEBUG_TRAFFIC, LOG_INFO, "No response to MTU probes from %s (%s)", n->name, n->hostname);
// Discovery ends at probe 30, or earlier once the lower bound has reached the
// upper bound; the two bounds are then collapsed to one value.
95 if(n->mtuprobes == 30 || (n->mtuprobes < 30 && n->minmtu >= n->maxmtu)) {
96 if(n->minmtu > n->maxmtu)
97 n->minmtu = n->maxmtu;
99 n->maxmtu = n->minmtu;
101 logger(DEBUG_TRAFFIC, LOG_INFO, "Fixing MTU of %s (%s) to %d after %d probes", n->name, n->hostname, n->mtu, n->mtuprobes);
// Rounds 31 and 32 choose their own delay: pinginterval, then pingtimeout.
105 if(n->mtuprobes == 31) {
106 timeout = mesh->pinginterval;
108 } else if(n->mtuprobes == 32) {
109 timeout = mesh->pingtimeout;
// One burst: 4 packets plus mesh->localdiscovery additional ones.
112 for(int i = 0; i < 4 + mesh->localdiscovery; i++) {
116 if(n->mtuprobes < 30 || n->maxmtu + 8 >= MTU)
119 } else if(n->maxmtu <= n->minmtu) {
// Random length in [minmtu + 1, maxmtu]; the modulus requires maxmtu > minmtu.
122 len = n->minmtu + 1 + rand() % (n->maxmtu - n->minmtu);
// The first 14 bytes are zeroed and the rest is random. mtu_probe_h() reads a
// zero first byte as a probe request.
130 memset(packet.data, 0, 14);
131 randomize(packet.data + 14, len - 14);
// Only the extra packets (i >= 4) are broadcast, and only during the first
// 10 rounds when a previous edge is known for this node.
133 n->status.broadcast = i >= 4 && n->mtuprobes <= 10 && n->prevedge;
135 logger(DEBUG_TRAFFIC, LOG_INFO, "Sending MTU probe length %d to %s (%s)", len, n->name, n->hostname);
137 send_udppacket(mesh, n, &packet);
// Clear the broadcast flag, reset the reply counter and timestamp the burst;
// mtu_probe_h() uses the counter and the timestamp for RTT and bandwidth.
140 n->status.broadcast = false;
141 n->probe_counter = 0;
142 gettimeofday(&n->probe_time, NULL);
144 /* Calculate the packet loss of incoming traffic by comparing the rate of
145 packets received to the rate with which the sequence number has increased.
148 if(n->received > n->prev_received)
149 n->packetloss = 1.0 - (n->received - n->prev_received) / (float)(n->received_seqno - n->prev_received_seqno);
151 n->packetloss = n->received_seqno <= n->prev_received_seqno;
// Remember the current counters as the baseline for the next round.
153 n->prev_received_seqno = n->received_seqno;
154 n->prev_received = n->received;
// Re-arm the timer: timeout in the first timeval member, and a random value
// below 100000 in the second one so that nodes do not probe in lockstep.
// NOTE(review): assumes the usual tv_sec, tv_usec member order -- confirm.
157 timeout_set(&mesh->loop, &n->mtutimeout, &(struct timeval){timeout, rand() % 100000});
// Start MTU probing for node n: register send_mtu_probe_handler() on
// n->mtutimeout with an initial interval of 1 second and the node as callback
// data, then run the handler once immediately.
160 void send_mtu_probe(meshlink_handle_t *mesh, node_t *n) {
161 timeout_add(&mesh->loop, &n->mtutimeout, send_mtu_probe_handler, n, &(struct timeval){1, 0});
162 send_mtu_probe_handler(&mesh->loop, n);
// Handle an MTU probe packet received from node n.
// packet: the probe; a zero first data byte marks a request that is echoed
//         back to the sender.
// len:    length used for the PMTU comparison and the bandwidth estimate.
165 static void mtu_probe_h(meshlink_handle_t *mesh, node_t *n, vpn_packet_t *packet, uint16_t len) {
// NOTE(review): this logs packet->len while the checks below use the len
// parameter -- confirm that both always agree.
166 logger(DEBUG_TRAFFIC, LOG_INFO, "Got MTU probe length %d from %s (%s)", packet->len, n->name, n->hostname);
168 if(!packet->data[0]) {
169 /* It's a probe request, send back a reply */
173 /* Temporarily set udp_confirmed, so that the reply is sent
174 back exactly the way it came in. */
// The real flag value is saved here and restored right after the echo.
176 bool udp_confirmed = n->status.udp_confirmed;
177 n->status.udp_confirmed = true;
178 send_udppacket(mesh, n, packet);
179 n->status.udp_confirmed = udp_confirmed;
181 /* It's a valid reply: now we know bidirectional communication
182 is possible using the address and socket that the reply
185 n->status.udp_confirmed = true;
187 /* If we haven't established the PMTU yet, restart the discovery process. */
189 if(n->mtuprobes > 30) {
// NOTE(review): maxmtu + 8 is presumably the size of the oversized probe that
// is sent once discovery has settled -- confirm against the sender.
190 if (len == n->maxmtu + 8) {
191 logger(DEBUG_TRAFFIC, LOG_INFO, "Increase in PMTU to %s (%s) detected, restarting PMTU discovery", n->name, n->hostname);
203 /* If applicable, raise the minimum supported MTU */
210 /* Calculate RTT and bandwidth.
211 The RTT is the time between the MTU probe burst was sent and the first
212 reply is received. The bandwidth is measured using the time between the
213 arrival of the first and third probe reply.
216 struct timeval now, diff;
217 gettimeofday(&now, NULL);
218 timersub(&now, &n->probe_time, &diff);
// rtt is stored in seconds and bandwidth in bytes per second; the log line
// below converts them to milliseconds and Mbit/s.
222 if(n->probe_counter == 1) {
223 n->rtt = diff.tv_sec + diff.tv_usec * 1e-6;
225 } else if(n->probe_counter == 3) {
226 n->bandwidth = 2.0 * len / (diff.tv_sec + diff.tv_usec * 1e-6);
227 logger(DEBUG_TRAFFIC, LOG_DEBUG, "%s (%s) RTT %.2f ms, burst bandwidth %.3f Mbit/s, rx packet loss %.2f %%", n->name, n->hostname, n->rtt * 1e3, n->bandwidth * 8e-6, n->packetloss * 1e2);
// Compress len bytes from source into dest, with the method selected by level.
// NOTE(review): the meaning of the return value (compressed length, and the
// value used to signal failure) should be confirmed against the caller.
232 static uint16_t compress_packet(uint8_t *dest, const uint8_t *source, uint16_t len, int level) {
// Pass-through path: the data is copied unchanged.
234 memcpy(dest, source, len);
// NOTE(review): level 10 presumably selects a non-zlib compressor -- confirm.
236 } else if(level == 10) {
// Levels below 10 are passed straight to zlib as its compression level.
238 } else if(level < 10) {
// zlib is told that dest can hold MAXSIZE bytes, so dest must be at least that large.
240 unsigned long destlen = MAXSIZE;
241 if(compress2(dest, &destlen, source, len, level) == Z_OK)
// Decompress len bytes from source into dest, with the method selected by level.
// NOTE(review): the meaning of the return value (decompressed length, and the
// value used to signal failure) should be confirmed against the caller.
253 static uint16_t uncompress_packet(uint8_t *dest, const uint8_t *source, uint16_t len, int level) {
// Pass-through path: the data is copied unchanged.
255 memcpy(dest, source, len);
// Levels above 9 are not handled by zlib.
257 } else if(level > 9) {
// zlib is told that dest can hold MAXSIZE bytes, so dest must be at least that large.
262 unsigned long destlen = MAXSIZE;
263 if(uncompress(dest, &destlen, source, len) == Z_OK)
// Accept a decoded packet that arrived from node n: log its size and add it
// to the per-node counter of received bytes.
275 static void receive_packet(meshlink_handle_t *mesh, node_t *n, vpn_packet_t *packet) {
276 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Received packet of %d bytes from %s (%s)",
277 packet->len, n->name, n->hostname);
280 n->in_bytes += packet->len;
// Return whether inpkt verifies as an SPTPS datagram for the session of node n.
// This is a thin wrapper around sptps_verify_datagram(); mesh is not used in
// the body.
285 static bool try_mac(meshlink_handle_t *mesh, node_t *n, const vpn_packet_t *inpkt) {
286 return sptps_verify_datagram(&n->sptps, inpkt->data, inpkt->len);
// Feed a UDP datagram received from node n into the SPTPS session of that node.
289 static void receive_udppacket(meshlink_handle_t *mesh, node_t *n, vpn_packet_t *inpkt) {
// No SPTPS state yet: the packet cannot be processed. The two log messages
// distinguish whether we are already waiting for a key from this node.
290 if(!n->sptps.state) {
291 if(!n->status.waitingforkey) {
292 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but we haven't exchanged keys yet", n->name, n->hostname);
295 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but he hasn't got our key yet", n->name, n->hostname);
// Hand the raw datagram to SPTPS.
299 sptps_receive_data(&n->sptps, inpkt->data, inpkt->len);
// Handle a packet that arrived over connection c: the payload is copied into
// a local vpn_packet_t and passed to receive_packet() on behalf of c->node.
302 void receive_tcppacket(meshlink_handle_t *mesh, connection_t *c, const char *buffer, int len) {
// Oversize check. len is an int and sizeof yields a size_t, so a negative len
// is converted to a large unsigned value and also satisfies this condition.
305 if(len > sizeof outpkt.data)
310 memcpy(outpkt.data, buffer, len);
312 receive_packet(mesh, c->node, &outpkt);
// Send origpkt to node n as an SPTPS record, optionally compressed.
315 static void send_sptps_packet(meshlink_handle_t *mesh, node_t *n, vpn_packet_t *origpkt) {
// Without a valid key the session is not usable yet.
316 if(!n->status.validkey) {
317 logger(DEBUG_TRAFFIC, LOG_INFO, "No valid key known yet for %s (%s)", n->name, n->hostname);
318 if(!n->status.waitingforkey)
// A key request has been pending for more than 10 seconds: stop the SPTPS
// session and clear the waiting flag.
320 else if(n->last_req_key + 10 < mesh->loop.now.tv_sec) {
321 logger(DEBUG_ALWAYS, LOG_DEBUG, "No key from %s after 10 seconds, restarting SPTPS", n->name);
322 sptps_stop(&n->sptps);
323 n->status.waitingforkey = false;
331 // If it's a probe, send it immediately without trying to compress it.
333 sptps_send_record(&n->sptps, PKT_PROBE, origpkt->data, origpkt->len);
// Compression is attempted only when an outgoing compression level is set.
339 if(n->outcompression) {
340 int len = compress_packet(outpkt.data, origpkt->data, origpkt->len, n->outcompression);
342 logger(DEBUG_TRAFFIC, LOG_ERR, "Error while compressing packet to %s (%s)", n->name, n->hostname);
// The record is flagged as compressed only when compression made it smaller.
343 } else if(len < origpkt->len) {
346 type |= PKT_COMPRESSED;
350 sptps_send_record(&n->sptps, type, origpkt->data, origpkt->len);
// Select the destination address and the listen socket to use for a UDP
// packet to node n.
// sa:   out parameter, receives a pointer to the chosen address.
// sock: out parameter, receives an index into mesh->listen_socket.
354 static void choose_udp_address(meshlink_handle_t *mesh, const node_t *n, const sockaddr_t **sa, int *sock) {
359 /* If the UDP address is confirmed, use it. */
360 if(n->status.udp_confirmed)
363 /* Send every third packet to n->address; that could be set
364 to the node's reflexive UDP address discovered during key
373 /* Otherwise, addresses are found in edges to this node.
374 So we pick a random edge and a random socket. */
// NOTE(review): the modulo divides by zero if n->edge_tree->count is 0 --
// confirm that callers guarantee at least one edge.
377 int j = rand() % n->edge_tree->count;
378 edge_t *candidate = NULL;
380 for splay_each(edge_t, e, n->edge_tree) {
// The address is taken from the reverse edge of the selected edge.
382 candidate = e->reverse;
388 *sa = &candidate->address;
// NOTE(review): same concern if mesh->listen_sockets is 0 -- confirm.
389 *sock = rand() % mesh->listen_sockets;
392 /* Make sure we have a suitable socket for the chosen address */
393 if(mesh->listen_socket[*sock].sa.sa.sa_family != (*sa)->sa.sa_family) {
// Scan the listen sockets for one whose address family matches the target.
394 for(int i = 0; i < mesh->listen_sockets; i++) {
395 if(mesh->listen_socket[i].sa.sa.sa_family == (*sa)->sa.sa_family) {
// Select a broadcast or multicast destination and a random listen socket for
// a local-discovery packet to node n.
// sa:   out parameter, receives a pointer to the chosen address.
// sock: out parameter, receives an index into mesh->listen_socket.
// The returned address points either into the mesh handle or at one of the
// function-static defaults below; both are modified in place, so the result
// is only valid until the next call.
// NOTE(review): n->prevedge is dereferenced without a check. The MTU probe
// sender only sets status.broadcast when prevedge is set -- confirm that no
// other caller exists.
403 static void choose_broadcast_address(meshlink_handle_t *mesh, const node_t *n, const sockaddr_t **sa, int *sock) {
// Default IPv4 target: s_addr = -1 sets all address bits, i.e. 255.255.255.255.
404 static sockaddr_t broadcast_ipv4 = {
406 .sin_family = AF_INET,
407 .sin_addr.s_addr = -1,
// Default IPv6 target: bytes ff 02 ... 01 with the rest zero, i.e. ff02::1.
411 static sockaddr_t broadcast_ipv6 = {
413 .sin6_family = AF_INET6,
414 .sin6_addr.s6_addr[0x0] = 0xff,
415 .sin6_addr.s6_addr[0x1] = 0x02,
416 .sin6_addr.s6_addr[0xf] = 0x01,
// NOTE(review): the modulo divides by zero if mesh->listen_sockets is 0 -- confirm.
420 *sock = rand() % mesh->listen_sockets;
// The address family of the chosen socket decides between IPv6 and IPv4.
// A configured localdiscovery_address of the same family takes precedence
// over the built-in default. The port is always copied from the previous edge.
// NOTE(review): the port is read through the .in view even for IPv6, which
// relies on sin_port and sin6_port sharing the same offset -- confirm.
422 if(mesh->listen_socket[*sock].sa.sa.sa_family == AF_INET6) {
423 if(mesh->localdiscovery_address.sa.sa_family == AF_INET6) {
424 mesh->localdiscovery_address.in6.sin6_port = n->prevedge->address.in.sin_port;
425 *sa = &mesh->localdiscovery_address;
427 broadcast_ipv6.in6.sin6_port = n->prevedge->address.in.sin_port;
// The scope id is taken from the listen socket that will send the packet.
428 broadcast_ipv6.in6.sin6_scope_id = mesh->listen_socket[*sock].sa.in6.sin6_scope_id;
429 *sa = &broadcast_ipv6;
432 if(mesh->localdiscovery_address.sa.sa_family == AF_INET) {
433 mesh->localdiscovery_address.in.sin_port = n->prevedge->address.in.sin_port;
434 *sa = &mesh->localdiscovery_address;
436 broadcast_ipv4.in.sin_port = n->prevedge->address.in.sin_port;
437 *sa = &broadcast_ipv4;
// Send origpkt to node n. After the reachability check the work is delegated
// to send_sptps_packet().
// NOTE(review): pkt1, pkt2, pkt, inpkt, outpkt and origlen are declared but
// not referenced by the statements shown here -- confirm they are still needed.
442 static void send_udppacket(meshlink_handle_t *mesh, node_t *n, vpn_packet_t *origpkt) {
443 vpn_packet_t pkt1, pkt2;
444 vpn_packet_t *pkt[] = { &pkt1, &pkt2, &pkt1, &pkt2 };
445 vpn_packet_t *inpkt = origpkt;
447 vpn_packet_t *outpkt;
448 int origlen = origpkt->len;
451 if(!n->status.reachable) {
452 logger(DEBUG_TRAFFIC, LOG_INFO, "Trying to send UDP packet to unreachable node %s (%s)", n->name, n->hostname);
// NOTE(review): both this function and send_sptps_packet() return void.
// ISO C does not allow a return statement with an expression in a void
// function (compilers accept it as an extension); consider a plain call
// followed by return.
456 return send_sptps_packet(mesh, n, origpkt);
// Transmit len bytes of SPTPS output of the given record type, either
// base64-encoded inside a request on the meta connection or directly via UDP.
// NOTE(review): handle is presumably the destination node_t -- confirm.
459 bool send_sptps_data(void *handle, uint8_t type, const char *data, size_t len) {
462 /* Send it via TCP if it is a handshake packet, TCPOnly is in use, or this packet is larger than the MTU. */
// PKT_PROBE records are exempt from the size test, so probes are never
// diverted to TCP because of their length.
464 if(type >= SPTPS_HANDSHAKE || ((mesh->self->options | to->options) & OPTION_TCPONLY) || (type != PKT_PROBE && len > to->minmtu)) {
// Variable-length array sized for the base64 expansion (4 characters per
// 3 input bytes, plus slack).
// NOTE(review): the stack usage grows with len -- confirm len is bounded.
465 char buf[len * 4 / 3 + 5];
466 b64encode(data, buf, len);
467 /* If no valid key is known yet, send the packets using ANS_KEY requests,
468 to ensure we get to learn the reflexive UDP address. */
469 if(!to->status.validkey) {
470 to->incompression = mesh->self->incompression;
471 return send_request(to->nexthop->connection, "%d %s %s %s -1 -1 -1 %d", ANS_KEY, mesh->self->name, to->name, buf, to->incompression);
// With a valid key the data travels in a REQ_KEY request of subtype REQ_SPTPS.
473 return send_request(to->nexthop->connection, "%d %s %s %d %s", REQ_KEY, mesh->self->name, to->name, REQ_SPTPS, buf);
477 /* Otherwise, send the packet via UDP */
479 const sockaddr_t *sa;
482 if(to->status.broadcast)
483 choose_broadcast_address(mesh, to, &sa, &sock);
485 choose_udp_address(mesh, to, &sa, &sock);
// A would-block error is tolerated silently; every other error is handled below.
487 if(sendto(mesh->listen_socket[sock].udp.fd, data, len, 0, &sa->sa, SALEN(sa->sa)) < 0 && !sockwouldblock(sockerrno)) {
// Message too large for the path: lower the upper MTU bound below this length.
488 if(sockmsgsize(sockerrno)) {
489 if(to->maxmtu >= len)
490 to->maxmtu = len - 1;
494 logger(DEBUG_TRAFFIC, LOG_WARNING, "Error sending UDP SPTPS packet to %s (%s): %s", to->name, to->hostname, sockstrerror(sockerrno));
// Handle one decoded SPTPS record received from a node.
// handle: the sending node_t.
// type:   SPTPS_HANDSHAKE, PKT_PROBE, or a data record that may carry the
//         PKT_COMPRESSED flag.
// data, len: the record payload.
502 bool receive_sptps_record(void *handle, uint8_t type, const char *data, uint16_t len) {
503 node_t *from = handle;
// A handshake record marks the key as valid the first time it is seen.
505 if(type == SPTPS_HANDSHAKE) {
506 if(!from->status.validkey) {
507 from->status.validkey = true;
508 from->status.waitingforkey = false;
// NOTE(review): the log text below contains the typo succesful; it is a
// runtime string and is left untouched here.
509 logger(DEBUG_META, LOG_INFO, "SPTPS key exchange with %s (%s) succesful", from->name, from->hostname);
515 logger(DEBUG_ALWAYS, LOG_ERR, "Packet from %s (%s) larger than maximum supported size (%d > %d)", from->name, from->hostname, len, MTU);
// Probes are copied into a local packet and handled by mtu_probe_h().
521 if(type == PKT_PROBE) {
524 memcpy(inpkt.data, data, len);
525 mtu_probe_h(mesh, from, &inpkt, len);
// Any type bit other than PKT_COMPRESSED is unexpected at this point.
531 if(type & ~(PKT_COMPRESSED)) {
532 logger(DEBUG_ALWAYS, LOG_ERR, "Unexpected SPTPS record type %d len %d from %s (%s)", type, len, from->name, from->hostname);
// Compressed payloads are expanded with the incoming compression level of
// the sender; uncompressed payloads are copied as they are.
536 if(type & PKT_COMPRESSED) {
537 uint16_t ulen = uncompress_packet(inpkt.data, (const uint8_t *)data, len, from->incompression);
543 if(inpkt.len > MAXSIZE)
546 memcpy(inpkt.data, data, len);
550 receive_packet(mesh, from, &inpkt);
555 Send a packet to the given node.
// Send packet to node n and account for it in n->out_bytes.
557 void send_packet(meshlink_handle_t *mesh, node_t *n, vpn_packet_t *packet) {
// Packets addressed to ourself are only counted; local delivery is still a TODO.
560 if(n == mesh->self) {
562 n->out_bytes += packet->len;
563 // TODO: send to application
// NOTE(review): this routine trace message is logged at LOG_ERR, while the
// matching message in receive_packet() uses LOG_DEBUG -- confirm the level.
567 logger(DEBUG_TRAFFIC, LOG_ERR, "Sending packet of %d bytes to %s (%s)",
568 packet->len, n->name, n->hostname);
570 if(!n->status.reachable) {
571 logger(DEBUG_TRAFFIC, LOG_INFO, "Node %s (%s) is not reachable",
572 n->name, n->hostname);
577 n->out_bytes += packet->len;
579 send_sptps_packet(mesh, n, packet);
583 /* Broadcast a packet using the minimum spanning tree */
// from:   the node the packet originates from.
// packet: the packet to distribute.
585 void broadcast_packet(meshlink_handle_t *mesh, const node_t *from, vpn_packet_t *packet) {
586 // Always give ourself a copy of the packet.
587 if(from != mesh->self)
588 send_packet(mesh, mesh->self, packet);
590 logger(DEBUG_TRAFFIC, LOG_INFO, "Broadcasting packet of %d bytes from %s (%s)",
591 packet->len, from->name, from->hostname);
// Forward over every active connection that is flagged as part of the
// spanning tree, except the connection leading back towards the origin.
593 for list_each(connection_t, c, mesh->connections)
594 if(c->status.active && c->status.mst && c != from->nexthop->connection)
595 send_packet(mesh, c->node, packet);
// Try to identify the sender of a UDP packet by walking all edges and
// checking the packet against candidate nodes with try_mac().
// from: source address of the packet.
// pkt:  the received packet.
598 static node_t *try_harder(meshlink_handle_t *mesh, const sockaddr_t *from, const vpn_packet_t *pkt) {
// Function-static, so this timestamp is shared by all calls and by all mesh
// handles in the process. It is compared with the current second below.
601 static time_t last_hard_try = 0;
603 for splay_each(edge_t, e, mesh->edges) {
// Unreachable nodes and ourself are never candidates.
604 if(!e->to->status.reachable || e->to == mesh->self)
// NOTE(review): sockaddrcmp_noport() presumably returns nonzero when the
// addresses differ, so this branch handles edges whose address does not
// match the source -- confirm.
607 if(sockaddrcmp_noport(from, &e->address)) {
608 if(last_hard_try == mesh->loop.now.tv_sec)
613 if(!try_mac(mesh, e->to, pkt))
621 last_hard_try = mesh->loop.now.tv_sec;
623 last_hard_try = mesh->loop.now.tv_sec;
// Event loop callback for a UDP listen socket: read one datagram, map its
// source address to a node and pass the packet to receive_udppacket().
// loop: event loop; its data pointer is the mesh handle.
// data: the listen_socket_t the datagram is read from.
627 void handle_incoming_vpn_data(event_loop_t *loop, void *data, int flags) {
628 meshlink_handle_t *mesh = loop->data;
629 listen_socket_t *ls = data;
632 sockaddr_t from = {{0}};
633 socklen_t fromlen = sizeof from;
637 len = recvfrom(ls->udp.fd, pkt.data, MAXSIZE, 0, &from.sa, &fromlen);
// NOTE(review): a zero-length datagram (len == 0) also takes this path, where
// sockerrno may hold a stale value -- confirm this is intended.
639 if(len <= 0 || len > MAXSIZE) {
640 if(!sockwouldblock(sockerrno))
641 logger(DEBUG_ALWAYS, LOG_ERR, "Receiving packet failed: %s", sockstrerror(sockerrno));
647 sockaddrunmap(&from); /* Some braindead IPv6 implementations do stupid things. */
// First try a direct lookup by source address, then fall back to try_harder().
649 n = lookup_node_udp(&from);
652 n = try_harder(mesh, &from, &pkt);
654 update_node_udp(n, &from);
// The hostname is only resolved for the warning when protocol debugging is on.
655 else if(mesh->debug_level >= DEBUG_PROTOCOL) {
656 hostname = sockaddr2hostname(&from);
657 logger(DEBUG_PROTOCOL, LOG_WARNING, "Received UDP packet from unknown source %s", hostname);
// Record the index of the listen socket the packet arrived on.
665 n->sock = ls - mesh->listen_socket;
667 receive_udppacket(mesh, n, &pkt);