2 net_packet.c -- Handles in- and outgoing VPN packets
3 Copyright (C) 2014 Guus Sliepen <guus@meshlink.io>
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License along
16 with this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 #include "connection.h"
31 #include "meshlink_internal.h"
// Forward declaration: send_udppacket is defined further down in this file
// but is already called by the MTU probe code that follows.
41 static void send_udppacket(node_t *, vpn_packet_t *);
// NOTE(review): replaywin is not referenced in the lines visible here; its
// unit and its consumers should be confirmed against the rest of the project.
43 unsigned replaywin = 16;
// 1073741824 equals 2^30. Not referenced in the lines visible here.
45 #define MAX_SEQNO 1073741824
47 /* mtuprobes == 1..30: initial discovery, send bursts with 1 second interval
48 mtuprobes == 31: sleep pinginterval seconds
49 mtuprobes == 32: send 1 burst, sleep pingtimeout seconds
50 mtuprobes == 33: no response from other side, restart PMTU discovery process
52 Probes are sent in batches of at least three, with random sizes between the
53 lower and upper boundaries for the MTU thus far discovered.
55 After the initial discovery, a fourth packet is added to each batch with a
56 size larger than the currently known PMTU, to test if the PMTU has increased.
58 In case local discovery is enabled, another packet is added to each batch,
59 which will be broadcast to the local network.
// Timer callback for path MTU discovery towards one node. The visible lines
// send a burst of probes of varying length, refresh the incoming packet loss
// estimate and re-arm the mtutimeout timer of the node.
// NOTE(review): this excerpt omits several lines of the function (the
// embedded line numbers skip), so only the visible statements are described.
63 static void send_mtu_probe_handler(event_loop_t *loop, void *data) {
// Log the case where the node is unreachable or has no valid key.
69 if(!n->status.reachable || !n->status.validkey) {
70 logger(DEBUG_TRAFFIC, LOG_INFO, "Trying to send MTU probe to unreachable or rekeying node %s (%s)", n->name, n->hostname);
// Past state 32 of the mtuprobes state table documented above this function.
75 if(n->mtuprobes > 32) {
78 timeout = mesh->pinginterval;
82 logger(DEBUG_TRAFFIC, LOG_INFO, "%s (%s) did not respond to UDP ping, restarting PMTU discovery", n->name, n->hostname);
// Clear the udp_confirmed flag; mtu_probe_h sets it again when a probe reply
// arrives.
83 n->status.udp_confirmed = false;
// At least ten rounds have passed and minmtu is still zero.
89 if(n->mtuprobes >= 10 && n->mtuprobes < 32 && !n->minmtu) {
90 logger(DEBUG_TRAFFIC, LOG_INFO, "No response to MTU probes from %s (%s)", n->name, n->hostname);
// Discovery is finished at round 30, or earlier once minmtu has reached
// maxmtu.
94 if(n->mtuprobes == 30 || (n->mtuprobes < 30 && n->minmtu >= n->maxmtu)) {
// minmtu is never left above maxmtu.
95 if(n->minmtu > n->maxmtu)
96 n->minmtu = n->maxmtu;
98 n->maxmtu = n->minmtu;
100 logger(DEBUG_TRAFFIC, LOG_INFO, "Fixing MTU of %s (%s) to %d after %d probes", n->name, n->hostname, n->mtu, n->mtuprobes);
// States 31 and 32 select how long to wait before the next invocation.
104 if(n->mtuprobes == 31) {
105 timeout = mesh->pinginterval;
107 } else if(n->mtuprobes == 32) {
108 timeout = mesh->pingtimeout;
// Up to four probes per burst, plus mesh->localdiscovery extra iterations.
111 for(int i = 0; i < 4 + mesh->localdiscovery; i++) {
115 if(n->mtuprobes < 30 || n->maxmtu + 8 >= MTU)
118 } else if(n->maxmtu <= n->minmtu) {
// Random length in the range minmtu + 1 .. maxmtu (inclusive).
121 len = n->minmtu + 1 + rand() % (n->maxmtu - n->minmtu);
// The first 14 bytes are zeroed and the rest is random. mtu_probe_h treats a
// zero first byte as the mark of a probe request.
129 memset(packet.data, 0, 14);
130 randomize(packet.data + 14, len - 14);
// Only iterations beyond the fourth, during the first ten rounds and with a
// prevedge present, are flagged for broadcast.
132 n->status.broadcast = i >= 4 && n->mtuprobes <= 10 && n->prevedge;
134 logger(DEBUG_TRAFFIC, LOG_INFO, "Sending MTU probe length %d to %s (%s)", len, n->name, n->hostname);
136 send_udppacket(n, &packet);
// Reset the reply counter and timestamp the burst; mtu_probe_h reads
// probe_counter and probe_time for its RTT and bandwidth estimates.
139 n->status.broadcast = false;
140 n->probe_counter = 0;
141 gettimeofday(&n->probe_time, NULL);
// NOTE(review): in the division below the divisor is zero when received_seqno
// equals prev_received_seqno while received has grown; confirm whether that
// combination can occur.
143 /* Calculate the packet loss of incoming traffic by comparing the rate of
144 packets received to the rate with which the sequence number has increased.
147 if(n->received > n->prev_received)
148 n->packetloss = 1.0 - (n->received - n->prev_received) / (float)(n->received_seqno - n->prev_received_seqno);
150 n->packetloss = n->received_seqno <= n->prev_received_seqno;
// Remember the current counters as the baseline for the next invocation.
152 n->prev_received_seqno = n->received_seqno;
153 n->prev_received = n->received;
// Re-arm the timer: timeout goes into the first timeval field and a random
// value below 100000 into the second (presumably seconds and microseconds,
// given the usual timeval layout).
156 timeout_set(&mesh->loop, &n->mtutimeout, &(struct timeval){timeout, rand() % 100000});
// Starts MTU probing for node n: registers send_mtu_probe_handler on the
// mtutimeout timer of the node with an initial timeval of {1, 0}, then runs
// the handler once immediately.
// NOTE(review): mesh is not declared in the visible lines of this function;
// confirm where it comes from.
159 void send_mtu_probe(node_t *n) {
160 timeout_add(&mesh->loop, &n->mtutimeout, send_mtu_probe_handler, n, &(struct timeval){1, 0});
161 send_mtu_probe_handler(&mesh->loop, n);
// Handles an MTU probe packet received from node n. A packet whose first data
// byte is zero is treated as a probe request and is echoed back; the other
// visible code handles a probe reply: it marks the UDP path as confirmed and
// updates the RTT and bandwidth estimates of the node.
// NOTE(review): several lines of this function are missing from this excerpt.
164 static void mtu_probe_h(node_t *n, vpn_packet_t *packet, uint16_t len) {
165 logger(DEBUG_TRAFFIC, LOG_INFO, "Got MTU probe length %d from %s (%s)", packet->len, n->name, n->hostname);
167 if(!packet->data[0]) {
168 /* It's a probe request, send back a reply */
172 /* Temporarily set udp_confirmed, so that the reply is sent
173 back exactly the way it came in. */
// The previous flag value is saved here and restored after the send.
175 bool udp_confirmed = n->status.udp_confirmed;
176 n->status.udp_confirmed = true;
177 send_udppacket(n, packet);
178 n->status.udp_confirmed = udp_confirmed;
180 /* It's a valid reply: now we know bidirectional communication
181 is possible using the address and socket that the reply
184 n->status.udp_confirmed = true;
186 /* If we haven't established the PMTU yet, restart the discovery process. */
188 if(n->mtuprobes > 30) {
// A reply of length maxmtu + 8 is logged as an increase of the path MTU.
189 if (len == n->maxmtu + 8) {
190 logger(DEBUG_TRAFFIC, LOG_INFO, "Increase in PMTU to %s (%s) detected, restarting PMTU discovery", n->name, n->hostname);
202 /* If applicable, raise the minimum supported MTU */
209 /* Calculate RTT and bandwidth.
210 The RTT is the time between the MTU probe burst was sent and the first
211 reply is received. The bandwidth is measured using the time between the
212 arrival of the first and third probe reply.
// diff is the time elapsed since n->probe_time.
215 struct timeval now, diff;
216 gettimeofday(&now, NULL);
217 timersub(&now, &n->probe_time, &diff);
// First reply of the burst: the elapsed time, in seconds, is stored as RTT.
221 if(n->probe_counter == 1) {
222 n->rtt = diff.tv_sec + diff.tv_usec * 1e-6;
// Third reply: two packet lengths divided by the elapsed seconds. The log line
// scales the result by 8e-6 to print Mbit/s, which suggests the stored value
// is in bytes per second.
224 } else if(n->probe_counter == 3) {
225 n->bandwidth = 2.0 * len / (diff.tv_sec + diff.tv_usec * 1e-6);
226 logger(DEBUG_TRAFFIC, LOG_DEBUG, "%s (%s) RTT %.2f ms, burst bandwidth %.3f Mbit/s, rx packet loss %.2f %%", n->name, n->hostname, n->rtt * 1e3, n->bandwidth * 8e-6, n->packetloss * 1e2);
// Compresses len bytes from source into dest according to level.
// Visible branches: a plain memcpy branch (its condition is not visible in
// this excerpt), a branch for level 10 (body not visible) and, for levels
// below 10, a call to compress2 with the level passed through.
// NOTE(review): the return statements are not visible here, so the meaning of
// the returned uint16_t must be confirmed against the full file.
231 static uint16_t compress_packet(uint8_t *dest, const uint8_t *source, uint16_t len, int level) {
233 memcpy(dest, source, len);
235 } else if(level == 10) {
237 } else if(level < 10) {
// destlen tells compress2 how much room dest offers (MAXSIZE).
239 unsigned long destlen = MAXSIZE;
240 if(compress2(dest, &destlen, source, len, level) == Z_OK)
// Counterpart of compress_packet: expands len bytes from source into dest.
// Visible branches: a plain memcpy branch (condition not visible), a branch
// for levels above 9 (body not visible) and a call to uncompress.
// NOTE(review): the return statements are not visible in this excerpt.
252 static uint16_t uncompress_packet(uint8_t *dest, const uint8_t *source, uint16_t len, int level) {
254 memcpy(dest, source, len);
256 } else if(level > 9) {
// destlen tells uncompress how much room dest offers (MAXSIZE).
261 unsigned long destlen = MAXSIZE;
262 if(uncompress(dest, &destlen, source, len) == Z_OK)
// Accounts for a packet received from node n: logs its size and adds it to
// the in_bytes counter of the node.
// NOTE(review): further lines of this function are not visible here.
274 static void receive_packet(node_t *n, vpn_packet_t *packet) {
275 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Received packet of %d bytes from %s (%s)",
276 packet->len, n->name, n->hostname);
279 n->in_bytes += packet->len;
// Returns the result of sptps_verify_datagram for the packet data against the
// SPTPS state of node n. Used by try_harder to test candidate senders.
284 static bool try_mac(node_t *n, const vpn_packet_t *inpkt) {
285 return sptps_verify_datagram(&n->sptps, inpkt->data, inpkt->len);
// Feeds a UDP packet received from node n into the SPTPS state of the node.
// When n->sptps.state is zero one of two messages is logged, the first one
// when the node is not flagged as waitingforkey.
// NOTE(review): the statements between the log calls are not visible here.
288 static void receive_udppacket(node_t *n, vpn_packet_t *inpkt) {
289 if(!n->sptps.state) {
290 if(!n->status.waitingforkey) {
291 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but we haven't exchanged keys yet", n->name, n->hostname);
294 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but he hasn't got our key yet", n->name, n->hostname);
298 sptps_receive_data(&n->sptps, inpkt->data, inpkt->len);
// Copies len bytes received over the meta connection c into a local packet
// and hands it to receive_packet for the node of that connection.
// NOTE(review): the setup of outpkt and the body of the size check are not
// visible in this excerpt.
301 void receive_tcppacket(connection_t *c, const char *buffer, int len) {
// len is an int while sizeof yields size_t, so the comparison is unsigned: a
// negative len converts to a very large value and also satisfies it.
304 if(len > sizeof outpkt.data)
309 memcpy(outpkt.data, buffer, len);
311 receive_packet(c->node, &outpkt);
// Sends origpkt to node n as an SPTPS record. Without a valid key the visible
// code logs that fact and, after more than 10 seconds without a key, stops the
// SPTPS session and clears waitingforkey. Probes are sent as PKT_PROBE
// records; other packets may be compressed first when n->outcompression is
// set.
// NOTE(review): several lines of this function are missing from this excerpt.
314 static void send_sptps_packet(node_t *n, vpn_packet_t *origpkt) {
315 if(!n->status.validkey) {
316 logger(DEBUG_TRAFFIC, LOG_INFO, "No valid key known yet for %s (%s)", n->name, n->hostname);
317 if(!n->status.waitingforkey)
// More than 10 seconds have passed since last_req_key.
319 else if(n->last_req_key + 10 < mesh->loop.now.tv_sec) {
320 logger(DEBUG_ALWAYS, LOG_DEBUG, "No key from %s after 10 seconds, restarting SPTPS", n->name);
321 sptps_stop(&n->sptps);
322 n->status.waitingforkey = false;
330 // If it's a probe, send it immediately without trying to compress it.
332 sptps_send_record(&n->sptps, PKT_PROBE, origpkt->data, origpkt->len);
338 if(n->outcompression) {
339 int len = compress_packet(outpkt.data, origpkt->data, origpkt->len, n->outcompression);
341 logger(DEBUG_TRAFFIC, LOG_ERR, "Error while compressing packet to %s (%s)", n->name, n->hostname);
// The compressed flag is only set when compression made the packet smaller.
342 } else if(len < origpkt->len) {
345 type |= PKT_COMPRESSED;
// NOTE(review): the record is sent from origpkt; the lines that would point
// origpkt at the compressed buffer are not visible here - confirm.
349 sptps_send_record(&n->sptps, type, origpkt->data, origpkt->len);
// Selects the destination address (*sa) and the index of the listening socket
// (*sock) for a UDP packet to node n. The visible code picks the reverse of an
// edge from n->edge_tree and a random listening socket, then looks for a
// socket whose address family matches the chosen address.
// NOTE(review): the confirmed-address and every-third-packet branches are only
// partly visible in this excerpt.
353 static void choose_udp_address(const node_t *n, const sockaddr_t **sa, int *sock) {
358 /* If the UDP address is confirmed, use it. */
359 if(n->status.udp_confirmed)
362 /* Send every third packet to n->address; that could be set
363 to the node's reflexive UDP address discovered during key
372 /* Otherwise, addresses are found in edges to this node.
373 So we pick a random edge and a random socket. */
// NOTE(review): rand() % count is undefined when edge_tree->count is zero; a
// guard may exist in lines not visible here - confirm.
376 int j = rand() % n->edge_tree->count;
377 edge_t *candidate = NULL;
379 for splay_each(edge_t, e, n->edge_tree) {
381 candidate = e->reverse;
387 *sa = &candidate->address;
388 *sock = rand() % mesh->listen_sockets;
391 /* Make sure we have a suitable socket for the chosen address */
392 if(mesh->listen_socket[*sock].sa.sa.sa_family != (*sa)->sa.sa_family) {
393 for(int i = 0; i < mesh->listen_sockets; i++) {
394 if(mesh->listen_socket[i].sa.sa.sa_family == (*sa)->sa.sa_family) {
// Selects a broadcast destination (*sa) and a random listening socket (*sock)
// for a local discovery probe to node n. Depending on the address family of
// the chosen socket, the destination is mesh->localdiscovery_address when its
// family matches, otherwise one of the two static templates below. In every
// case the port is copied from n->prevedge->address.
// The templates are function-static and mesh->localdiscovery_address is shared
// state; both are modified on each call.
402 static void choose_broadcast_address(const node_t *n, const sockaddr_t **sa, int *sock) {
403 static sockaddr_t broadcast_ipv4 = {
405 .sin_family = AF_INET,
// -1 sets every address bit, i.e. 255.255.255.255.
406 .sin_addr.s_addr = -1,
410 static sockaddr_t broadcast_ipv6 = {
412 .sin6_family = AF_INET6,
// Bytes ff 02 ... 01 spell the address ff02::1.
413 .sin6_addr.s6_addr[0x0] = 0xff,
414 .sin6_addr.s6_addr[0x1] = 0x02,
415 .sin6_addr.s6_addr[0xf] = 0x01,
419 *sock = rand() % mesh->listen_sockets;
421 if(mesh->listen_socket[*sock].sa.sa.sa_family == AF_INET6) {
422 if(mesh->localdiscovery_address.sa.sa_family == AF_INET6) {
// NOTE(review): the port is read through the in member even on the IPv6 path;
// this relies on sin_port and sin6_port sharing a position - confirm.
423 mesh->localdiscovery_address.in6.sin6_port = n->prevedge->address.in.sin_port;
424 *sa = &mesh->localdiscovery_address;
426 broadcast_ipv6.in6.sin6_port = n->prevedge->address.in.sin_port;
// The scope id is taken from the chosen listening socket.
427 broadcast_ipv6.in6.sin6_scope_id = mesh->listen_socket[*sock].sa.in6.sin6_scope_id;
428 *sa = &broadcast_ipv6;
431 if(mesh->localdiscovery_address.sa.sa_family == AF_INET) {
432 mesh->localdiscovery_address.in.sin_port = n->prevedge->address.in.sin_port;
433 *sa = &mesh->localdiscovery_address;
435 broadcast_ipv4.in.sin_port = n->prevedge->address.in.sin_port;
436 *sa = &broadcast_ipv4;
// Sends origpkt to node n. The visible code logs when the node is unreachable
// and otherwise delegates to send_sptps_packet.
// NOTE(review): pkt1, pkt2, pkt, inpkt, outpkt and origlen are not used in the
// lines visible here; they may be leftovers - confirm against the full file.
441 static void send_udppacket(node_t *n, vpn_packet_t *origpkt) {
442 vpn_packet_t pkt1, pkt2;
443 vpn_packet_t *pkt[] = { &pkt1, &pkt2, &pkt1, &pkt2 };
444 vpn_packet_t *inpkt = origpkt;
446 vpn_packet_t *outpkt;
447 int origlen = origpkt->len;
450 if(!n->status.reachable) {
451 logger(DEBUG_TRAFFIC, LOG_INFO, "Trying to send UDP packet to unreachable node %s (%s)", n->name, n->hostname);
// NOTE(review): this returns a void expression from a void function, which
// ISO C does not allow although common compilers accept it.
455 return send_sptps_packet(n, origpkt);
// Transmits len bytes of SPTPS output of the given record type. Handshake
// records, TCP-only peers and non-probe data larger than minmtu are base64
// encoded and sent as a request over the connection of the next hop; all
// other data is sent as a UDP datagram.
// NOTE(review): the declaration of to and the final return are not visible in
// this excerpt.
458 bool send_sptps_data(void *handle, uint8_t type, const char *data, size_t len) {
461 /* Send it via TCP if it is a handshake packet, TCPOnly is in use, or this packet is larger than the MTU. */
// Probes are exempt from the size check (type != PKT_PROBE).
463 if(type >= SPTPS_HANDSHAKE || ((mesh->self->options | to->options) & OPTION_TCPONLY) || (type != PKT_PROBE && len > to->minmtu)) {
// Variable length array sized for the base64 form of len bytes.
// NOTE(review): the stack usage grows with len - confirm len is bounded.
464 char buf[len * 4 / 3 + 5];
465 b64encode(data, buf, len);
466 /* If no valid key is known yet, send the packets using ANS_KEY requests,
467 to ensure we get to learn the reflexive UDP address. */
468 if(!to->status.validkey) {
469 to->incompression = mesh->self->incompression;
470 return send_request(to->nexthop->connection, "%d %s %s %s -1 -1 -1 %d", ANS_KEY, mesh->self->name, to->name, buf, to->incompression);
472 return send_request(to->nexthop->connection, "%d %s %s %d %s", REQ_KEY, mesh->self->name, to->name, REQ_SPTPS, buf);
476 /* Otherwise, send the packet via UDP */
478 const sockaddr_t *sa;
// The broadcast flag is set by send_mtu_probe_handler for local discovery
// probes.
481 if(to->status.broadcast)
482 choose_broadcast_address(to, &sa, &sock);
484 choose_udp_address(to, &sa, &sock);
// A failed send that is not a would-block condition is examined further.
486 if(sendto(mesh->listen_socket[sock].udp.fd, data, len, 0, &sa->sa, SALEN(sa->sa)) < 0 && !sockwouldblock(sockerrno)) {
// sockmsgsize presumably detects a message-too-long error; in that case
// maxmtu is lowered to just below the size that failed.
487 if(sockmsgsize(sockerrno)) {
488 if(to->maxmtu >= len)
489 to->maxmtu = len - 1;
493 logger(DEBUG_TRAFFIC, LOG_WARNING, "Error sending UDP SPTPS packet to %s (%s): %s", to->name, to->hostname, sockstrerror(sockerrno));
// Handles one decoded SPTPS record from the node passed as handle.
// Handshake records mark the key as valid, PKT_PROBE records go to
// mtu_probe_h, and data records are uncompressed when flagged with
// PKT_COMPRESSED and then passed to receive_packet.
// NOTE(review): the return statements and the size check that guards the
// oversize log message are not visible in this excerpt.
501 bool receive_sptps_record(void *handle, uint8_t type, const char *data, uint16_t len) {
502 node_t *from = handle;
504 if(type == SPTPS_HANDSHAKE) {
505 if(!from->status.validkey) {
506 from->status.validkey = true;
507 from->status.waitingforkey = false;
// NOTE(review): the message below misspells successful; it is runtime output
// and is deliberately left unchanged here.
508 logger(DEBUG_META, LOG_INFO, "SPTPS key exchange with %s (%s) succesful", from->name, from->hostname);
514 logger(DEBUG_ALWAYS, LOG_ERR, "Packet from %s (%s) larger than maximum supported size (%d > %d)", from->name, from->hostname, len, MTU);
520 if(type == PKT_PROBE) {
523 memcpy(inpkt.data, data, len);
524 mtu_probe_h(from, &inpkt, len);
// Any bit other than PKT_COMPRESSED is unexpected for a data record.
530 if(type & ~(PKT_COMPRESSED)) {
531 logger(DEBUG_ALWAYS, LOG_ERR, "Unexpected SPTPS record type %d len %d from %s (%s)", type, len, from->name, from->hostname);
535 if(type & PKT_COMPRESSED) {
536 uint16_t ulen = uncompress_packet(inpkt.data, (const uint8_t *)data, len, from->incompression);
542 if(inpkt.len > MAXSIZE)
545 memcpy(inpkt.data, data, len);
549 receive_packet(from, &inpkt);
554 Send a packet to the given node.
// Sends packet to node n and adds its length to n->out_bytes. A packet for the
// local node (mesh->self) is only counted; delivery to the application is
// still a TODO. For other nodes the packet goes to send_sptps_packet, and an
// unreachable node is logged.
// NOTE(review): the early exits of the branches are not visible here.
556 void send_packet(node_t *n, vpn_packet_t *packet) {
559 if(n == mesh->self) {
561 n->out_bytes += packet->len;
562 // TODO: send to application
// NOTE(review): this routine trace is logged at LOG_ERR, whereas the matching
// message in receive_packet uses LOG_DEBUG - confirm the intended level.
566 logger(DEBUG_TRAFFIC, LOG_ERR, "Sending packet of %d bytes to %s (%s)",
567 packet->len, n->name, n->hostname);
569 if(!n->status.reachable) {
570 logger(DEBUG_TRAFFIC, LOG_INFO, "Node %s (%s) is not reachable",
571 n->name, n->hostname);
576 n->out_bytes += packet->len;
578 send_sptps_packet(n, packet);
582 /* Broadcast a packet using the minimum spanning tree */
// Delivers packet, which originates from node from, to the local node (unless
// the local node is the originator) and to every connection that is active and
// flagged mst, except the connection of the next hop towards from.
584 void broadcast_packet(const node_t *from, vpn_packet_t *packet) {
585 // Give ourselves a copy of the packet unless we are the originator.
586 if(from != mesh->self)
587 send_packet(mesh->self, packet);
589 logger(DEBUG_TRAFFIC, LOG_INFO, "Broadcasting packet of %d bytes from %s (%s)",
590 packet->len, from->name, from->hostname);
// The connection towards from is skipped.
592 for list_each(connection_t, c, mesh->connections)
593 if(c->status.active && c->status.mst && c != from->nexthop->connection)
594 send_packet(c->node, packet);
// Tries to find the node that sent pkt when the source address from is not
// known: walks mesh->edges, skips unreachable targets and the local node, and
// checks with try_mac whether the packet verifies against the target of an
// edge.
// NOTE(review): the continue and return statements are not visible in this
// excerpt, so the exact result for each branch must be confirmed.
597 static node_t *try_harder(const sockaddr_t *from, const vpn_packet_t *pkt) {
// Function-static, so shared by all calls: the second of the last attempt.
600 static time_t last_hard_try = 0;
602 for splay_each(edge_t, e, mesh->edges) {
603 if(!e->to->status.reachable || e->to == mesh->self)
// presumably a nonzero result means the addresses differ when the port is
// ignored - confirm against sockaddrcmp_noport.
606 if(sockaddrcmp_noport(from, &e->address)) {
// Compared against the current loop time in whole seconds; this looks like a
// limit of one such attempt per second - confirm.
607 if(last_hard_try == mesh->loop.now.tv_sec)
612 if(!try_mac(e->to, pkt))
620 last_hard_try = mesh->loop.now.tv_sec;
622 last_hard_try = mesh->loop.now.tv_sec;
// Event loop callback for a readable UDP listening socket (passed as data).
// Reads one datagram of at most MAXSIZE bytes, identifies the sending node by
// source address (falling back to try_harder), records which listening socket
// received it and passes the packet to receive_udppacket.
// NOTE(review): the declarations of pkt, n, len and hostname and several
// conditions are not visible in this excerpt.
626 void handle_incoming_vpn_data(event_loop_t *loop, void *data, int flags) {
627 listen_socket_t *ls = data;
630 sockaddr_t from = {{0}};
631 socklen_t fromlen = sizeof from;
635 len = recvfrom(ls->udp.fd, pkt.data, MAXSIZE, 0, &from.sa, &fromlen);
// NOTE(review): a zero length datagram also takes this path, where sockerrno
// may not describe a fresh error - confirm this is intended.
637 if(len <= 0 || len > MAXSIZE) {
638 if(!sockwouldblock(sockerrno))
639 logger(DEBUG_ALWAYS, LOG_ERR, "Receiving packet failed: %s", sockstrerror(sockerrno));
645 sockaddrunmap(&from); /* Some braindead IPv6 implementations do stupid things. */
647 n = lookup_node_udp(&from);
// Fallback lookup that verifies the packet against candidate nodes.
650 n = try_harder(&from, &pkt);
652 update_node_udp(n, &from);
653 else if(mesh->debug_level >= DEBUG_PROTOCOL) {
654 hostname = sockaddr2hostname(&from);
655 logger(DEBUG_PROTOCOL, LOG_WARNING, "Received UDP packet from unknown source %s", hostname);
// Pointer difference: the index of ls within the mesh->listen_socket array.
663 n->sock = ls - mesh->listen_socket;
665 receive_udppacket(n, &pkt);