2 net_packet.c -- Handles in- and outgoing VPN packets
3 Copyright (C) 2014 Guus Sliepen <guus@meshlink.io>
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License along
16 with this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
28 #include "connection.h"
// Forward declaration: send_udppacket() is defined further down in this file
// but is already called by the MTU probe code that precedes its definition.
42 static void send_udppacket(node_t *, vpn_packet_t *);
// NOTE(review): replaywin is not referenced in the visible part of this file;
// presumably a replay window size consumed elsewhere -- confirm.
44 unsigned replaywin = 16;
// When true, the probe loop in send_mtu_probe_handler() runs one extra
// iteration per batch (its bound is 4 + localdiscovery).
45 bool localdiscovery = false;
// Used by choose_broadcast_address() as the destination when its address
// family matches the family of the chosen listening socket.
46 sockaddr_t localdiscovery_address;
// 1073741824 equals 2 to the power 30.
// NOTE(review): not referenced in the visible part of this file.
48 #define MAX_SEQNO 1073741824
50 /* mtuprobes == 1..30: initial discovery, send bursts with 1 second interval
51 mtuprobes == 31: sleep pinginterval seconds
52 mtuprobes == 32: send 1 burst, sleep pingtimeout seconds
53 mtuprobes == 33: no response from other side, restart PMTU discovery process
55 Probes are sent in batches of at least three, with random sizes between the
56 lower and upper boundaries for the MTU thus far discovered.
58 After the initial discovery, a fourth packet is added to each batch with a
59 size larger than the currently known PMTU, to test if the PMTU has increased.
61 In case local discovery is enabled, another packet is added to each batch,
62 which will be broadcast to the local network.
// Timer callback that drives path MTU discovery for a single node.
// send_mtu_probe() registers this handler on n->mtutimeout with the node as
// its argument. Depending on n->mtuprobes it restarts discovery, fixes the
// MTU, or sends a batch of probe packets through send_udppacket(). Afterwards
// it refreshes the incoming packet loss estimate and re-arms the timer.
66 static void send_mtu_probe_handler(void *data) {
// Probes are only meaningful for a reachable node with a valid key.
72 if(!n->status.reachable || !n->status.validkey) {
73 logger(DEBUG_TRAFFIC, LOG_INFO, "Trying to send MTU probe to unreachable or rekeying node %s (%s)", n->name, n->hostname);
// Past step 32 the peer did not answer the UDP ping, so UDP is no longer
// considered confirmed and discovery starts over.
78 if(n->mtuprobes > 32) {
81 timeout = pinginterval;
85 logger(DEBUG_TRAFFIC, LOG_INFO, "%s (%s) did not respond to UDP ping, restarting PMTU discovery", n->name, n->hostname);
86 n->status.udp_confirmed = false;
// Ten or more probes without any confirmed minimum MTU.
92 if(n->mtuprobes >= 10 && n->mtuprobes < 32 && !n->minmtu) {
93 logger(DEBUG_TRAFFIC, LOG_INFO, "No response to MTU probes from %s (%s)", n->name, n->hostname);
// Discovery ends at probe 30, or earlier once the lower bound has reached
// the upper bound.
97 if(n->mtuprobes == 30 || (n->mtuprobes < 30 && n->minmtu >= n->maxmtu)) {
98 if(n->minmtu > n->maxmtu)
99 n->minmtu = n->maxmtu;
101 n->maxmtu = n->minmtu;
103 logger(DEBUG_TRAFFIC, LOG_INFO, "Fixing MTU of %s (%s) to %d after %d probes", n->name, n->hostname, n->mtu, n->mtuprobes);
// Steps 31 and 32 only differ in how long the handler waits before it runs
// again.
107 if(n->mtuprobes == 31) {
108 timeout = pinginterval;
110 } else if(n->mtuprobes == 32) {
111 timeout = pingtimeout;
// One batch: four iterations, plus a fifth when localdiscovery is true.
114 for(int i = 0; i < 4 + localdiscovery; i++) {
118 if(n->mtuprobes < 30 || n->maxmtu + 8 >= MTU)
121 } else if(n->maxmtu <= n->minmtu) {
// Random length in the range minmtu + 1 up to and including maxmtu.
124 len = n->minmtu + 1 + rand() % (n->maxmtu - n->minmtu);
// The first 14 bytes are zero and the remainder is random filler. The zero
// bytes at offsets 12 and 13 are what send_sptps_packet() tests to recognise
// a probe, and a zero first byte marks a request in mtu_probe_h().
131 memset(packet.data, 0, 14);
132 randomize(packet.data + 14, len - 14);
// Only the extra fifth packet is broadcast, and only during the first ten
// rounds for a node that has a previous edge.
135 n->status.broadcast = i >= 4 && n->mtuprobes <= 10 && n->prevedge;
137 logger(DEBUG_TRAFFIC, LOG_INFO, "Sending MTU probe length %d to %s (%s)", len, n->name, n->hostname);
139 send_udppacket(n, &packet);
// Reset the per-batch state that mtu_probe_h() uses for its RTT and
// bandwidth measurements.
142 n->status.broadcast = false;
143 n->probe_counter = 0;
144 gettimeofday(&n->probe_time, NULL);
146 /* Calculate the packet loss of incoming traffic by comparing the rate of
147 packets received to the rate with which the sequence number has increased.
150 if(n->received > n->prev_received)
151 n->packetloss = 1.0 - (n->received - n->prev_received) / (float)(n->received_seqno - n->prev_received_seqno);
153 n->packetloss = n->received_seqno <= n->prev_received_seqno;
155 n->prev_received_seqno = n->received_seqno;
156 n->prev_received = n->received;
// Re-arm the timer. The second timeval member gets a random value below
// 100000, which spreads out the wake-ups of different nodes.
159 timeout_set(&n->mtutimeout, &(struct timeval){timeout, rand() % 100000});
// Start MTU probing for node n: register send_mtu_probe_handler() on
// n->mtutimeout with an initial timeout of one second and the node as the
// callback argument, then run the handler once right away.
162 void send_mtu_probe(node_t *n) {
163 timeout_add(&n->mtutimeout, send_mtu_probe_handler, n, &(struct timeval){1, 0});
164 send_mtu_probe_handler(n);
// Handle an MTU probe packet received from node n.
// packet holds the probe payload and len is the length supplied by the
// caller (receive_sptps_record() passes the SPTPS record length).
// A zero first payload byte marks a probe request, which is echoed back
// through send_udppacket(). Anything else is treated as a reply: UDP is
// marked as confirmed, a grown PMTU is detected, and the RTT and burst
// bandwidth figures are updated.
167 static void mtu_probe_h(node_t *n, vpn_packet_t *packet, length_t len) {
168 logger(DEBUG_TRAFFIC, LOG_INFO, "Got MTU probe length %d from %s (%s)", packet->len, n->name, n->hostname);
170 if(!packet->data[0]) {
171 /* It's a probe request, send back a reply */
175 /* Temporarily set udp_confirmed, so that the reply is sent
176 back exactly the way it came in. */
// The previous flag value is saved here and restored after sending.
178 bool udp_confirmed = n->status.udp_confirmed;
179 n->status.udp_confirmed = true;
180 send_udppacket(n, packet);
181 n->status.udp_confirmed = udp_confirmed;
183 /* It's a valid reply: now we know bidirectional communication
184 is possible using the address and socket that the reply
187 n->status.udp_confirmed = true;
189 /* If we haven't established the PMTU yet, restart the discovery process. */
191 if(n->mtuprobes > 30) {
// A reply that is exactly 8 bytes longer than the current maxmtu is logged
// as an increase of the PMTU.
192 if (len == n->maxmtu + 8) {
193 logger(DEBUG_TRAFFIC, LOG_INFO, "Increase in PMTU to %s (%s) detected, restarting PMTU discovery", n->name, n->hostname);
205 /* If applicable, raise the minimum supported MTU */
212 /* Calculate RTT and bandwidth.
213 The RTT is the time between the MTU probe burst was sent and the first
214 reply is received. The bandwidth is measured using the time between the
215 arrival of the first and third probe reply.
// diff is the time elapsed since n->probe_time.
218 struct timeval now, diff;
219 gettimeofday(&now, NULL);
220 timersub(&now, &n->probe_time, &diff);
// rtt is stored in seconds. bandwidth is stored in bytes per second; the
// log line below scales it by 8e-6 to print Mbit/s.
224 if(n->probe_counter == 1) {
225 n->rtt = diff.tv_sec + diff.tv_usec * 1e-6;
227 } else if(n->probe_counter == 3) {
228 n->bandwidth = 2.0 * len / (diff.tv_sec + diff.tv_usec * 1e-6);
229 logger(DEBUG_TRAFFIC, LOG_DEBUG, "%s (%s) RTT %.2f ms, burst bandwidth %.3f Mbit/s, rx packet loss %.2f %%", n->name, n->hostname, n->rtt * 1e3, n->bandwidth * 8e-6, n->packetloss * 1e2);
// Compress len bytes from source into dest with the method selected by level.
// Visible branches: a plain memcpy, a separate case for level 10, and levels
// below 10 which call zlib compress2() with level as the compression level
// and MAXSIZE as the output capacity.
// NOTE(review): the return statements are not visible here. The caller
// send_sptps_packet() stores the result in an int and compares it against
// the uncompressed length.
234 static length_t compress_packet(uint8_t *dest, const uint8_t *source, length_t len, int level) {
236 memcpy(dest, source, len);
238 } else if(level == 10) {
240 } else if(level < 10) {
// destlen is passed by address to compress2() together with dest.
242 unsigned long destlen = MAXSIZE;
243 if(compress2(dest, &destlen, source, len, level) == Z_OK)
// Decompress len bytes from source into dest with the method selected by
// level. Visible branches: a plain memcpy, a separate case for levels above
// 9, and a zlib uncompress() call with MAXSIZE as the output capacity.
// NOTE(review): the return statements are not visible here. The caller
// receive_sptps_record() adds the result to an offset to form the packet
// length.
255 static length_t uncompress_packet(uint8_t *dest, const uint8_t *source, length_t len, int level) {
257 memcpy(dest, source, len);
259 } else if(level > 9) {
// destlen is passed by address to uncompress() together with dest.
264 unsigned long destlen = MAXSIZE;
265 if(uncompress(dest, &destlen, source, len) == Z_OK)
// Account for a decoded packet that arrived from node n: log its size and
// add packet->len to the incoming byte counter of the node.
277 static void receive_packet(node_t *n, vpn_packet_t *packet) {
278 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Received packet of %d bytes from %s (%s)",
279 packet->len, n->name, n->hostname);
282 n->in_bytes += packet->len;
// Ask the SPTPS session of node n whether the datagram in inpkt verifies.
// The datagram is taken to start at the seqno field and to be inpkt->len
// bytes long. Returns the result of sptps_verify_datagram().
287 static bool try_mac(node_t *n, const vpn_packet_t *inpkt) {
288 return sptps_verify_datagram(&n->sptps, (char *)&inpkt->seqno, inpkt->len);
// Process a raw UDP datagram that was attributed to node n.
// Without an SPTPS state for the node the packet cannot be decoded and only
// a diagnostic is logged. Otherwise the data, starting at the seqno field,
// is handed to sptps_receive_data().
291 static void receive_udppacket(node_t *n, vpn_packet_t *inpkt) {
// NOTE(review): these scratch packets have no visible use besides the
// initialisation of outpkt below.
292 vpn_packet_t pkt1, pkt2;
293 vpn_packet_t *pkt[] = { &pkt1, &pkt2, &pkt1, &pkt2 };
295 vpn_packet_t *outpkt = pkt[0];
298 if(!n->sptps.state) {
299 if(!n->status.waitingforkey) {
300 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but we haven't exchanged keys yet", n->name, n->hostname);
303 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but he hasn't got our key yet", n->name, n->hostname);
307 sptps_receive_data(&n->sptps, (char *)&inpkt->seqno, inpkt->len);
// Accept a packet that arrived over the TCP meta connection c.
// buffer holds len bytes of packet data. They are copied into a local
// vpn_packet_t whose priority is set to -1, which is then passed to
// receive_packet() for the node of the connection.
310 void receive_tcppacket(connection_t *c, const char *buffer, int len) {
// len is an int and sizeof yields size_t, so the comparison is done in an
// unsigned type and a negative len also compares as greater.
313 if(len > sizeof outpkt.data)
317 if(c->options & OPTION_TCPONLY)
320 outpkt.priority = -1;
321 memcpy(outpkt.data, buffer, len);
323 receive_packet(c->node, &outpkt);
// Send origpkt to node n as an SPTPS record.
// Without a valid key only the key exchange state is managed. Probe packets
// are sent as PKT_PROBE records. Other packets are sent from a starting
// offset into the data, and optionally compressed when the node has an
// outgoing compression level set.
326 static void send_sptps_packet(node_t *n, vpn_packet_t *origpkt) {
327 if(!n->status.validkey) {
328 logger(DEBUG_TRAFFIC, LOG_INFO, "No valid key known yet for %s (%s)", n->name, n->hostname);
329 if(!n->status.waitingforkey)
// A key request that stays unanswered for more than 10 seconds resets the
// SPTPS session and clears the waiting flag.
331 else if(n->last_req_key + 10 < now.tv_sec) {
332 logger(DEBUG_ALWAYS, LOG_DEBUG, "No key from %s after 10 seconds, restarting SPTPS", n->name);
333 sptps_stop(&n->sptps);
334 n->status.waitingforkey = false;
// Bytes 12 and 13 both zero identify an MTU probe; send_mtu_probe_handler()
// zeroes the first 14 bytes of every probe it builds.
343 if(!(origpkt->data[12] | origpkt->data[13])) {
344 sptps_send_record(&n->sptps, PKT_PROBE, (char *)origpkt->data, origpkt->len);
348 if(routing_mode == RMODE_ROUTER)
// Packets shorter than the offset are tested for here.
353 if(origpkt->len < offset)
358 if(n->outcompression) {
359 int len = compress_packet(outpkt.data + offset, origpkt->data + offset, origpkt->len - offset, n->outcompression);
361 logger(DEBUG_TRAFFIC, LOG_ERR, "Error while compressing packet to %s (%s)", n->name, n->hostname);
// The compressed form is only used when it is actually smaller.
362 } else if(len < origpkt->len - offset) {
363 outpkt.len = len + offset;
365 type |= PKT_COMPRESSED;
369 sptps_send_record(&n->sptps, type, (char *)origpkt->data + offset, origpkt->len - offset);
// Pick the destination address and listening socket for a UDP packet to
// node n. The results are returned through sa (pointer to the chosen
// address) and sock (index into listen_socket).
373 static void choose_udp_address(const node_t *n, const sockaddr_t **sa, int *sock) {
378 /* If the UDP address is confirmed, use it. */
379 if(n->status.udp_confirmed)
382 /* Send every third packet to n->address; that could be set
383 to the node's reflexive UDP address discovered during key
392 /* Otherwise, addresses are found in edges to this node.
393 So we pick a random edge and a random socket. */
// NOTE(review): the modulo below is undefined when edge_tree->count is 0;
// confirm that an empty edge tree cannot reach this point.
396 int j = rand() % n->edge_tree->count;
397 edge_t *candidate = NULL;
399 for splay_each(edge_t, e, n->edge_tree) {
// The address is taken from the reverse edge of the edge being visited.
401 candidate = e->reverse;
407 *sa = &candidate->address;
408 *sock = rand() % listen_sockets;
411 /* Make sure we have a suitable socket for the chosen address */
412 if(listen_socket[*sock].sa.sa.sa_family != (*sa)->sa.sa_family) {
// Linear search for a listening socket of the same address family.
413 for(int i = 0; i < listen_sockets; i++) {
414 if(listen_socket[i].sa.sa.sa_family == (*sa)->sa.sa_family) {
// Pick a destination address and listening socket for a broadcast probe to
// node n. A random listening socket is chosen. If localdiscovery_address has
// the same family as that socket it is used, otherwise a built-in default
// address is used. The destination port is always copied from the address of
// n->prevedge. Results are returned through sa and sock.
422 static void choose_broadcast_address(const node_t *n, const sockaddr_t **sa, int *sock) {
// All address bits set, which is the IPv4 limited broadcast address
// 255.255.255.255.
423 static sockaddr_t broadcast_ipv4 = {
425 .sin_family = AF_INET,
426 .sin_addr.s_addr = -1,
// Only bytes 0, 1 and 15 are given explicitly (ff, 02 and 01). With the
// remaining bytes zero this spells ff02::1.
430 static sockaddr_t broadcast_ipv6 = {
432 .sin6_family = AF_INET6,
433 .sin6_addr.s6_addr[0x0] = 0xff,
434 .sin6_addr.s6_addr[0x1] = 0x02,
435 .sin6_addr.s6_addr[0xf] = 0x01,
439 *sock = rand() % listen_sockets;
441 if(listen_socket[*sock].sa.sa.sa_family == AF_INET6) {
442 if(localdiscovery_address.sa.sa_family == AF_INET6) {
// The port is read through the IPv4 view (in.sin_port) of the edge address
// even on this IPv6 path.
443 localdiscovery_address.in6.sin6_port = n->prevedge->address.in.sin_port;
444 *sa = &localdiscovery_address;
446 broadcast_ipv6.in6.sin6_port = n->prevedge->address.in.sin_port;
// The scope id is copied from the chosen listening socket.
447 broadcast_ipv6.in6.sin6_scope_id = listen_socket[*sock].sa.in6.sin6_scope_id;
448 *sa = &broadcast_ipv6;
451 if(localdiscovery_address.sa.sa_family == AF_INET) {
452 localdiscovery_address.in.sin_port = n->prevedge->address.in.sin_port;
453 *sa = &localdiscovery_address;
455 broadcast_ipv4.in.sin_port = n->prevedge->address.in.sin_port;
456 *sa = &broadcast_ipv4;
// Send origpkt to node n. In the visible code an unreachable node only
// produces a log message, and the packet is passed on to
// send_sptps_packet().
// NOTE(review): most of the locals declared below have no visible use in
// this function.
461 static void send_udppacket(node_t *n, vpn_packet_t *origpkt) {
462 vpn_packet_t pkt1, pkt2;
463 vpn_packet_t *pkt[] = { &pkt1, &pkt2, &pkt1, &pkt2 };
464 vpn_packet_t *inpkt = origpkt;
466 vpn_packet_t *outpkt;
467 int origlen = origpkt->len;
469 #if defined(SOL_IP) && defined(IP_TOS)
470 static int priority = 0;
472 int origpriority = origpkt->priority;
474 if(!n->status.reachable) {
475 logger(DEBUG_TRAFFIC, LOG_INFO, "Trying to send UDP packet to unreachable node %s (%s)", n->name, n->hostname);
479 return send_sptps_packet(n, origpkt);
// Transmit len bytes of already encoded SPTPS data of the given record type.
// The data goes over the TCP meta connection, base64 encoded inside an
// ANS_KEY or REQ_KEY request, when it is a handshake record, when either
// side has OPTION_TCPONLY set, or when a non-probe record is larger than the
// known minimum MTU. Everything else is sent as a UDP datagram.
// NOTE(review): handle is presumably the destination node that the body
// calls to -- the assignment is not visible here.
482 bool send_sptps_data(void *handle, uint8_t type, const char *data, size_t len) {
485 /* Send it via TCP if it is a handshake packet, TCPOnly is in use, or this packet is larger than the MTU. */
487 if(type >= SPTPS_HANDSHAKE || ((myself->options | to->options) & OPTION_TCPONLY) || (type != PKT_PROBE && len > to->minmtu)) {
// Variable length buffer sized for the base64 text of len input bytes.
488 char buf[len * 4 / 3 + 5];
489 b64encode(data, buf, len);
490 /* If no valid key is known yet, send the packets using ANS_KEY requests,
491 to ensure we get to learn the reflexive UDP address. */
492 if(!to->status.validkey) {
493 to->incompression = myself->incompression;
494 return send_request(to->nexthop->connection, "%d %s %s %s -1 -1 -1 %d", ANS_KEY, myself->name, to->name, buf, to->incompression);
496 return send_request(to->nexthop->connection, "%d %s %s %d %s", REQ_KEY, myself->name, to->name, REQ_SPTPS, buf);
500 /* Otherwise, send the packet via UDP */
502 const sockaddr_t *sa;
// The broadcast flag is set by send_mtu_probe_handler() for the extra local
// discovery probe.
505 if(to->status.broadcast)
506 choose_broadcast_address(to, &sa, &sock);
508 choose_udp_address(to, &sa, &sock);
// A send that would merely block is not treated as an error.
510 if(sendto(listen_socket[sock].udp.fd, data, len, 0, &sa->sa, SALEN(sa->sa)) < 0 && !sockwouldblock(sockerrno)) {
// A message size error lowers the upper MTU bound to just below this
// length.
511 if(sockmsgsize(sockerrno)) {
512 if(to->maxmtu >= len)
513 to->maxmtu = len - 1;
517 logger(DEBUG_TRAFFIC, LOG_WARNING, "Error sending UDP SPTPS packet to %s (%s): %s", to->name, to->hostname, sockstrerror(sockerrno));
// Handle one decoded SPTPS record of the given type and length.
// handle is the node the record came from. Handshake records mark the key
// as valid, probe records go to mtu_probe_h(), and data records are turned
// back into a vpn_packet_t (decompressed if flagged) and passed to
// receive_packet().
525 bool receive_sptps_record(void *handle, uint8_t type, const char *data, uint16_t len) {
526 node_t *from = handle;
528 if(type == SPTPS_HANDSHAKE) {
529 if(!from->status.validkey) {
530 from->status.validkey = true;
531 from->status.waitingforkey = false;
532 logger(DEBUG_META, LOG_INFO, "SPTPS key exchange with %s (%s) succesful", from->name, from->hostname);
538 logger(DEBUG_ALWAYS, LOG_ERR, "Packet from %s (%s) larger than maximum supported size (%d > %d)", from->name, from->hostname, len, MTU);
544 if(type == PKT_PROBE) {
546 memcpy(inpkt.data, data, len);
547 mtu_probe_h(from, &inpkt, len);
// Any type bit other than PKT_COMPRESSED and PKT_MAC is unexpected.
551 if(type & ~(PKT_COMPRESSED | PKT_MAC)) {
552 logger(DEBUG_ALWAYS, LOG_ERR, "Unexpected SPTPS record type %d len %d from %s (%s)", type, len, from->name, from->hostname);
556 /* Check if we have the headers we need */
557 if(routing_mode != RMODE_ROUTER && !(type & PKT_MAC)) {
558 logger(DEBUG_TRAFFIC, LOG_ERR, "Received packet from %s (%s) without MAC header (maybe Mode is not set correctly)", from->name, from->hostname);
560 } else if(routing_mode == RMODE_ROUTER && (type & PKT_MAC)) {
561 logger(DEBUG_TRAFFIC, LOG_WARNING, "Received packet from %s (%s) with MAC header (maybe Mode is not set correctly)", from->name, from->hostname);
// Records without the PKT_MAC flag are placed 14 bytes into the packet
// buffer; records with the flag start at offset 0.
564 int offset = (type & PKT_MAC) ? 0 : 14;
565 if(type & PKT_COMPRESSED) {
566 length_t ulen = uncompress_packet(inpkt.data + offset, (const uint8_t *)data, len, from->incompression);
570 inpkt.len = ulen + offset;
572 if(inpkt.len > MAXSIZE)
575 memcpy(inpkt.data + offset, data, len);
576 inpkt.len = len + offset;
579 /* Generate the Ethernet packet type if necessary */
// The high nibble of byte 14 is logged below as the IP version. The byte
// pairs written to offsets 12 and 13 are 0x0800 and 0x86DD, the EtherType
// values for IPv4 and IPv6.
581 switch(inpkt.data[14] >> 4) {
583 inpkt.data[12] = 0x08;
584 inpkt.data[13] = 0x00;
587 inpkt.data[12] = 0x86;
588 inpkt.data[13] = 0xDD;
591 logger(DEBUG_TRAFFIC, LOG_ERR,
592 "Unknown IP version %d while reading packet from %s (%s)",
593 inpkt.data[14] >> 4, from->name, from->hostname);
598 receive_packet(from, &inpkt);
603 send a packet to the given vpn ip.
// Send packet to node n: the outgoing byte counter of the node is updated
// and the packet is handed to send_sptps_packet(). For an unreachable node
// only a message is logged in the visible code.
605 void send_packet(node_t *n, vpn_packet_t *packet) {
// NOTE(review): out_bytes is incremented here and again further down;
// presumably this first increment sits in a separate early branch whose
// condition is not visible -- confirm.
610 n->out_bytes += packet->len;
611 // TODO: send to application
// NOTE(review): this routine trace message is logged with LOG_ERR, while
// comparable traffic messages in this file use LOG_INFO or LOG_DEBUG.
615 logger(DEBUG_TRAFFIC, LOG_ERR, "Sending packet of %d bytes to %s (%s)",
616 packet->len, n->name, n->hostname);
618 if(!n->status.reachable) {
619 logger(DEBUG_TRAFFIC, LOG_INFO, "Node %s (%s) is not reachable",
620 n->name, n->hostname);
625 n->out_bytes += packet->len;
627 send_sptps_packet(n, packet);
631 /* Broadcast a packet using the minimum spanning tree */
// Distribute packet, which originated at node from, to other nodes according
// to broadcast_mode. Two strategies are visible: forwarding along the
// minimum spanning tree connections, and sending a copy straight to every
// node that qualifies as directly reachable.
633 void broadcast_packet(const node_t *from, vpn_packet_t *packet) {
634 // Always give ourself a copy of the packet.
636 send_packet(myself, packet);
638 logger(DEBUG_TRAFFIC, LOG_INFO, "Broadcasting packet of %d bytes from %s (%s)",
639 packet->len, from->name, from->hostname);
641 switch(broadcast_mode) {
642 // In MST mode, broadcast packets travel via the Minimum Spanning Tree.
643 // This guarantees all nodes receive the broadcast packet, and
644 // usually distributes the sending of broadcast packets over all nodes.
// The connection towards the next hop of the originating node is skipped.
646 for list_each(connection_t, c, connection_list)
647 if(c->status.active && c->status.mst && c != from->nexthop->connection)
648 send_packet(c->node, packet);
651 // In direct mode, we send copies to each node we know of.
652 // However, this only reaches nodes that can be reached in a single hop.
653 // We don't have enough information to forward broadcast packets in this case.
// A node qualifies when it is reachable, is not this node, and is either
// its own via node or is reached via this node with itself as next hop.
658 for splay_each(node_t, n, node_tree)
659 if(n->status.reachable && n != myself && ((n->via == myself && n->nexthop == n) || n->via == n))
660 send_packet(n, packet);
// Fallback used by handle_incoming_vpn_data() when the sender address from
// does not map to a known node. It walks all edges, skips unreachable nodes
// and this node itself, compares addresses without the port, and uses
// try_mac() to test whether pkt verifies for a candidate node.
// The static last_hard_try timestamp is compared against the current second
// and updated afterwards.
// NOTE(review): the return statements are not visible here; the caller
// treats the result as a node pointer that may be null.
668 static node_t *try_harder(const sockaddr_t *from, const vpn_packet_t *pkt) {
671 static time_t last_hard_try = 0;
673 for splay_each(edge_t, e, edge_weight_tree) {
674 if(!e->to->status.reachable || e->to == myself)
677 if(sockaddrcmp_noport(from, &e->address)) {
678 if(last_hard_try == now.tv_sec)
683 if(!try_mac(e->to, pkt))
691 last_hard_try = now.tv_sec;
693 last_hard_try = now.tv_sec;
// I/O callback for a UDP listening socket. data is the listen_socket_t that
// became readable. One datagram is read, its sender is mapped to a node
// (with try_harder() as fallback), the index of the receiving socket is
// stored in the node, and the packet is passed to receive_udppacket().
697 void handle_incoming_vpn_data(void *data, int flags) {
698 listen_socket_t *ls = data;
701 sockaddr_t from = {{0}};
702 socklen_t fromlen = sizeof from;
// The datagram is stored starting at the seqno field of the packet, which is
// the same position the SPTPS calls in this file read from.
706 len = recvfrom(ls->udp.fd, (char *) &pkt.seqno, MAXSIZE, 0, &from.sa, &fromlen);
// Would-block conditions are not logged as errors.
708 if(len <= 0 || len > MAXSIZE) {
709 if(!sockwouldblock(sockerrno))
710 logger(DEBUG_ALWAYS, LOG_ERR, "Receiving packet failed: %s", sockstrerror(sockerrno));
716 sockaddrunmap(&from); /* Some braindead IPv6 implementations do stupid things. */
718 n = lookup_node_udp(&from);
721 n = try_harder(&from, &pkt);
// A node found by the fallback gets its UDP address updated to the sender
// address.
723 update_node_udp(n, &from);
724 else if(debug_level >= DEBUG_PROTOCOL) {
725 hostname = sockaddr2hostname(&from);
726 logger(DEBUG_PROTOCOL, LOG_WARNING, "Received UDP packet from unknown source %s", hostname);
// Pointer difference gives the index of this socket in listen_socket.
734 n->sock = ls - listen_socket;
736 receive_udppacket(n, &pkt);