2 net_packet.c -- Handles in- and outgoing VPN packets
3 Copyright (C) 2014 Guus Sliepen <guus@meshlink.io>
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License along
16 with this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 #include "connection.h"
31 #include "meshlink_internal.h"
// Forward declaration: send_udppacket() is called by the MTU probe code below
// before its definition appears later in this file.
41 static void send_udppacket(node_t *, vpn_packet_t *);
// Not referenced in the visible part of this file; presumably a replay-window
// setting consumed elsewhere -- TODO confirm.
43 unsigned replaywin = 16;
// 1073741824 == 2^30. Not referenced in the visible part of this file.
45 #define MAX_SEQNO 1073741824
47 /* mtuprobes == 1..30: initial discovery, send bursts with 1 second interval
48 mtuprobes == 31: sleep pinginterval seconds
49 mtuprobes == 32: send 1 burst, sleep pingtimeout seconds
50 mtuprobes == 33: no response from other side, restart PMTU discovery process
52 Probes are sent in batches of at least three, with random sizes between the
53 lower and upper boundaries for the MTU thus far discovered.
55 After the initial discovery, a fourth packet is added to each batch with a
56 size larger than the currently known PMTU, to test if the PMTU has increased.
58 In case local discovery is enabled, another packet is added to each batch,
59 which will be broadcast to the local network.
// Timer callback that drives path-MTU discovery for one node. It is registered
// by send_mtu_probe() and re-arms itself through timeout_set() on its last
// visible line.
// NOTE(review): the embedded original line numbers jump in several places, so
// some statements are not visible in this listing (the declarations of n,
// timeout, len and packet, several else lines, closing braces and any early
// exits). The comments below describe only what the visible lines show.
63 static void send_mtu_probe_handler(void *data) {
// A node that is unreachable or has no valid key is logged here; what happens
// afterwards is on lines not shown.
69 if(!n->status.reachable || !n->status.validkey) {
70 logger(DEBUG_TRAFFIC, LOG_INFO, "Trying to send MTU probe to unreachable or rekeying node %s (%s)", n->name, n->hostname);
// More than 32 probe rounds without an answer: the UDP address is no longer
// treated as confirmed and discovery is restarted (see the log message).
75 if(n->mtuprobes > 32) {
78 timeout = pinginterval;
82 logger(DEBUG_TRAFFIC, LOG_INFO, "%s (%s) did not respond to UDP ping, restarting PMTU discovery", n->name, n->hostname);
83 n->status.udp_confirmed = false;
// Between round 10 and 31 with minmtu still zero: no probe has been answered.
89 if(n->mtuprobes >= 10 && n->mtuprobes < 32 && !n->minmtu) {
90 logger(DEBUG_TRAFFIC, LOG_INFO, "No response to MTU probes from %s (%s)", n->name, n->hostname);
// After 30 rounds, or earlier once minmtu has met maxmtu, the
// [minmtu, maxmtu] interval is collapsed to a single value and logged.
// Presumably the maxmtu assignment is the else branch of the minmtu clamp;
// the else line is not visible.
94 if(n->mtuprobes == 30 || (n->mtuprobes < 30 && n->minmtu >= n->maxmtu)) {
95 if(n->minmtu > n->maxmtu)
96 n->minmtu = n->maxmtu;
98 n->maxmtu = n->minmtu;
100 logger(DEBUG_TRAFFIC, LOG_INFO, "Fixing MTU of %s (%s) to %d after %d probes", n->name, n->hostname, n->mtu, n->mtuprobes);
// Rounds 31 and 32 only select how long to wait before the next invocation.
104 if(n->mtuprobes == 31) {
105 timeout = pinginterval;
107 } else if(n->mtuprobes == 32) {
108 timeout = pingtimeout;
// One burst: 4 probes, plus mesh->localdiscovery extra ones.
111 for(int i = 0; i < 4 + mesh->localdiscovery; i++) {
115 if(n->mtuprobes < 30 || n->maxmtu + 8 >= MTU)
118 } else if(n->maxmtu <= n->minmtu) {
// Random length in [minmtu + 1, maxmtu]. The preceding condition handles
// maxmtu <= minmtu, so presumably the modulus is positive here.
121 len = n->minmtu + 1 + rand() % (n->maxmtu - n->minmtu);
// The first 14 bytes are zeroed and the remainder randomized. mtu_probe_h()
// treats a zero first byte as a probe request.
128 memset(packet.data, 0, 14);
129 randomize(packet.data + 14, len - 14);
// Only probes past the fourth (i >= 4), during the first 10 rounds, and only
// when a prevedge exists, are flagged for broadcast. send_sptps_data() reads
// this flag to pick a broadcast destination.
131 n->status.broadcast = i >= 4 && n->mtuprobes <= 10 && n->prevedge;
133 logger(DEBUG_TRAFFIC, LOG_INFO, "Sending MTU probe length %d to %s (%s)", len, n->name, n->hostname);
135 send_udppacket(n, &packet);
// Reset the per-burst state used by mtu_probe_h() for RTT and bandwidth.
138 n->status.broadcast = false;
139 n->probe_counter = 0;
140 gettimeofday(&n->probe_time, NULL);
// packetloss = 1 - (packets received since the previous round) divided by
// (sequence numbers advanced since the previous round). The second assignment
// is presumably the else branch and yields 1 or 0.
// NOTE(review): if received grew while received_seqno did not change, the
// divisor is zero -- confirm whether that can happen.
142 /* Calculate the packet loss of incoming traffic by comparing the rate of
143 packets received to the rate with which the sequence number has increased.
146 if(n->received > n->prev_received)
147 n->packetloss = 1.0 - (n->received - n->prev_received) / (float)(n->received_seqno - n->prev_received_seqno);
149 n->packetloss = n->received_seqno <= n->prev_received_seqno;
151 n->prev_received_seqno = n->received_seqno;
152 n->prev_received = n->received;
// Re-arm the timer: timeout seconds plus 0..99999 microseconds of random jitter.
155 timeout_set(&n->mtutimeout, &(struct timeval){timeout, rand() % 100000});
// Starts MTU probing for node n: registers send_mtu_probe_handler() on the
// node's mtutimeout timer with an initial 1 second timeout, then runs the
// handler once immediately.
158 void send_mtu_probe(node_t *n) {
159 timeout_add(&n->mtutimeout, send_mtu_probe_handler, n, &(struct timeval){1, 0});
160 send_mtu_probe_handler(n);
// Handles an MTU probe received from node n. A probe request is echoed back;
// a probe reply confirms the UDP path and feeds the MTU, RTT and bandwidth
// bookkeeping.
// NOTE(review): the embedded original line numbers jump, so several lines
// (else lines, the minmtu update, the probe_counter increment, closing braces)
// are not visible in this listing.
163 static void mtu_probe_h(node_t *n, vpn_packet_t *packet, uint16_t len) {
// Note that this logs packet->len, not the len parameter.
164 logger(DEBUG_TRAFFIC, LOG_INFO, "Got MTU probe length %d from %s (%s)", packet->len, n->name, n->hostname);
// A zero first byte marks a request: send_mtu_probe_handler() zeroes the
// first 14 bytes of every probe it sends.
166 if(!packet->data[0]) {
167 /* It's a probe request, send back a reply */
171 /* Temporarily set udp_confirmed, so that the reply is sent
172 back exactly the way it came in. */
174 bool udp_confirmed = n->status.udp_confirmed;
175 n->status.udp_confirmed = true;
176 send_udppacket(n, packet);
177 n->status.udp_confirmed = udp_confirmed;
// Presumably the else branch (probe reply) starts here; the else line is not
// visible.
179 /* It's a valid reply: now we know bidirectional communication
180 is possible using the address and socket that the reply
183 n->status.udp_confirmed = true;
185 /* If we haven't established the PMTU yet, restart the discovery process. */
// A reply of exactly maxmtu + 8 bytes is logged as an increase in PMTU.
187 if(n->mtuprobes > 30) {
188 if (len == n->maxmtu + 8) {
189 logger(DEBUG_TRAFFIC, LOG_INFO, "Increase in PMTU to %s (%s) detected, restarting PMTU discovery", n->name, n->hostname);
201 /* If applicable, raise the minimum supported MTU */
208 /* Calculate RTT and bandwidth.
209 The RTT is the time between the MTU probe burst was sent and the first
210 reply is received. The bandwidth is measured using the time between the
211 arrival of the first and third probe reply.
// diff is the time elapsed since n->probe_time.
// NOTE(review): if that elapsed time is zero the bandwidth division below has
// a zero divisor -- confirm whether that is possible.
214 struct timeval now, diff;
215 gettimeofday(&now, NULL);
216 timersub(&now, &n->probe_time, &diff);
220 if(n->probe_counter == 1) {
221 n->rtt = diff.tv_sec + diff.tv_usec * 1e-6;
223 } else if(n->probe_counter == 3) {
224 n->bandwidth = 2.0 * len / (diff.tv_sec + diff.tv_usec * 1e-6);
// The log scales rtt to milliseconds, bandwidth (bytes per second) to Mbit/s
// and packetloss to percent.
225 logger(DEBUG_TRAFFIC, LOG_DEBUG, "%s (%s) RTT %.2f ms, burst bandwidth %.3f Mbit/s, rx packet loss %.2f %%", n->name, n->hostname, n->rtt * 1e3, n->bandwidth * 8e-6, n->packetloss * 1e2);
// Compresses len bytes from source into dest, with the method chosen by level.
// Visible branches: a plain memcpy (presumably for level 0 -- its condition is
// not shown), a level == 10 branch whose body is not shown, and zlib
// compress2() for levels below 10 with an output capacity of MAXSIZE.
// NOTE(review): the return statements are on lines not visible in this listing.
230 static uint16_t compress_packet(uint8_t *dest, const uint8_t *source, uint16_t len, int level) {
232 memcpy(dest, source, len);
234 } else if(level == 10) {
236 } else if(level < 10) {
238 unsigned long destlen = MAXSIZE;
239 if(compress2(dest, &destlen, source, len, level) == Z_OK)
// Decompresses len bytes from source into dest, with the method chosen by level.
// Visible branches: a plain memcpy (presumably for level 0 -- its condition is
// not shown), a level > 9 branch whose body is not shown, and zlib
// uncompress() otherwise with an output capacity of MAXSIZE.
// NOTE(review): the return statements are on lines not visible in this listing.
251 static uint16_t uncompress_packet(uint8_t *dest, const uint8_t *source, uint16_t len, int level) {
253 memcpy(dest, source, len);
255 } else if(level > 9) {
260 unsigned long destlen = MAXSIZE;
261 if(uncompress(dest, &destlen, source, len) == Z_OK)
// Accounts for a packet received from node n: logs its size and adds
// packet->len to the node's in_bytes counter.
// NOTE(review): lines between and after the visible statements are missing
// from this listing, so further processing may exist.
273 static void receive_packet(node_t *n, vpn_packet_t *packet) {
274 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Received packet of %d bytes from %s (%s)",
275 packet->len, n->name, n->hostname);
278 n->in_bytes += packet->len;
// Returns the result of sptps_verify_datagram() for inpkt against node n's
// SPTPS state. try_harder() uses it to match a packet to a candidate node.
283 static bool try_mac(node_t *n, const vpn_packet_t *inpkt) {
284 return sptps_verify_datagram(&n->sptps, inpkt->data, inpkt->len);
// Feeds a UDP packet from node n into that node's SPTPS session.
// When n->sptps.state is unset, one of two messages is logged depending on
// n->status.waitingforkey (the second is presumably the else branch).
// NOTE(review): the lines that follow each log call, and any return before
// sptps_receive_data(), are not visible in this listing.
287 static void receive_udppacket(node_t *n, vpn_packet_t *inpkt) {
288 if(!n->sptps.state) {
289 if(!n->status.waitingforkey) {
290 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but we haven't exchanged keys yet", n->name, n->hostname);
293 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but he hasn't got our key yet", n->name, n->hostname);
297 sptps_receive_data(&n->sptps, inpkt->data, inpkt->len);
// Copies len bytes from buffer into a local packet and hands it to
// receive_packet() on behalf of connection c's node.
// NOTE(review): the declaration of outpkt, the action taken when the length
// check fails, and the assignment of outpkt.len are not visible here.
300 void receive_tcppacket(connection_t *c, const char *buffer, int len) {
// len is an int compared against a size_t, so it is converted to unsigned
// first: a negative len becomes a very large value and also trips this check.
303 if(len > sizeof outpkt.data)
308 memcpy(outpkt.data, buffer, len);
310 receive_packet(c->node, &outpkt);
// Sends origpkt to node n as an SPTPS record, optionally compressed.
// NOTE(review): the embedded original line numbers jump, so several lines
// (the action when no key request is pending, the probe condition, the
// declarations of type and outpkt, returns and closing braces) are not
// visible in this listing.
313 static void send_sptps_packet(node_t *n, vpn_packet_t *origpkt) {
// No valid key yet: log it. If a key request has been pending for more than
// 10 seconds, stop the SPTPS session and clear waitingforkey.
314 if(!n->status.validkey) {
315 logger(DEBUG_TRAFFIC, LOG_INFO, "No valid key known yet for %s (%s)", n->name, n->hostname);
316 if(!n->status.waitingforkey)
318 else if(n->last_req_key + 10 < now.tv_sec) {
319 logger(DEBUG_ALWAYS, LOG_DEBUG, "No key from %s after 10 seconds, restarting SPTPS", n->name);
320 sptps_stop(&n->sptps);
321 n->status.waitingforkey = false;
329 // If it's a probe, send it immediately without trying to compress it.
331 sptps_send_record(&n->sptps, PKT_PROBE, origpkt->data, origpkt->len);
// With outgoing compression enabled, compress into outpkt.data and set
// PKT_COMPRESSED only when the result is smaller than the original.
// compress_packet() returns uint16_t; the result is stored in an int here.
337 if(n->outcompression) {
338 int len = compress_packet(outpkt.data, origpkt->data, origpkt->len, n->outcompression);
340 logger(DEBUG_TRAFFIC, LOG_ERR, "Error while compressing packet to %s (%s)", n->name, n->hostname);
341 } else if(len < origpkt->len) {
344 type |= PKT_COMPRESSED;
348 sptps_send_record(&n->sptps, type, origpkt->data, origpkt->len);
// Picks the destination address (*sa) and local listen socket index (*sock)
// for a UDP packet to node n.
// NOTE(review): the embedded original line numbers jump, so the bodies of
// several branches (including the confirmed-address and every-third-packet
// cases and the loop that selects the candidate edge) are only partly visible.
352 static void choose_udp_address(const node_t *n, const sockaddr_t **sa, int *sock) {
357 /* If the UDP address is confirmed, use it. */
358 if(n->status.udp_confirmed)
361 /* Send every third packet to n->address; that could be set
362 to the node's reflexive UDP address discovered during key
371 /* Otherwise, addresses are found in edges to this node.
372 So we pick a random edge and a random socket. */
// NOTE(review): rand() % count is a modulo by zero if edge_tree is empty.
// No guard is visible in this listing -- confirm one exists.
375 int j = rand() % n->edge_tree->count;
376 edge_t *candidate = NULL;
378 for splay_each(edge_t, e, n->edge_tree) {
// The address used is taken from the reverse of the selected edge.
380 candidate = e->reverse;
386 *sa = &candidate->address;
387 *sock = rand() % mesh->listen_sockets;
390 /* Make sure we have a suitable socket for the chosen address */
// If the chosen socket's address family differs from the destination's, scan
// the listen sockets for one with a matching family.
391 if(mesh->listen_socket[*sock].sa.sa.sa_family != (*sa)->sa.sa_family) {
392 for(int i = 0; i < mesh->listen_sockets; i++) {
393 if(mesh->listen_socket[i].sa.sa.sa_family == (*sa)->sa.sa_family) {
// Picks a broadcast or multicast destination (*sa) and a random listen socket
// (*sock) for a local-discovery probe to node n. The destination port is
// copied from n->prevedge's address.
// n->prevedge is dereferenced without a check here. The only visible caller,
// send_sptps_data(), calls this when status.broadcast is set, and
// send_mtu_probe_handler() sets that flag only when n->prevedge is non-null.
// NOTE(review): initializer wrappers, else lines and closing braces are on
// lines not visible in this listing.
401 static void choose_broadcast_address(const node_t *n, const sockaddr_t **sa, int *sock) {
// s_addr = -1 sets all address bits, i.e. 255.255.255.255.
402 static sockaddr_t broadcast_ipv4 = {
404 .sin_family = AF_INET,
405 .sin_addr.s_addr = -1,
// Bytes 0, 1 and 15 set as below, all others zero: the address ff02::1.
409 static sockaddr_t broadcast_ipv6 = {
411 .sin6_family = AF_INET6,
412 .sin6_addr.s6_addr[0x0] = 0xff,
413 .sin6_addr.s6_addr[0x1] = 0x02,
414 .sin6_addr.s6_addr[0xf] = 0x01,
418 *sock = rand() % mesh->listen_sockets;
// IPv6 socket: use mesh->localdiscovery_address if it is IPv6, otherwise
// (presumably the else branch) the static ff02::1 address with the socket's
// scope id.
// NOTE(review): the port is read through .in.sin_port even on the IPv6 paths;
// this presumably relies on sin_port and sin6_port sharing an offset in the
// sockaddr_t union -- confirm.
420 if(mesh->listen_socket[*sock].sa.sa.sa_family == AF_INET6) {
421 if(mesh->localdiscovery_address.sa.sa_family == AF_INET6) {
422 mesh->localdiscovery_address.in6.sin6_port = n->prevedge->address.in.sin_port;
423 *sa = &mesh->localdiscovery_address;
425 broadcast_ipv6.in6.sin6_port = n->prevedge->address.in.sin_port;
426 broadcast_ipv6.in6.sin6_scope_id = mesh->listen_socket[*sock].sa.in6.sin6_scope_id;
427 *sa = &broadcast_ipv6;
// IPv4 counterpart of the selection above.
430 if(mesh->localdiscovery_address.sa.sa_family == AF_INET) {
431 mesh->localdiscovery_address.in.sin_port = n->prevedge->address.in.sin_port;
432 *sa = &mesh->localdiscovery_address;
434 broadcast_ipv4.in.sin_port = n->prevedge->address.in.sin_port;
435 *sa = &broadcast_ipv4;
// Sends origpkt to node n. In the visible lines it logs when the node is
// unreachable and otherwise delegates to send_sptps_packet().
// NOTE(review): pkt1, pkt2, pkt, inpkt, outpkt and origlen are not used in
// any visible line; some lines of this function are missing from the listing,
// so they may be used there or may be leftovers.
440 static void send_udppacket(node_t *n, vpn_packet_t *origpkt) {
441 vpn_packet_t pkt1, pkt2;
442 vpn_packet_t *pkt[] = { &pkt1, &pkt2, &pkt1, &pkt2 };
443 vpn_packet_t *inpkt = origpkt;
445 vpn_packet_t *outpkt;
446 int origlen = origpkt->len;
449 if(!n->status.reachable) {
450 logger(DEBUG_TRAFFIC, LOG_INFO, "Trying to send UDP packet to unreachable node %s (%s)", n->name, n->hostname);
// NOTE(review): a return statement with an expression in a function returning
// void is a constraint violation in ISO C (C11 6.8.6.4), though some compilers
// accept it. Consider calling send_sptps_packet() and returning separately.
454 return send_sptps_packet(n, origpkt);
// Transmits len bytes of SPTPS data of the given record type, either as a
// base64-encoded meta request (the TCP path, per the comment below) or as a
// UDP datagram.
// NOTE(review): the declarations of to and sock, else lines, return
// statements and closing braces are on lines not visible in this listing;
// to is presumably derived from handle.
457 bool send_sptps_data(void *handle, uint8_t type, const char *data, size_t len) {
460 /* Send it via TCP if it is a handshake packet, TCPOnly is in use, or this packet is larger than the MTU. */
// Probes are exempt from the size test so they can exceed to->minmtu.
462 if(type >= SPTPS_HANDSHAKE || ((mesh->self->options | to->options) & OPTION_TCPONLY) || (type != PKT_PROBE && len > to->minmtu)) {
// Variable-length stack buffer for the base64 output, sized len * 4 / 3 + 5.
463 char buf[len * 4 / 3 + 5];
464 b64encode(data, buf, len);
465 /* If no valid key is known yet, send the packets using ANS_KEY requests,
466 to ensure we get to learn the reflexive UDP address. */
467 if(!to->status.validkey) {
468 to->incompression = mesh->self->incompression;
469 return send_request(to->nexthop->connection, "%d %s %s %s -1 -1 -1 %d", ANS_KEY, mesh->self->name, to->name, buf, to->incompression);
471 return send_request(to->nexthop->connection, "%d %s %s %d %s", REQ_KEY, mesh->self->name, to->name, REQ_SPTPS, buf);
475 /* Otherwise, send the packet via UDP */
477 const sockaddr_t *sa;
// status.broadcast is set by send_mtu_probe_handler() for local-discovery
// probes; the second call is presumably the else branch.
480 if(to->status.broadcast)
481 choose_broadcast_address(to, &sa, &sock);
483 choose_udp_address(to, &sa, &sock);
// A would-block error is ignored. A message-size error lowers maxmtu to just
// below this length; other errors are logged as warnings.
// NOTE(review): len - 1 wraps around if len is 0 -- confirm callers never
// pass an empty record here.
485 if(sendto(mesh->listen_socket[sock].udp.fd, data, len, 0, &sa->sa, SALEN(sa->sa)) < 0 && !sockwouldblock(sockerrno)) {
486 if(sockmsgsize(sockerrno)) {
487 if(to->maxmtu >= len)
488 to->maxmtu = len - 1;
492 logger(DEBUG_TRAFFIC, LOG_WARNING, "Error sending UDP SPTPS packet to %s (%s): %s", to->name, to->hostname, sockstrerror(sockerrno));
// Callback for a decoded SPTPS record from the node passed as handle.
// Handshake records mark the key as valid, probe records go to mtu_probe_h(),
// and data records are (optionally decompressed and) passed to receive_packet().
// NOTE(review): the declaration of inpkt, the size-check condition, return
// statements, else lines and closing braces are on lines not visible here.
500 bool receive_sptps_record(void *handle, uint8_t type, const char *data, uint16_t len) {
501 node_t *from = handle;
// First completed handshake: record that the key is valid and no longer
// awaited.
// NOTE(review): the log text below misspells "successful"; it is a runtime
// string and is left unchanged here.
503 if(type == SPTPS_HANDSHAKE) {
504 if(!from->status.validkey) {
505 from->status.validkey = true;
506 from->status.waitingforkey = false;
507 logger(DEBUG_META, LOG_INFO, "SPTPS key exchange with %s (%s) succesful", from->name, from->hostname);
// Oversized records are reported against MTU (the condition is not visible).
513 logger(DEBUG_ALWAYS, LOG_ERR, "Packet from %s (%s) larger than maximum supported size (%d > %d)", from->name, from->hostname, len, MTU);
519 if(type == PKT_PROBE) {
522 memcpy(inpkt.data, data, len);
523 mtu_probe_h(from, &inpkt, len);
// Any type bit other than PKT_COMPRESSED is unexpected.
529 if(type & ~(PKT_COMPRESSED)) {
530 logger(DEBUG_ALWAYS, LOG_ERR, "Unexpected SPTPS record type %d len %d from %s (%s)", type, len, from->name, from->hostname);
// Compressed payloads are expanded using the node's incompression level;
// otherwise (presumably the else branch) the payload is copied verbatim.
534 if(type & PKT_COMPRESSED) {
535 uint16_t ulen = uncompress_packet(inpkt.data, (const uint8_t *)data, len, from->incompression);
541 if(inpkt.len > MAXSIZE)
544 memcpy(inpkt.data, data, len);
548 receive_packet(from, &inpkt);
553 Send a packet to the given node.
// Sends packet to node n and adds its length to n->out_bytes.
// A packet addressed to mesh->self is only counted (delivery to the
// application is still a TODO). For other nodes the packet is logged, dropped
// with a message if the node is unreachable (presumably -- the return is not
// visible), and otherwise passed to send_sptps_packet().
// NOTE(review): some lines, including returns and closing braces, are missing
// from this listing.
555 void send_packet(node_t *n, vpn_packet_t *packet) {
558 if(n == mesh->self) {
560 n->out_bytes += packet->len;
561 // TODO: send to application
// NOTE(review): this routine traffic message uses LOG_ERR, while comparable
// messages in this file (see receive_packet() and broadcast_packet()) use
// LOG_DEBUG or LOG_INFO -- confirm whether the priority is intentional.
565 logger(DEBUG_TRAFFIC, LOG_ERR, "Sending packet of %d bytes to %s (%s)",
566 packet->len, n->name, n->hostname);
568 if(!n->status.reachable) {
569 logger(DEBUG_TRAFFIC, LOG_INFO, "Node %s (%s) is not reachable",
570 n->name, n->hostname);
575 n->out_bytes += packet->len;
577 send_sptps_packet(n, packet);
581 /* Broadcast a packet using the minimum spanning tree */
// Forwards packet, which originated at node from, to this node and to every
// active connection that is part of the minimum spanning tree (status.mst),
// except the connection the packet arrived through (from->nexthop->connection).
// NOTE(review): the closing lines of this function are not visible here.
583 void broadcast_packet(const node_t *from, vpn_packet_t *packet) {
584 // Always give ourself a copy of the packet.
585 if(from != mesh->self)
586 send_packet(mesh->self, packet);
588 logger(DEBUG_TRAFFIC, LOG_INFO, "Broadcasting packet of %d bytes from %s (%s)",
589 packet->len, from->name, from->hostname);
591 for list_each(connection_t, c, mesh->connections)
592 if(c->status.active && c->status.mst && c != from->nexthop->connection)
593 send_packet(c->node, packet);
// Tries to identify which node sent pkt when the source address from is not
// already known, by walking mesh->edges and checking the packet against
// candidate nodes with try_mac().
// last_hard_try is static, so it keeps the time of the last attempt across
// calls; presumably it limits the more expensive search to once per second.
// NOTE(review): the statements under each condition (continue/return), the
// result variable and the return value are on lines not visible here.
596 static node_t *try_harder(const sockaddr_t *from, const vpn_packet_t *pkt) {
599 static time_t last_hard_try = 0;
601 for splay_each(edge_t, e, mesh->edges) {
// Unreachable nodes and this node itself are never candidates.
602 if(!e->to->status.reachable || e->to == mesh->self)
605 if(sockaddrcmp_noport(from, &e->address)) {
606 if(last_hard_try == now.tv_sec)
611 if(!try_mac(e->to, pkt))
619 last_hard_try = now.tv_sec;
621 last_hard_try = now.tv_sec;
// I/O callback for a UDP listen socket (passed as data): reads one datagram,
// maps its source address to a node and hands the packet to
// receive_udppacket().
// flags is not used in any visible line.
// NOTE(review): the declarations of pkt, len, n and hostname, the setting of
// pkt.len, returns, else lines and closing braces are not visible here.
625 void handle_incoming_vpn_data(void *data, int flags) {
626 listen_socket_t *ls = data;
629 sockaddr_t from = {{0}};
630 socklen_t fromlen = sizeof from;
// At most MAXSIZE bytes are read into pkt.data.
634 len = recvfrom(ls->udp.fd, pkt.data, MAXSIZE, 0, &from.sa, &fromlen);
// Failed, empty or oversized reads are rejected; only errors other than
// would-block are logged.
636 if(len <= 0 || len > MAXSIZE) {
637 if(!sockwouldblock(sockerrno))
638 logger(DEBUG_ALWAYS, LOG_ERR, "Receiving packet failed: %s", sockstrerror(sockerrno));
644 sockaddrunmap(&from); /* Some braindead IPv6 implementations do stupid things. */
// Look the sender up by address; presumably try_harder() runs only when that
// lookup fails. A node found that way has its UDP address updated; otherwise
// the unknown source is logged when the debug level is high enough.
646 n = lookup_node_udp(&from);
649 n = try_harder(&from, &pkt);
651 update_node_udp(n, &from);
652 else if(debug_level >= DEBUG_PROTOCOL) {
653 hostname = sockaddr2hostname(&from);
654 logger(DEBUG_PROTOCOL, LOG_WARNING, "Received UDP packet from unknown source %s", hostname);
// Record the index of the receiving socket within mesh->listen_socket.
662 n->sock = ls - mesh->listen_socket;
664 receive_udppacket(n, &pkt);