2 net_packet.c -- Handles in- and outgoing VPN packets
3 Copyright (C) 2014 Guus Sliepen <guus@meshlink.io>
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License along
16 with this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 #include "connection.h"
31 #include "meshlink_internal.h"
// Forward declaration; send_udppacket is defined further down in this file
// but is already needed by the MTU probe code.
41 static void send_udppacket(meshlink_handle_t *mesh, node_t *, vpn_packet_t *);
// NOTE(review): presumably the replay window size; nothing in the code
// visible here reads it - confirm where it is used.
43 unsigned replaywin = 16;
// 1073741824 equals 2 to the power 30.
45 #define MAX_SEQNO 1073741824
47 /* mtuprobes == 1..30: initial discovery, send bursts with 1 second interval
48 mtuprobes == 31: sleep pinginterval seconds
49 mtuprobes == 32: send 1 burst, sleep pingtimeout seconds
50 mtuprobes == 33: no response from other side, restart PMTU discovery process
52 Probes are sent in batches of at least three, with random sizes between the
53 lower and upper boundaries for the MTU thus far discovered.
55 After the initial discovery, a fourth packet is added to each batch with a
56 size larger than the currently known PMTU, to test if the PMTU has increased.
58 In case local discovery is enabled, another packet is added to each batch,
59 which will be broadcast to the local network.
// Timer callback that drives path MTU discovery towards one node.
// Depending on n->mtuprobes it restarts discovery, fixes the MTU, or sends a
// burst of probe packets through send_udppacket(). Afterwards it refreshes the
// incoming packet loss estimate and re-arms n->mtutimeout.
// loop: event loop; its data pointer is used as the mesh handle.
// data: NOTE(review): presumably the node_t being probed (send_mtu_probe
//       registers this handler with a node as its data) - the assignment to n
//       is not visible here, confirm.
63 static void send_mtu_probe_handler(event_loop_t *loop, void *data) {
64 meshlink_handle_t *mesh = loop->data;
// Probes are only useful for nodes that are reachable and have a valid key.
70 if(!n->status.reachable || !n->status.validkey) {
71 logger(DEBUG_TRAFFIC, LOG_INFO, "Trying to send MTU probe to unreachable or rekeying node %s (%s)", n->name, n->hostname);
// Counter went past the last probing state (32): the peer did not answer.
76 if(n->mtuprobes > 32) {
79 timeout = mesh->pinginterval;
83 logger(DEBUG_TRAFFIC, LOG_INFO, "%s (%s) did not respond to UDP ping, restarting PMTU discovery", n->name, n->hostname);
84 n->status.udp_confirmed = false;
// Ten or more rounds without any confirmed minimum MTU.
90 if(n->mtuprobes >= 10 && n->mtuprobes < 32 && !n->minmtu) {
91 logger(DEBUG_TRAFFIC, LOG_INFO, "No response to MTU probes from %s (%s)", n->name, n->hostname);
// Discovery ends after round 30, or earlier once the bounds have met;
// minmtu and maxmtu are then made to agree.
95 if(n->mtuprobes == 30 || (n->mtuprobes < 30 && n->minmtu >= n->maxmtu)) {
96 if(n->minmtu > n->maxmtu)
97 n->minmtu = n->maxmtu;
99 n->maxmtu = n->minmtu;
101 logger(DEBUG_TRAFFIC, LOG_INFO, "Fixing MTU of %s (%s) to %d after %d probes", n->name, n->hostname, n->mtu, n->mtuprobes);
// States 31 and 32 only differ in how long to wait before the next call.
105 if(n->mtuprobes == 31) {
106 timeout = mesh->pinginterval;
108 } else if(n->mtuprobes == 32) {
109 timeout = mesh->pingtimeout;
// A burst consists of 4 probes plus mesh->localdiscovery additional ones.
112 for(int i = 0; i < 4 + mesh->localdiscovery; i++) {
116 if(n->mtuprobes < 30 || n->maxmtu + 8 >= MTU)
119 } else if(n->maxmtu <= n->minmtu) {
// Random length above minmtu and at most maxmtu; the branch above
// guarantees maxmtu - minmtu is positive here.
122 len = n->minmtu + 1 + rand() % (n->maxmtu - n->minmtu);
// The first 14 bytes are zero (mtu_probe_h treats a zero first byte as a
// probe request); the remainder is random filler.
130 memset(packet.data, 0, 14);
131 randomize(packet.data + 14, len - 14);
// Only the extra probes (index 4 and up) are broadcast, and only during
// the first 10 rounds and when a previous edge is known.
133 n->status.broadcast = i >= 4 && n->mtuprobes <= 10 && n->prevedge;
135 logger(DEBUG_TRAFFIC, LOG_INFO, "Sending MTU probe length %d to %s (%s)", len, n->name, n->hostname);
137 send_udppacket(mesh, n, &packet);
// Reset per-burst state and remember when this burst was sent.
140 n->status.broadcast = false;
141 n->probe_counter = 0;
142 gettimeofday(&n->probe_time, NULL);
// NOTE(review): when received grew but received_seqno did not change, the
// denominator in the loss computation below is zero - confirm this cannot
// happen.
144 /* Calculate the packet loss of incoming traffic by comparing the rate of
145 packets received to the rate with which the sequence number has increased.
148 if(n->received > n->prev_received)
149 n->packetloss = 1.0 - (n->received - n->prev_received) / (float)(n->received_seqno - n->prev_received_seqno);
151 n->packetloss = n->received_seqno <= n->prev_received_seqno;
153 n->prev_received_seqno = n->received_seqno;
154 n->prev_received = n->received;
// Re-arm the timer; the microsecond part adds up to 0.1 s of random jitter.
157 timeout_set(&mesh->loop, &n->mtutimeout, &(struct timeval){timeout, rand() % 100000});
// Start MTU probing for node n: register send_mtu_probe_handler on
// n->mtutimeout with an initial timeout of 1 second and n as its data, then
// invoke the handler once directly.
160 void send_mtu_probe(meshlink_handle_t *mesh, node_t *n) {
161 timeout_add(&mesh->loop, &n->mtutimeout, send_mtu_probe_handler, n, &(struct timeval){1, 0});
162 send_mtu_probe_handler(&mesh->loop, n);
// Handle an MTU probe packet received from node n.
// A packet whose first data byte is zero is a probe request and is sent back
// to the sender. According to the existing comments the other case is a reply,
// which confirms UDP connectivity and feeds the PMTU, RTT and bandwidth
// bookkeeping of the node.
// mesh:   mesh handle, passed on to send_udppacket().
// n:      node the probe came from.
// packet: the probe itself.
// len:    probe length as supplied by the caller; used for the PMTU and
//         bandwidth computations.
165 static void mtu_probe_h(meshlink_handle_t *mesh, node_t *n, vpn_packet_t *packet, uint16_t len) {
166 logger(DEBUG_TRAFFIC, LOG_INFO, "Got MTU probe length %d from %s (%s)", packet->len, n->name, n->hostname);
168 if(!packet->data[0]) {
169 /* It's a probe request, send back a reply */
173 /* Temporarily set udp_confirmed, so that the reply is sent
174 back exactly the way it came in. */
// The previous flag value is saved here and restored after sending.
176 bool udp_confirmed = n->status.udp_confirmed;
177 n->status.udp_confirmed = true;
178 send_udppacket(mesh, n, packet);
179 n->status.udp_confirmed = udp_confirmed;
181 /* It's a valid reply: now we know bidirectional communication
182 is possible using the address and socket that the reply
185 n->status.udp_confirmed = true;
187 /* If we haven't established the PMTU yet, restart the discovery process. */
189 if(n->mtuprobes > 30) {
// NOTE(review): maxmtu + 8 presumably matches the size of the oversized
// probe sent after the initial discovery - confirm against the sender.
190 if (len == n->maxmtu + 8) {
191 logger(DEBUG_TRAFFIC, LOG_INFO, "Increase in PMTU to %s (%s) detected, restarting PMTU discovery", n->name, n->hostname);
203 /* If applicable, raise the minimum supported MTU */
// The log call at the end scales rtt by 1e3 for milliseconds and bandwidth
// by 8e-6 for Mbit/s, so rtt is in seconds and bandwidth in bytes per second.
210 /* Calculate RTT and bandwidth.
211 The RTT is the time between the MTU probe burst was sent and the first
212 reply is received. The bandwidth is measured using the time between the
213 arrival of the first and third probe reply.
216 struct timeval now, diff;
217 gettimeofday(&now, NULL);
218 timersub(&now, &n->probe_time, &diff);
222 if(n->probe_counter == 1) {
223 n->rtt = diff.tv_sec + diff.tv_usec * 1e-6;
225 } else if(n->probe_counter == 3) {
226 n->bandwidth = 2.0 * len / (diff.tv_sec + diff.tv_usec * 1e-6);
227 logger(DEBUG_TRAFFIC, LOG_DEBUG, "%s (%s) RTT %.2f ms, burst bandwidth %.3f Mbit/s, rx packet loss %.2f %%", n->name, n->hostname, n->rtt * 1e3, n->bandwidth * 8e-6, n->packetloss * 1e2);
// Compress len bytes from source into dest, with the method chosen by level.
// One branch copies the input unchanged with memcpy(), level 10 has its own
// branch, and the level < 10 branch calls zlib compress2() with level as the
// compression setting and MAXSIZE as the capacity of dest.
// NOTE(review): the return statements are not visible here. The caller
// send_sptps_packet compares the result with the original length, so the
// value is presumably the compressed size - confirm.
232 static uint16_t compress_packet(uint8_t *dest, const uint8_t *source, uint16_t len, int level) {
234 memcpy(dest, source, len);
236 } else if(level == 10) {
238 } else if(level < 10) {
// destlen is in/out for compress2: capacity on entry, bytes written on exit.
240 unsigned long destlen = MAXSIZE;
241 if(compress2(dest, &destlen, source, len, level) == Z_OK)
// Decompress len bytes from source into dest, with the method chosen by level.
// One branch copies the input unchanged with memcpy(), levels above 9 have
// their own branch, and the remaining case calls zlib uncompress() with
// MAXSIZE as the capacity of dest.
// NOTE(review): the return statements are not visible here; presumably the
// decompressed size is returned - confirm.
253 static uint16_t uncompress_packet(uint8_t *dest, const uint8_t *source, uint16_t len, int level) {
255 memcpy(dest, source, len);
257 } else if(level > 9) {
// destlen is in/out for uncompress: capacity on entry, bytes written on exit.
262 unsigned long destlen = MAXSIZE;
263 if(uncompress(dest, &destlen, source, len) == Z_OK)
// Account for and route a packet received from node n.
// A packet from a blacklisted node is reported with a warning that says it
// is dropped. For other packets the length is added to n->in_bytes and the
// packet is handed to route().
275 static void receive_packet(meshlink_handle_t *mesh, node_t *n, vpn_packet_t *packet) {
276 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Received packet of %d bytes from %s (%s)",
277 packet->len, n->name, n->hostname);
279 if (n->status.blacklisted) {
280 logger(DEBUG_PROTOCOL, LOG_WARNING, "Dropping packet from blacklisted node %s", n->name);
283 n->in_bytes += packet->len;
285 route(mesh, n, packet);
// Check whether inpkt verifies as a datagram of the SPTPS session of node n.
// Returns the result of sptps_verify_datagram(). The mesh parameter is not
// used in the visible body.
289 static bool try_mac(meshlink_handle_t *mesh, node_t *n, const vpn_packet_t *inpkt) {
290 return sptps_verify_datagram(&n->sptps, inpkt->data, inpkt->len);
// Feed a UDP packet from node n into the SPTPS session of that node.
// When the session has no state yet the packet cannot be processed: a key
// request is sent unless one is already pending (status.waitingforkey), and
// the situation is logged either way.
293 static void receive_udppacket(meshlink_handle_t *mesh, node_t *n, vpn_packet_t *inpkt) {
294 if(!n->sptps.state) {
295 if(!n->status.waitingforkey) {
296 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but we haven't exchanged keys yet", n->name, n->hostname);
297 send_req_key(mesh, n);
299 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but he hasn't got our key yet", n->name, n->hostname);
303 sptps_receive_data(&n->sptps, inpkt->data, inpkt->len);
// Handle a packet that arrived over the TCP connection c.
// The payload is checked against the size of the packet data buffer, copied
// into a local vpn_packet_t and handed to receive_packet() on behalf of
// c->node.
// buffer: payload bytes.
// len:    number of payload bytes.
306 void receive_tcppacket(meshlink_handle_t *mesh, connection_t *c, const char *buffer, int len) {
// NOTE(review): len is a signed int compared with a size_t, so a negative
// len converts to a large unsigned value and also takes this branch.
309 if(len > sizeof outpkt.data)
314 memcpy(outpkt.data, buffer, len);
316 receive_packet(mesh, c->node, &outpkt);
// Send origpkt to node n as a record of the SPTPS session of that node.
// Without a valid key the function instead requests one, or restarts SPTPS
// when a pending request is more than 10 seconds old. Probe packets are sent
// as PKT_PROBE records without compression. Other packets may be compressed
// according to n->outcompression, in which case PKT_COMPRESSED is set in the
// record type.
319 static void send_sptps_packet(meshlink_handle_t *mesh, node_t *n, vpn_packet_t *origpkt) {
320 if(!n->status.validkey) {
321 logger(DEBUG_TRAFFIC, LOG_INFO, "No valid key known yet for %s (%s)", n->name, n->hostname);
322 if(!n->status.waitingforkey)
323 send_req_key(mesh, n);
// The last key request is more than 10 seconds old (loop time, in seconds).
324 else if(n->last_req_key + 10 < mesh->loop.now.tv_sec) {
325 logger(DEBUG_ALWAYS, LOG_DEBUG, "No key from %s after 10 seconds, restarting SPTPS", n->name);
326 sptps_stop(&n->sptps);
327 n->status.waitingforkey = false;
328 send_req_key(mesh, n);
335 // If it's a probe, send it immediately without trying to compress it.
337 sptps_send_record(&n->sptps, PKT_PROBE, origpkt->data, origpkt->len);
343 if(n->outcompression) {
344 int len = compress_packet(outpkt.data, origpkt->data, origpkt->len, n->outcompression);
346 logger(DEBUG_TRAFFIC, LOG_ERR, "Error while compressing packet to %s (%s)", n->name, n->hostname);
// Compression is only used when it made the packet smaller.
347 } else if(len < origpkt->len) {
350 type |= PKT_COMPRESSED;
// NOTE(review): the record is built from origpkt even on the compressed
// path; presumably origpkt is pointed at the compressed buffer on a line
// that is not visible here - confirm.
354 sptps_send_record(&n->sptps, type, origpkt->data, origpkt->len);
// Choose the destination address and the listening socket for a UDP packet
// to node n.
// sa:   out, receives a pointer to the chosen address.
// sock: out, receives an index into mesh->listen_socket.
// The existing comments describe the order of preference: a confirmed UDP
// address, then n->address for every third packet, otherwise the address of
// a randomly picked edge. Finally the socket index is adjusted so that its
// address family matches the chosen address.
358 static void choose_udp_address(meshlink_handle_t *mesh, const node_t *n, const sockaddr_t **sa, int *sock) {
363 /* If the UDP address is confirmed, use it. */
364 if(n->status.udp_confirmed)
367 /* Send every third packet to n->address; that could be set
368 to the node's reflexive UDP address discovered during key
377 /* Otherwise, addresses are found in edges to this node.
378 So we pick a random edge and a random socket. */
// NOTE(review): the modulo below divides by zero when the edge tree is
// empty; no guard is visible here - confirm one exists.
381 int j = rand() % n->edge_tree->count;
382 edge_t *candidate = NULL;
384 for splay_each(edge_t, e, n->edge_tree) {
// The reverse edge is the one whose address is used as the destination.
386 candidate = e->reverse;
392 *sa = &candidate->address;
393 *sock = rand() % mesh->listen_sockets;
396 /* Make sure we have a suitable socket for the chosen address */
397 if(mesh->listen_socket[*sock].sa.sa.sa_family != (*sa)->sa.sa_family) {
398 for(int i = 0; i < mesh->listen_sockets; i++) {
399 if(mesh->listen_socket[i].sa.sa.sa_family == (*sa)->sa.sa_family) {
// Choose a broadcast destination and a listening socket for a local
// discovery probe to node n.
// sa:   out, receives a pointer to the chosen address.
// sock: out, receives a randomly picked index into mesh->listen_socket.
// When mesh->localdiscovery_address has the address family of the chosen
// socket it is used; otherwise a built-in address is used: all address bits
// set for IPv4, and ff02::1 for IPv6 with the scope id of the socket.
// The port is always copied from n->prevedge->address. n->prevedge is
// dereferenced without a check here; send_mtu_probe_handler only sets the
// broadcast flag when n->prevedge is non-null.
// The returned pointers refer to static or mesh-owned storage that this
// function modifies on every call.
407 static void choose_broadcast_address(meshlink_handle_t *mesh, const node_t *n, const sockaddr_t **sa, int *sock) {
408 static sockaddr_t broadcast_ipv4 = {
410 .sin_family = AF_INET,
// -1 stores all bits set, i.e. 255.255.255.255.
411 .sin_addr.s_addr = -1,
415 static sockaddr_t broadcast_ipv6 = {
417 .sin6_family = AF_INET6,
// Bytes 0, 1 and 15 spell out ff02::1.
418 .sin6_addr.s6_addr[0x0] = 0xff,
419 .sin6_addr.s6_addr[0x1] = 0x02,
420 .sin6_addr.s6_addr[0xf] = 0x01,
424 *sock = rand() % mesh->listen_sockets;
// NOTE(review): in the IPv6 branch the port is read through the IPv4 view
// (address.in.sin_port) of the edge address; this relies on the port field
// sitting at the same place in both socket address layouts - confirm.
426 if(mesh->listen_socket[*sock].sa.sa.sa_family == AF_INET6) {
427 if(mesh->localdiscovery_address.sa.sa_family == AF_INET6) {
428 mesh->localdiscovery_address.in6.sin6_port = n->prevedge->address.in.sin_port;
429 *sa = &mesh->localdiscovery_address;
431 broadcast_ipv6.in6.sin6_port = n->prevedge->address.in.sin_port;
432 broadcast_ipv6.in6.sin6_scope_id = mesh->listen_socket[*sock].sa.in6.sin6_scope_id;
433 *sa = &broadcast_ipv6;
436 if(mesh->localdiscovery_address.sa.sa_family == AF_INET) {
437 mesh->localdiscovery_address.in.sin_port = n->prevedge->address.in.sin_port;
438 *sa = &mesh->localdiscovery_address;
440 broadcast_ipv4.in.sin_port = n->prevedge->address.in.sin_port;
441 *sa = &broadcast_ipv4;
// Send origpkt to node n through send_sptps_packet(), after checking that
// the node is reachable; an unreachable target is logged.
446 static void send_udppacket(meshlink_handle_t *mesh, node_t *n, vpn_packet_t *origpkt) {
447 if(!n->status.reachable) {
448 logger(DEBUG_TRAFFIC, LOG_INFO, "Trying to send UDP packet to unreachable node %s (%s)", n->name, n->hostname);
// NOTE(review): returning an expression from a void function is not
// strictly conforming ISO C, although common compilers accept it when the
// expression itself has type void.
452 return send_sptps_packet(mesh, n, origpkt);
// Transmit len bytes of SPTPS output, either over the meta connection (TCP)
// or as a UDP datagram.
// handle: NOTE(review): presumably the destination node_t, called to in
//         the body - the assignment is not visible here, confirm.
// type:   SPTPS record type; decides together with the node options and the
//         known MTU which transport is used.
// data:   bytes to send.
// len:    number of bytes in data.
// On the TCP path the data is base64 encoded and the result of
// send_request() is returned.
455 bool send_sptps_data(void *handle, uint8_t type, const void *data, size_t len) {
457 meshlink_handle_t *mesh = to->mesh;
459 /* Send it via TCP if it is a handshake packet, TCPOnly is in use, or this packet is larger than the MTU. */
// Probes are exempt from the size test, so they can exceed minmtu over UDP.
461 if(type >= SPTPS_HANDSHAKE || ((mesh->self->options | to->options) & OPTION_TCPONLY) || (type != PKT_PROBE && len > to->minmtu)) {
// NOTE(review): variable length array on the stack, sized from len.
// The size leaves room for the 4/3 growth of base64 plus a few spare bytes.
462 char buf[len * 4 / 3 + 5];
463 b64encode(data, buf, len);
464 /* If no valid key is known yet, send the packets using ANS_KEY requests,
465 to ensure we get to learn the reflexive UDP address. */
466 if(!to->status.validkey) {
467 to->incompression = mesh->self->incompression;
468 return send_request(mesh, to->nexthop->connection, "%d %s %s %s -1 -1 -1 %d", ANS_KEY, mesh->self->name, to->name, buf, to->incompression);
470 return send_request(mesh, to->nexthop->connection, "%d %s %s %d %s", REQ_KEY, mesh->self->name, to->name, REQ_SPTPS, buf);
474 /* Otherwise, send the packet via UDP */
476 const sockaddr_t *sa;
479 if(to->status.broadcast)
480 choose_broadcast_address(mesh, to, &sa, &sock);
482 choose_udp_address(mesh, to, &sa, &sock);
// A would-block error is not treated as a failure.
484 if(sendto(mesh->listen_socket[sock].udp.fd, data, len, 0, &sa->sa, SALEN(sa->sa)) < 0 && !sockwouldblock(sockerrno)) {
// Message too large for the path: lower maxmtu to just below this length.
485 if(sockmsgsize(sockerrno)) {
486 if(to->maxmtu >= len)
487 to->maxmtu = len - 1;
491 logger(DEBUG_TRAFFIC, LOG_WARNING, "Error sending UDP SPTPS packet to %s (%s): %s", to->name, to->hostname, sockstrerror(sockerrno));
// Process one record delivered by the SPTPS session of a node.
// handle: the node_t the record came from.
// type:   record type. SPTPS_HANDSHAKE marks a completed key exchange,
//         PKT_PROBE is passed to mtu_probe_h(), and for data records only
//         the PKT_COMPRESSED bit is accepted.
// data:   record payload.
// len:    payload length in bytes.
// Data records are decompressed when flagged as compressed, copied into a
// local packet and handed to receive_packet().
// NOTE(review): the return statements are not visible here - confirm what
// the boolean result means to the caller.
499 bool receive_sptps_record(void *handle, uint8_t type, const void *data, uint16_t len) {
500 node_t *from = handle;
501 meshlink_handle_t *mesh = from->mesh;
503 if(type == SPTPS_HANDSHAKE) {
// The first handshake record marks the key as valid and clears the pending
// key request.
504 if(!from->status.validkey) {
505 from->status.validkey = true;
506 from->status.waitingforkey = false;
// NOTE(review): the log text below misspells successful; it is left as is
// because it is runtime output.
507 logger(DEBUG_META, LOG_INFO, "SPTPS key exchange with %s (%s) succesful", from->name, from->hostname);
513 logger(DEBUG_ALWAYS, LOG_ERR, "Packet from %s (%s) larger than maximum supported size (%d > %d)", from->name, from->hostname, len, MTU);
519 if(type == PKT_PROBE) {
522 memcpy(inpkt.data, data, len);
523 mtu_probe_h(mesh, from, &inpkt, len);
// Any type bit other than PKT_COMPRESSED is unexpected.
529 if(type & ~(PKT_COMPRESSED)) {
530 logger(DEBUG_ALWAYS, LOG_ERR, "Unexpected SPTPS record type %d len %d from %s (%s)", type, len, from->name, from->hostname);
534 if(type & PKT_COMPRESSED) {
535 uint16_t ulen = uncompress_packet(inpkt.data, (const uint8_t *)data, len, from->incompression);
541 if(inpkt.len > MAXSIZE)
544 memcpy(inpkt.data, data, len);
548 receive_packet(mesh, from, &inpkt);
553 send a packet to the given node.
// Send packet to node n.
// A packet addressed to the local node (mesh->self) is only counted in
// n->out_bytes; delivery to the application is still a TODO. For other nodes
// the packet is counted and passed to send_sptps_packet(), and an unreachable
// target is logged.
555 void send_packet(meshlink_handle_t *mesh, node_t *n, vpn_packet_t *packet) {
556 if(n == mesh->self) {
558 n->out_bytes += packet->len;
559 // TODO: send to application
// NOTE(review): this routine message is logged with LOG_ERR while similar
// traffic messages in this file use LOG_INFO or LOG_DEBUG - confirm intent.
563 logger(DEBUG_TRAFFIC, LOG_ERR, "Sending packet of %d bytes to %s (%s)",
564 packet->len, n->name, n->hostname);
566 if(!n->status.reachable) {
567 logger(DEBUG_TRAFFIC, LOG_INFO, "Node %s (%s) is not reachable",
568 n->name, n->hostname);
573 n->out_bytes += packet->len;
575 send_sptps_packet(mesh, n, packet);
579 /* Broadcast a packet using the minimum spanning tree */
// from:   node the packet originates from.
// packet: packet to forward.
// The local node gets a copy unless it is the originator. The packet is then
// sent to the node of every connection that is active and flagged mst,
// except the connection leading back towards from.
581 void broadcast_packet(meshlink_handle_t *mesh, const node_t *from, vpn_packet_t *packet) {
582 // Always give ourself a copy of the packet.
583 if(from != mesh->self)
584 send_packet(mesh, mesh->self, packet);
586 logger(DEBUG_TRAFFIC, LOG_INFO, "Broadcasting packet of %d bytes from %s (%s)",
587 packet->len, from->name, from->hostname);
589 for list_each(connection_t, c, mesh->connections)
590 if(c->status.active && c->status.mst && c != from->nexthop->connection)
591 send_packet(mesh, c->node, packet);
// Try to find the node that sent pkt when the source address from is not
// known, by walking all edges and checking with try_mac() whether the packet
// verifies against the SPTPS session of the node an edge leads to.
// Edges to unreachable nodes and to the local node are filtered out.
// last_hard_try is static, so it is shared by all calls; it is compared with
// the current loop time in seconds.
// NOTE(review): presumably a nonzero sockaddrcmp_noport() result means the
// addresses differ, and such attempts are limited to one per second -
// confirm, the surrounding lines are not visible here.
594 static node_t *try_harder(meshlink_handle_t *mesh, const sockaddr_t *from, const vpn_packet_t *pkt) {
597 static time_t last_hard_try = 0;
599 for splay_each(edge_t, e, mesh->edges) {
600 if(!e->to->status.reachable || e->to == mesh->self)
603 if(sockaddrcmp_noport(from, &e->address)) {
604 if(last_hard_try == mesh->loop.now.tv_sec)
609 if(!try_mac(mesh, e->to, pkt))
617 last_hard_try = mesh->loop.now.tv_sec;
619 last_hard_try = mesh->loop.now.tv_sec;
// Event loop callback for a UDP listening socket.
// Reads one datagram, works out which node sent it (by source address first,
// then through try_harder()), and passes it to receive_udppacket().
// loop:  event loop; its data pointer is used as the mesh handle.
// data:  the listen_socket_t to read from.
// flags: not used in the code visible here.
623 void handle_incoming_vpn_data(event_loop_t *loop, void *data, int flags) {
624 meshlink_handle_t *mesh = loop->data;
625 listen_socket_t *ls = data;
628 sockaddr_t from = {{0}};
629 socklen_t fromlen = sizeof from;
633 len = recvfrom(ls->udp.fd, pkt.data, MAXSIZE, 0, &from.sa, &fromlen);
// recvfrom was given MAXSIZE as the buffer size, so the upper bound test is
// purely defensive. A would-block condition is not reported as an error.
635 if(len <= 0 || len > MAXSIZE) {
636 if(!sockwouldblock(sockerrno))
637 logger(DEBUG_ALWAYS, LOG_ERR, "Receiving packet failed: %s", sockstrerror(sockerrno));
643 sockaddrunmap(&from); /* Some braindead IPv6 implementations do stupid things. */
645 n = lookup_node_udp(mesh, &from);
// Fallback when the address lookup did not identify the sender.
648 n = try_harder(mesh, &from, &pkt);
650 update_node_udp(mesh, n, &from);
// The hostname string is only built when the message would be logged.
651 else if(mesh->debug_level >= DEBUG_PROTOCOL) {
652 hostname = sockaddr2hostname(&from);
653 logger(DEBUG_PROTOCOL, LOG_WARNING, "Received UDP packet from unknown source %s", hostname);
661 if (n->status.blacklisted) {
662 logger(DEBUG_PROTOCOL, LOG_WARNING, "Dropping packet from blacklisted node %s", n->name);
// Remember the index of the receiving socket within mesh->listen_socket.
665 n->sock = ls - mesh->listen_socket;
667 receive_udppacket(mesh, n, &pkt);