]> git.meshlink.io Git - meshlink/blob - src/net_packet.c
Remove legacy Ethernet header from vpn_packet_t, add a flag for PMTU probes.
[meshlink] / src / net_packet.c
1 /*
2     net_packet.c -- Handles in- and outgoing VPN packets
3     Copyright (C) 2014 Guus Sliepen <guus@meshlink.io>
4
5     This program is free software; you can redistribute it and/or modify
6     it under the terms of the GNU General Public License as published by
7     the Free Software Foundation; either version 2 of the License, or
8     (at your option) any later version.
9
10     This program is distributed in the hope that it will be useful,
11     but WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13     GNU General Public License for more details.
14
15     You should have received a copy of the GNU General Public License along
16     with this program; if not, write to the Free Software Foundation, Inc.,
17     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20 #include "system.h"
21
22 #ifdef HAVE_ZLIB
23 #include <zlib.h>
24 #endif
25
26 #include "cipher.h"
27 #include "conf.h"
28 #include "connection.h"
29 #include "crypto.h"
30 #include "digest.h"
31 #include "graph.h"
32 #include "logger.h"
33 #include "net.h"
34 #include "netutl.h"
35 #include "protocol.h"
36 #include "route.h"
37 #include "utils.h"
38 #include "xalloc.h"
39
40 int keylifetime = 0;
41
42 static void send_udppacket(node_t *, vpn_packet_t *);
43
44 unsigned replaywin = 16;
45 bool localdiscovery = false;
46 sockaddr_t localdiscovery_address;
47
48 #define MAX_SEQNO 1073741824
49
50 /* mtuprobes == 1..30: initial discovery, send bursts with 1 second interval
51    mtuprobes ==    31: sleep pinginterval seconds
52    mtuprobes ==    32: send 1 burst, sleep pingtimeout second
53    mtuprobes ==    33: no response from other side, restart PMTU discovery process
54
55    Probes are sent in batches of at least three, with random sizes between the
56    lower and upper boundaries for the MTU thus far discovered.
57
58    After the initial discovery, a fourth packet is added to each batch with a
59    size larger than the currently known PMTU, to test if the PMTU has increased.
60
61    In case local discovery is enabled, another packet is added to each batch,
62    which will be broadcast to the local network.
63
64 */
65
66 static void send_mtu_probe_handler(void *data) {
67         node_t *n = data;
68         int timeout = 1;
69
70         n->mtuprobes++;
71
72         if(!n->status.reachable || !n->status.validkey) {
73                 logger(DEBUG_TRAFFIC, LOG_INFO, "Trying to send MTU probe to unreachable or rekeying node %s (%s)", n->name, n->hostname);
74                 n->mtuprobes = 0;
75                 return;
76         }
77
78         if(n->mtuprobes > 32) {
79                 if(!n->minmtu) {
80                         n->mtuprobes = 31;
81                         timeout = pinginterval;
82                         goto end;
83                 }
84
85                 logger(DEBUG_TRAFFIC, LOG_INFO, "%s (%s) did not respond to UDP ping, restarting PMTU discovery", n->name, n->hostname);
86                 n->status.udp_confirmed = false;
87                 n->mtuprobes = 1;
88                 n->minmtu = 0;
89                 n->maxmtu = MTU;
90         }
91
92         if(n->mtuprobes >= 10 && n->mtuprobes < 32 && !n->minmtu) {
93                 logger(DEBUG_TRAFFIC, LOG_INFO, "No response to MTU probes from %s (%s)", n->name, n->hostname);
94                 n->mtuprobes = 31;
95         }
96
97         if(n->mtuprobes == 30 || (n->mtuprobes < 30 && n->minmtu >= n->maxmtu)) {
98                 if(n->minmtu > n->maxmtu)
99                         n->minmtu = n->maxmtu;
100                 else
101                         n->maxmtu = n->minmtu;
102                 n->mtu = n->minmtu;
103                 logger(DEBUG_TRAFFIC, LOG_INFO, "Fixing MTU of %s (%s) to %d after %d probes", n->name, n->hostname, n->mtu, n->mtuprobes);
104                 n->mtuprobes = 31;
105         }
106
107         if(n->mtuprobes == 31) {
108                 timeout = pinginterval;
109                 goto end;
110         } else if(n->mtuprobes == 32) {
111                 timeout = pingtimeout;
112         }
113
114         for(int i = 0; i < 4 + localdiscovery; i++) {
115                 int len;
116
117                 if(i == 0) {
118                         if(n->mtuprobes < 30 || n->maxmtu + 8 >= MTU)
119                                 continue;
120                         len = n->maxmtu + 8;
121                 } else if(n->maxmtu <= n->minmtu) {
122                         len = n->maxmtu;
123                 } else {
124                         len = n->minmtu + 1 + rand() % (n->maxmtu - n->minmtu);
125                 }
126
127                 if(len < 64)
128                         len = 64;
129
130                 vpn_packet_t packet;
131                 memset(packet.data, 0, 14);
132                 randomize(packet.data + 14, len - 14);
133                 packet.len = len;
134                 n->status.broadcast = i >= 4 && n->mtuprobes <= 10 && n->prevedge;
135
136                 logger(DEBUG_TRAFFIC, LOG_INFO, "Sending MTU probe length %d to %s (%s)", len, n->name, n->hostname);
137
138                 send_udppacket(n, &packet);
139         }
140
141         n->status.broadcast = false;
142         n->probe_counter = 0;
143         gettimeofday(&n->probe_time, NULL);
144
145         /* Calculate the packet loss of incoming traffic by comparing the rate of
146            packets received to the rate with which the sequence number has increased.
147          */
148
149         if(n->received > n->prev_received)
150                 n->packetloss = 1.0 - (n->received - n->prev_received) / (float)(n->received_seqno - n->prev_received_seqno);
151         else
152                 n->packetloss = n->received_seqno <= n->prev_received_seqno;
153
154         n->prev_received_seqno = n->received_seqno;
155         n->prev_received = n->received;
156
157 end:
158         timeout_set(&n->mtutimeout, &(struct timeval){timeout, rand() % 100000});
159 }
160
161 void send_mtu_probe(node_t *n) {
162         timeout_add(&n->mtutimeout, send_mtu_probe_handler, n, &(struct timeval){1, 0});
163         send_mtu_probe_handler(n);
164 }
165
166 static void mtu_probe_h(node_t *n, vpn_packet_t *packet, length_t len) {
167         logger(DEBUG_TRAFFIC, LOG_INFO, "Got MTU probe length %d from %s (%s)", packet->len, n->name, n->hostname);
168
169         if(!packet->data[0]) {
170                 /* It's a probe request, send back a reply */
171
172                 packet->data[0] = 1;
173
174                 /* Temporarily set udp_confirmed, so that the reply is sent
175                    back exactly the way it came in. */
176
177                 bool udp_confirmed = n->status.udp_confirmed;
178                 n->status.udp_confirmed = true;
179                 send_udppacket(n, packet);
180                 n->status.udp_confirmed = udp_confirmed;
181         } else {
182                 /* It's a valid reply: now we know bidirectional communication
183                    is possible using the address and socket that the reply
184                    packet used. */
185
186                 n->status.udp_confirmed = true;
187
188                 /* If we haven't established the PMTU yet, restart the discovery process. */
189
190                 if(n->mtuprobes > 30) {
191                         if (len == n->maxmtu + 8) {
192                                 logger(DEBUG_TRAFFIC, LOG_INFO, "Increase in PMTU to %s (%s) detected, restarting PMTU discovery", n->name, n->hostname);
193                                 n->maxmtu = MTU;
194                                 n->mtuprobes = 10;
195                                 return;
196                         }
197
198                         if(n->minmtu)
199                                 n->mtuprobes = 30;
200                         else
201                                 n->mtuprobes = 1;
202                 }
203
204                 /* If applicable, raise the minimum supported MTU */
205
206                 if(len > n->maxmtu)
207                         len = n->maxmtu;
208                 if(n->minmtu < len)
209                         n->minmtu = len;
210
211                 /* Calculate RTT and bandwidth.
212                    The RTT is the time between the MTU probe burst was sent and the first
213                    reply is received. The bandwidth is measured using the time between the
214                    arrival of the first and third probe reply.
215                  */
216
217                 struct timeval now, diff;
218                 gettimeofday(&now, NULL);
219                 timersub(&now, &n->probe_time, &diff);
220                 
221                 n->probe_counter++;
222
223                 if(n->probe_counter == 1) {
224                         n->rtt = diff.tv_sec + diff.tv_usec * 1e-6;
225                         n->probe_time = now;
226                 } else if(n->probe_counter == 3) {
227                         n->bandwidth = 2.0 * len / (diff.tv_sec + diff.tv_usec * 1e-6);
228                         logger(DEBUG_TRAFFIC, LOG_DEBUG, "%s (%s) RTT %.2f ms, burst bandwidth %.3f Mbit/s, rx packet loss %.2f %%", n->name, n->hostname, n->rtt * 1e3, n->bandwidth * 8e-6, n->packetloss * 1e2);
229                 }
230         }
231 }
232
233 static length_t compress_packet(uint8_t *dest, const uint8_t *source, length_t len, int level) {
234         if(level == 0) {
235                 memcpy(dest, source, len);
236                 return len;
237         } else if(level == 10) {
238                 return -1;
239         } else if(level < 10) {
240 #ifdef HAVE_ZLIB
241                 unsigned long destlen = MAXSIZE;
242                 if(compress2(dest, &destlen, source, len, level) == Z_OK)
243                         return destlen;
244                 else
245 #endif
246                         return -1;
247         } else {
248                 return -1;
249         }
250
251         return -1;
252 }
253
254 static length_t uncompress_packet(uint8_t *dest, const uint8_t *source, length_t len, int level) {
255         if(level == 0) {
256                 memcpy(dest, source, len);
257                 return len;
258         } else if(level > 9) {
259                         return -1;
260         }
261 #ifdef HAVE_ZLIB
262         else {
263                 unsigned long destlen = MAXSIZE;
264                 if(uncompress(dest, &destlen, source, len) == Z_OK)
265                         return destlen;
266                 else
267                         return -1;
268         }
269 #endif
270
271         return -1;
272 }
273
274 /* VPN packet I/O */
275
276 static void receive_packet(node_t *n, vpn_packet_t *packet) {
277         logger(DEBUG_TRAFFIC, LOG_DEBUG, "Received packet of %d bytes from %s (%s)",
278                            packet->len, n->name, n->hostname);
279
280         n->in_packets++;
281         n->in_bytes += packet->len;
282
283         route(n, packet);
284 }
285
286 static bool try_mac(node_t *n, const vpn_packet_t *inpkt) {
287         return sptps_verify_datagram(&n->sptps, inpkt->data, inpkt->len);
288 }
289
290 static void receive_udppacket(node_t *n, vpn_packet_t *inpkt) {
291         if(!n->sptps.state) {
292                 if(!n->status.waitingforkey) {
293                         logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but we haven't exchanged keys yet", n->name, n->hostname);
294                         send_req_key(n);
295                 } else {
296                         logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but he hasn't got our key yet", n->name, n->hostname);
297                 }
298                 return;
299         }
300         sptps_receive_data(&n->sptps, inpkt->data, inpkt->len);
301 }
302
303 void receive_tcppacket(connection_t *c, const char *buffer, int len) {
304         vpn_packet_t outpkt;
305
306         if(len > sizeof outpkt.data)
307                 return;
308
309         outpkt.len = len;
310         outpkt.tcp = true;
311         memcpy(outpkt.data, buffer, len);
312
313         receive_packet(c->node, &outpkt);
314 }
315
316 static void send_sptps_packet(node_t *n, vpn_packet_t *origpkt) {
317         if(!n->status.validkey) {
318                 logger(DEBUG_TRAFFIC, LOG_INFO, "No valid key known yet for %s (%s)", n->name, n->hostname);
319                 if(!n->status.waitingforkey)
320                         send_req_key(n);
321                 else if(n->last_req_key + 10 < now.tv_sec) {
322                         logger(DEBUG_ALWAYS, LOG_DEBUG, "No key from %s after 10 seconds, restarting SPTPS", n->name);
323                         sptps_stop(&n->sptps);
324                         n->status.waitingforkey = false;
325                         send_req_key(n);
326                 }
327                 return;
328         }
329
330         uint8_t type = 0;
331
332         // If it's a probe, send it immediately without trying to compress it.
333         if(origpkt->probe) {
334                 sptps_send_record(&n->sptps, PKT_PROBE, origpkt->data, origpkt->len);
335                 return;
336         }
337
338         vpn_packet_t outpkt;
339
340         if(n->outcompression) {
341                 int len = compress_packet(outpkt.data, origpkt->data, origpkt->len, n->outcompression);
342                 if(len < 0) {
343                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while compressing packet to %s (%s)", n->name, n->hostname);
344                 } else if(len < origpkt->len) {
345                         outpkt.len = len;
346                         origpkt = &outpkt;
347                         type |= PKT_COMPRESSED;
348                 }
349         }
350
351         sptps_send_record(&n->sptps, type, origpkt->data, origpkt->len);
352         return;
353 }
354
355 static void choose_udp_address(const node_t *n, const sockaddr_t **sa, int *sock) {
356         /* Latest guess */
357         *sa = &n->address;
358         *sock = n->sock;
359
360         /* If the UDP address is confirmed, use it. */
361         if(n->status.udp_confirmed)
362                 return;
363
364         /* Send every third packet to n->address; that could be set
365            to the node's reflexive UDP address discovered during key
366            exchange. */
367
368         static int x = 0;
369         if(++x >= 3) {
370                 x = 0;
371                 return;
372         }
373
374         /* Otherwise, address are found in edges to this node.
375            So we pick a random edge and a random socket. */
376
377         int i = 0;
378         int j = rand() % n->edge_tree->count;
379         edge_t *candidate = NULL;
380
381         for splay_each(edge_t, e, n->edge_tree) {
382                 if(i++ == j) {
383                         candidate = e->reverse;
384                         break;
385                 }
386         }
387
388         if(candidate) {
389                 *sa = &candidate->address;
390                 *sock = rand() % listen_sockets;
391         }
392
393         /* Make sure we have a suitable socket for the chosen address */
394         if(listen_socket[*sock].sa.sa.sa_family != (*sa)->sa.sa_family) {
395                 for(int i = 0; i < listen_sockets; i++) {
396                         if(listen_socket[i].sa.sa.sa_family == (*sa)->sa.sa_family) {
397                                 *sock = i;
398                                 break;
399                         }
400                 }
401         }
402 }
403
404 static void choose_broadcast_address(const node_t *n, const sockaddr_t **sa, int *sock) {
405         static sockaddr_t broadcast_ipv4 = {
406                 .in = {
407                         .sin_family = AF_INET,
408                         .sin_addr.s_addr = -1,
409                 }
410         };
411
412         static sockaddr_t broadcast_ipv6 = {
413                 .in6 = {
414                         .sin6_family = AF_INET6,
415                         .sin6_addr.s6_addr[0x0] = 0xff,
416                         .sin6_addr.s6_addr[0x1] = 0x02,
417                         .sin6_addr.s6_addr[0xf] = 0x01,
418                 }
419         };
420
421         *sock = rand() % listen_sockets;
422
423         if(listen_socket[*sock].sa.sa.sa_family == AF_INET6) {
424                 if(localdiscovery_address.sa.sa_family == AF_INET6) {
425                         localdiscovery_address.in6.sin6_port = n->prevedge->address.in.sin_port;
426                         *sa = &localdiscovery_address;
427                 } else {
428                         broadcast_ipv6.in6.sin6_port = n->prevedge->address.in.sin_port;
429                         broadcast_ipv6.in6.sin6_scope_id = listen_socket[*sock].sa.in6.sin6_scope_id;
430                         *sa = &broadcast_ipv6;
431                 }
432         } else {
433                 if(localdiscovery_address.sa.sa_family == AF_INET) {
434                         localdiscovery_address.in.sin_port = n->prevedge->address.in.sin_port;
435                         *sa = &localdiscovery_address;
436                 } else {
437                         broadcast_ipv4.in.sin_port = n->prevedge->address.in.sin_port;
438                         *sa = &broadcast_ipv4;
439                 }
440         }
441 }
442
443 static void send_udppacket(node_t *n, vpn_packet_t *origpkt) {
444         vpn_packet_t pkt1, pkt2;
445         vpn_packet_t *pkt[] = { &pkt1, &pkt2, &pkt1, &pkt2 };
446         vpn_packet_t *inpkt = origpkt;
447         int nextpkt = 0;
448         vpn_packet_t *outpkt;
449         int origlen = origpkt->len;
450         size_t outlen;
451
452         if(!n->status.reachable) {
453                 logger(DEBUG_TRAFFIC, LOG_INFO, "Trying to send UDP packet to unreachable node %s (%s)", n->name, n->hostname);
454                 return;
455         }
456
457         return send_sptps_packet(n, origpkt);
458 }
459
460 bool send_sptps_data(void *handle, uint8_t type, const char *data, size_t len) {
461         node_t *to = handle;
462
463         /* Send it via TCP if it is a handshake packet, TCPOnly is in use, or this packet is larger than the MTU. */
464
465         if(type >= SPTPS_HANDSHAKE || ((myself->options | to->options) & OPTION_TCPONLY) || (type != PKT_PROBE && len > to->minmtu)) {
466                 char buf[len * 4 / 3 + 5];
467                 b64encode(data, buf, len);
468                 /* If no valid key is known yet, send the packets using ANS_KEY requests,
469                    to ensure we get to learn the reflexive UDP address. */
470                 if(!to->status.validkey) {
471                         to->incompression = myself->incompression;
472                         return send_request(to->nexthop->connection, "%d %s %s %s -1 -1 -1 %d", ANS_KEY, myself->name, to->name, buf, to->incompression);
473                 } else {
474                         return send_request(to->nexthop->connection, "%d %s %s %d %s", REQ_KEY, myself->name, to->name, REQ_SPTPS, buf);
475                 }
476         }
477
478         /* Otherwise, send the packet via UDP */
479
480         const sockaddr_t *sa;
481         int sock;
482
483         if(to->status.broadcast)
484                 choose_broadcast_address(to, &sa, &sock);
485         else
486                 choose_udp_address(to, &sa, &sock);
487
488         if(sendto(listen_socket[sock].udp.fd, data, len, 0, &sa->sa, SALEN(sa->sa)) < 0 && !sockwouldblock(sockerrno)) {
489                 if(sockmsgsize(sockerrno)) {
490                         if(to->maxmtu >= len)
491                                 to->maxmtu = len - 1;
492                         if(to->mtu >= len)
493                                 to->mtu = len - 1;
494                 } else {
495                         logger(DEBUG_TRAFFIC, LOG_WARNING, "Error sending UDP SPTPS packet to %s (%s): %s", to->name, to->hostname, sockstrerror(sockerrno));
496                         return false;
497                 }
498         }
499
500         return true;
501 }
502
503 bool receive_sptps_record(void *handle, uint8_t type, const char *data, uint16_t len) {
504         node_t *from = handle;
505
506         if(type == SPTPS_HANDSHAKE) {
507                 if(!from->status.validkey) {
508                         from->status.validkey = true;
509                         from->status.waitingforkey = false;
510                         logger(DEBUG_META, LOG_INFO, "SPTPS key exchange with %s (%s) succesful", from->name, from->hostname);
511                 }
512                 return true;
513         }
514
515         if(len > MTU) {
516                 logger(DEBUG_ALWAYS, LOG_ERR, "Packet from %s (%s) larger than maximum supported size (%d > %d)", from->name, from->hostname, len, MTU);
517                 return false;
518         }
519
520         vpn_packet_t inpkt;
521
522         if(type == PKT_PROBE) {
523                 inpkt.len = len;
524                 inpkt.probe = true;
525                 memcpy(inpkt.data, data, len);
526                 mtu_probe_h(from, &inpkt, len);
527                 return true;
528         } else {
529                 inpkt.probe = false;
530         }
531
532         if(type & ~(PKT_COMPRESSED)) {
533                 logger(DEBUG_ALWAYS, LOG_ERR, "Unexpected SPTPS record type %d len %d from %s (%s)", type, len, from->name, from->hostname);
534                 return false;
535         }
536
537         if(type & PKT_COMPRESSED) {
538                 length_t ulen = uncompress_packet(inpkt.data, (const uint8_t *)data, len, from->incompression);
539                 if(ulen < 0) {
540                         return false;
541                 } else {
542                         inpkt.len = ulen;
543                 }
544                 if(inpkt.len > MAXSIZE)
545                         abort();
546         } else {
547                 memcpy(inpkt.data, data, len);
548                 inpkt.len = len;
549         }
550
551         receive_packet(from, &inpkt);
552         return true;
553 }
554
555 /*
556   send a packet to the given vpn ip.
557 */
558 void send_packet(node_t *n, vpn_packet_t *packet) {
559         node_t *via;
560
561         if(n == myself) {
562                 n->out_packets++;
563                 n->out_bytes += packet->len;
564                 // TODO: send to application
565                 return;
566         }
567
568         logger(DEBUG_TRAFFIC, LOG_ERR, "Sending packet of %d bytes to %s (%s)",
569                            packet->len, n->name, n->hostname);
570
571         if(!n->status.reachable) {
572                 logger(DEBUG_TRAFFIC, LOG_INFO, "Node %s (%s) is not reachable",
573                                    n->name, n->hostname);
574                 return;
575         }
576
577         n->out_packets++;
578         n->out_bytes += packet->len;
579
580         send_sptps_packet(n, packet);
581         return;
582 }
583
584 /* Broadcast a packet using the minimum spanning tree */
585
586 void broadcast_packet(const node_t *from, vpn_packet_t *packet) {
587         // Always give ourself a copy of the packet.
588         if(from != myself)
589                 send_packet(myself, packet);
590
591         logger(DEBUG_TRAFFIC, LOG_INFO, "Broadcasting packet of %d bytes from %s (%s)",
592                            packet->len, from->name, from->hostname);
593
594         for list_each(connection_t, c, connection_list)
595                 if(c->status.active && c->status.mst && c != from->nexthop->connection)
596                         send_packet(c->node, packet);
597 }
598
599 static node_t *try_harder(const sockaddr_t *from, const vpn_packet_t *pkt) {
600         node_t *n = NULL;
601         bool hard = false;
602         static time_t last_hard_try = 0;
603
604         for splay_each(edge_t, e, edge_weight_tree) {
605                 if(!e->to->status.reachable || e->to == myself)
606                         continue;
607
608                 if(sockaddrcmp_noport(from, &e->address)) {
609                         if(last_hard_try == now.tv_sec)
610                                 continue;
611                         hard = true;
612                 }
613
614                 if(!try_mac(e->to, pkt))
615                         continue;
616
617                 n = e->to;
618                 break;
619         }
620
621         if(hard)
622                 last_hard_try = now.tv_sec;
623
624         last_hard_try = now.tv_sec;
625         return n;
626 }
627
628 void handle_incoming_vpn_data(void *data, int flags) {
629         listen_socket_t *ls = data;
630         vpn_packet_t pkt;
631         char *hostname;
632         sockaddr_t from = {{0}};
633         socklen_t fromlen = sizeof from;
634         node_t *n;
635         int len;
636
637         len = recvfrom(ls->udp.fd, pkt.data, MAXSIZE, 0, &from.sa, &fromlen);
638
639         if(len <= 0 || len > MAXSIZE) {
640                 if(!sockwouldblock(sockerrno))
641                         logger(DEBUG_ALWAYS, LOG_ERR, "Receiving packet failed: %s", sockstrerror(sockerrno));
642                 return;
643         }
644
645         pkt.len = len;
646
647         sockaddrunmap(&from); /* Some braindead IPv6 implementations do stupid things. */
648
649         n = lookup_node_udp(&from);
650
651         if(!n) {
652                 n = try_harder(&from, &pkt);
653                 if(n)
654                         update_node_udp(n, &from);
655                 else if(debug_level >= DEBUG_PROTOCOL) {
656                         hostname = sockaddr2hostname(&from);
657                         logger(DEBUG_PROTOCOL, LOG_WARNING, "Received UDP packet from unknown source %s", hostname);
658                         free(hostname);
659                         return;
660                 }
661                 else
662                         return;
663         }
664
665         n->sock = ls - listen_socket;
666
667         receive_udppacket(n, &pkt);
668 }