From: Guus Sliepen Date: Sun, 26 Apr 2020 12:48:18 +0000 (+0200) Subject: Improved PMTU probe algorithm. X-Git-Url: https://git.meshlink.io/?a=commitdiff_plain;h=bc2e55e940e45748a49f1a7eb44b156917fca469;p=meshlink Improved PMTU probe algorithm. This changes the PMTU probing algorithm to: - Send frequent (10 second interval) small UDP packets (~60 bytes) to probe whether UDP is possible at all, and to keep NAT mappings alive. - Initial PMTU probing sends 1 packet every 0.333 seconds, starting with the interface MTU (which is assumed to be the most likely PMTU), then doing a bisection with up to 20 probes if that didn't work. - After the PMTU has been fixed, it sends one packet of size PMTU and one of PMTU+1, to check whether there are changes in the PMTU. Furthermore, probes are now sent on-demand where possible. Small probes are always sent every 10 seconds between nodes that have a meta-connection, since they need to keep their NAT mappings alive to be able to assist with UDP hole punching. But large probes, and small probes between nodes that don't share a meta-connection, are only sent if there is actual channel traffic between the nodes. A final optimization is that probe replies are now short (~60 bytes) packets that encode the length of the probe request. Before, around 9000 bytes/minute would be used for probes; with this commit only 2280 bytes/minute will be used in case there is channel traffic, otherwise only 720 bytes/minute will be used. 
--- diff --git a/src/Makefile.am b/src/Makefile.am index fad47bff..3003e8a4 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -67,6 +67,7 @@ libmeshlink_la_SOURCES = \ node.c node.h \ submesh.c submesh.h \ packmsg.h \ + pmtu.c pmtu.h \ prf.c prf.h \ protocol.c protocol.h \ protocol_auth.c \ diff --git a/src/graph.c b/src/graph.c index 9a2bfb19..f68cf008 100644 --- a/src/graph.c +++ b/src/graph.c @@ -162,8 +162,6 @@ static void check_reachability(meshlink_handle_t *mesh) { n->maxmtu = MTU; n->minmtu = 0; n->mtuprobes = 0; - - timeout_del(&mesh->loop, &n->mtutimeout); } if(n->status.visited != n->status.reachable) { @@ -193,7 +191,7 @@ static void check_reachability(meshlink_handle_t *mesh) { n->minmtu = 0; n->mtuprobes = 0; - timeout_del(&mesh->loop, &n->mtutimeout); + timeout_del(&mesh->loop, &n->udp_ping_timeout); if(!n->status.blacklisted) { update_node_status(mesh, n); diff --git a/src/meshlink.c b/src/meshlink.c index e70a6169..f92f0098 100644 --- a/src/meshlink.c +++ b/src/meshlink.c @@ -2200,7 +2200,6 @@ static bool prepare_packet(meshlink_handle_t *mesh, meshlink_node_t *destination // Prepare the packet packet->probe = false; - packet->tcp = false; packet->len = len + sizeof(*hdr); hdr = (meshlink_packethdr_t *)packet->data; @@ -4038,8 +4037,8 @@ static void channel_retransmit(struct utcp_connection *utcp_connection) { node_t *n = utcp_connection->utcp->priv; meshlink_handle_t *mesh = n->mesh; - if(n->mtuprobes == 31 && n->mtutimeout.cb) { - timeout_set(&mesh->loop, &n->mtutimeout, &(struct timespec) { + if(n->mtuprobes == -1 && n->udp_ping_timeout.cb) { + timeout_set(&mesh->loop, &n->udp_ping_timeout, &(struct timespec) { 0, 0 }); } diff --git a/src/net.c b/src/net.c index 35cfc650..41c82d36 100644 --- a/src/net.c +++ b/src/net.c @@ -29,6 +29,7 @@ #include "meta.h" #include "net.h" #include "netutl.h" +#include "pmtu.h" #include "protocol.h" #include "sptps.h" #include "xalloc.h" @@ -128,6 +129,8 @@ static void timeout_handler(event_loop_t *loop, void 
*data) { if(c->node->status.waitingforkey && c->node->last_req_key + pingtimeout < mesh->loop.now.tv_sec) { send_req_key(mesh, c->node); } + + keepalive(mesh, c->node, false); } if(c->status.active && c->last_key_renewal + 3600 < mesh->loop.now.tv_sec) { @@ -161,6 +164,7 @@ static void timeout_handler(event_loop_t *loop, void *data) { } terminate_connection(mesh, c, c->status.active); + continue; } } diff --git a/src/net.h b/src/net.h index 9994fac6..8aff86ed 100644 --- a/src/net.h +++ b/src/net.h @@ -107,11 +107,12 @@ bool node_read_from_config(struct meshlink_handle *mesh, struct node_t *, const bool read_ecdsa_public_key(struct meshlink_handle *mesh, struct connection_t *) __attribute__((__warn_unused_result__)); bool read_ecdsa_private_key(struct meshlink_handle *mesh) __attribute__((__warn_unused_result__)); bool node_write_config(struct meshlink_handle *mesh, struct node_t *, bool new_key) __attribute__((__warn_unused_result__)); -void send_mtu_probe(struct meshlink_handle *mesh, struct node_t *); void handle_meta_connection_data(struct meshlink_handle *mesh, struct connection_t *); void retry(struct meshlink_handle *mesh); int check_port(struct meshlink_handle *mesh); void flush_meta(struct meshlink_handle *mesh, struct connection_t *); +void send_udppacket(struct meshlink_handle *mesh, struct node_t *, struct vpn_packet_t *); +void choose_udp_address(struct meshlink_handle *mesh, const struct node_t *n, const union sockaddr_t **sa, int *sock, union sockaddr_t *sa_buf); #ifndef HAVE_MINGW #define closesocket(s) close(s) diff --git a/src/net_packet.c b/src/net_packet.c index acc90c97..0dc931ba 100644 --- a/src/net_packet.c +++ b/src/net_packet.c @@ -27,217 +27,14 @@ #include "meshlink_internal.h" #include "net.h" #include "netutl.h" +#include "pmtu.h" #include "protocol.h" #include "route.h" #include "sptps.h" #include "utils.h" #include "xalloc.h" -int keylifetime = 0; - -static void send_udppacket(meshlink_handle_t *mesh, node_t *, vpn_packet_t *); - 
-#define MAX_SEQNO 1073741824 -#define PROBE_OVERHEAD (SPTPS_DATAGRAM_OVERHEAD + 40) - -/* mtuprobes == 1..30: initial discovery, send bursts with 1 second interval - mtuprobes == 31: sleep pinginterval seconds - mtuprobes == 32: send 1 burst, sleep pingtimeout second - mtuprobes == 33: no response from other side, restart PMTU discovery process - - Probes are sent in batches of at least three, with random sizes between the - lower and upper boundaries for the MTU thus far discovered. - - After the initial discovery, a fourth packet is added to each batch with a - size larger than the currently known PMTU, to test if the PMTU has increased. - - In case local discovery is enabled, another packet is added to each batch, - which will be broadcast to the local network. - -*/ - -static void send_mtu_probe_handler(event_loop_t *loop, void *data) { - meshlink_handle_t *mesh = loop->data; - node_t *n = data; - int timeout = 1; - - n->mtuprobes++; - - if(!n->status.reachable || !n->status.validkey) { - logger(mesh, MESHLINK_INFO, "Trying to send MTU probe to unreachable or rekeying node %s", n->name); - n->mtuprobes = 0; - return; - } - - if(n->mtuprobes > 32) { - if(!n->minmtu) { - n->mtuprobes = 31; - timeout = mesh->dev_class_traits[n->devclass].pinginterval; - goto end; - } - - logger(mesh, MESHLINK_INFO, "%s did not respond to UDP ping, restarting PMTU discovery", n->name); - n->status.udp_confirmed = false; - n->mtuprobes = 1; - n->minmtu = 0; - n->maxmtu = MTU; - - update_node_pmtu(mesh, n); - } - - if(n->mtuprobes >= 10 && n->mtuprobes < 32 && !n->minmtu) { - logger(mesh, MESHLINK_INFO, "No response to MTU probes from %s", n->name); - n->mtuprobes = 31; - } - - if(n->mtuprobes == 30 || (n->mtuprobes < 30 && n->minmtu >= n->maxmtu)) { - if(n->minmtu > n->maxmtu) { - n->minmtu = n->maxmtu; - update_node_pmtu(mesh, n); - } else { - n->maxmtu = n->minmtu; - } - - n->mtu = n->minmtu; - logger(mesh, MESHLINK_INFO, "Fixing MTU of %s to %d after %d probes", n->name, n->mtu, 
n->mtuprobes); - n->mtuprobes = 31; - } - - if(n->mtuprobes == 31) { - if(!n->minmtu && n->status.want_udp && n->nexthop && n->nexthop->connection) { - /* Send a dummy ANS_KEY to try to update the reflexive UDP address */ - send_request(mesh, n->nexthop->connection, NULL, "%d %s %s . -1 -1 -1 0", ANS_KEY, mesh->self->name, n->name); - n->status.want_udp = false; - } - - timeout = mesh->dev_class_traits[n->devclass].pinginterval; - goto end; - } else if(n->mtuprobes == 32) { - timeout = mesh->dev_class_traits[n->devclass].pingtimeout; - } - - for(int i = 0; i < 5; i++) { - int len; - - if(i == 0) { - if(n->mtuprobes < 30 || n->maxmtu + 8 >= MTU) { - continue; - } - - len = n->maxmtu + 8; - } else if(n->maxmtu <= n->minmtu) { - len = n->maxmtu; - } else { - len = n->minmtu + 1 + prng(mesh, n->maxmtu - n->minmtu); - } - - if(len < 64) { - len = 64; - } - - vpn_packet_t packet; - packet.probe = true; - memset(packet.data, 0, 14); - randomize(packet.data + 14, len - 14); - packet.len = len; - n->status.broadcast = i >= 4 && n->mtuprobes <= 10 && n->prevedge; - - logger(mesh, MESHLINK_DEBUG, "Sending MTU probe length %d to %s", len, n->name); - - n->out_meta += packet.len + PROBE_OVERHEAD; - send_udppacket(mesh, n, &packet); - } - - n->status.broadcast = false; - -end: - timeout_set(&mesh->loop, &n->mtutimeout, &(struct timespec) { - timeout, prng(mesh, TIMER_FUDGE) - }); -} - -void send_mtu_probe(meshlink_handle_t *mesh, node_t *n) { - timeout_add(&mesh->loop, &n->mtutimeout, send_mtu_probe_handler, n, &(struct timespec) { - 1, 0 - }); - send_mtu_probe_handler(&mesh->loop, n); -} - -static void mtu_probe_h(meshlink_handle_t *mesh, node_t *n, vpn_packet_t *packet, uint16_t len) { - n->in_meta += len + PROBE_OVERHEAD; - - if(len < 64) { - logger(mesh, MESHLINK_WARNING, "Got too short MTU probe length %d from %s", packet->len, n->name); - return; - } - - logger(mesh, MESHLINK_DEBUG, "Got MTU probe length %d from %s", packet->len, n->name); - - if(!packet->data[0]) { - /* 
It's a probe request, send back a reply */ - - packet->data[0] = 1; - - /* Temporarily set udp_confirmed, so that the reply is sent - back exactly the way it came in. */ - - bool udp_confirmed = n->status.udp_confirmed; - n->status.udp_confirmed = true; - logger(mesh, MESHLINK_DEBUG, "Sending MTU probe reply %d to %s", packet->len, n->name); - n->out_meta += packet->len + PROBE_OVERHEAD; - send_udppacket(mesh, n, packet); - n->status.udp_confirmed = udp_confirmed; - } else { - /* It's a valid reply: now we know bidirectional communication - is possible using the address and socket that the reply - packet used. */ - - if(!n->status.udp_confirmed) { - char *address, *port; - sockaddr2str(&n->address, &address, &port); - - if(n->nexthop && n->nexthop->connection) { - send_request(mesh, n->nexthop->connection, NULL, "%d %s %s . -1 -1 -1 0 %s %s", ANS_KEY, n->name, n->name, address, port); - } else { - logger(mesh, MESHLINK_WARNING, "Cannot send reflexive address to %s via %s", n->name, n->nexthop ? n->nexthop->name : n->name); - } - - free(address); - free(port); - n->status.udp_confirmed = true; - } - - /* If we haven't established the PMTU yet, restart the discovery process. 
*/ - - if(n->mtuprobes > 30) { - if(len == n->maxmtu + 8) { - logger(mesh, MESHLINK_INFO, "Increase in PMTU to %s detected, restarting PMTU discovery", n->name); - n->maxmtu = MTU; - n->mtuprobes = 10; - return; - } - - if(n->minmtu) { - n->mtuprobes = 30; - } else { - n->mtuprobes = 1; - } - } - - /* If applicable, raise the minimum supported MTU */ - - if(len > n->maxmtu) { - len = n->maxmtu; - } - - if(n->minmtu < len) { - n->minmtu = len; - update_node_pmtu(mesh, n); - } - } -} - -/* VPN packet I/O */ +/* Packet I/O */ static void receive_packet(meshlink_handle_t *mesh, node_t *n, vpn_packet_t *packet) { logger(mesh, MESHLINK_DEBUG, "Received packet of %d bytes from %s", packet->len, n->name); @@ -314,7 +111,7 @@ static void send_sptps_packet(meshlink_handle_t *mesh, node_t *n, vpn_packet_t * return; } -static void choose_udp_address(meshlink_handle_t *mesh, const node_t *n, const sockaddr_t **sa, int *sock, sockaddr_t *sa_buf) { +void choose_udp_address(meshlink_handle_t *mesh, const node_t *n, const sockaddr_t **sa, int *sock, sockaddr_t *sa_buf) { /* Latest guess */ *sa = &n->address; *sock = n->sock; @@ -406,7 +203,7 @@ static void choose_broadcast_address(meshlink_handle_t *mesh, const node_t *n, c *sa = broadcast_sa; } -static void send_udppacket(meshlink_handle_t *mesh, node_t *n, vpn_packet_t *origpkt) { +void send_udppacket(meshlink_handle_t *mesh, node_t *n, vpn_packet_t *origpkt) { if(!n->status.reachable) { logger(mesh, MESHLINK_INFO, "Trying to send UDP packet to unreachable node %s", n->name); return; @@ -461,13 +258,13 @@ bool send_sptps_data(void *handle, uint8_t type, const void *data, size_t len) { } if(sendto(mesh->listen_socket[sock].udp.fd, data, len, 0, &sa->sa, SALEN(sa->sa)) < 0 && !sockwouldblock(sockerrno)) { - if(sockmsgsize(sockerrno)) { - if(to->maxmtu >= len) { - to->maxmtu = len - 1; + if(sockmsgsize(sockerrno) && len > 21) { + if(to->maxmtu >= len - 21) { + to->maxmtu = len - 22; } - if(to->mtu >= len) { - to->mtu = len - 1; + 
if(to->mtu >= len - 21) { + to->mtu = len - 22; } } else { logger(mesh, MESHLINK_WARNING, "Error sending UDP SPTPS packet to %s: %s", to->name, sockstrerror(sockerrno)); @@ -510,7 +307,7 @@ bool receive_sptps_record(void *handle, uint8_t type, const void *data, uint16_t inpkt.len = len; inpkt.probe = true; memcpy(inpkt.data, data, len); - mtu_probe_h(mesh, from, &inpkt, len); + udp_probe_h(mesh, from, &inpkt, len); return true; } else { inpkt.probe = false; @@ -552,6 +349,7 @@ void send_packet(meshlink_handle_t *mesh, node_t *n, vpn_packet_t *packet) { n->status.want_udp = true; send_sptps_packet(mesh, n, packet); + keepalive(mesh, n, true); return; } diff --git a/src/node.c b/src/node.c index b8caed6c..70c29328 100644 --- a/src/node.c +++ b/src/node.c @@ -76,10 +76,6 @@ void free_node(node_t *n) { ecdsa_free(n->ecdsa); sptps_stop(&n->sptps); - if(n->mtutimeout.cb) { - abort(); - } - free(n->name); free(n->canonical_address); @@ -92,7 +88,7 @@ void node_add(meshlink_handle_t *mesh, node_t *n) { } void node_del(meshlink_handle_t *mesh, node_t *n) { - timeout_del(&mesh->loop, &n->mtutimeout); + timeout_del(&mesh->loop, &n->udp_ping_timeout); for splay_each(edge_t, e, n->edge_tree) { edge_del(mesh, e); diff --git a/src/node.h b/src/node.h index 918f1cce..24f78dcf 100644 --- a/src/node.h +++ b/src/node.h @@ -70,8 +70,11 @@ typedef struct node_t { uint64_t out_meta; /* Bytes sent on meta-connections, heartbeat packets etc. 
*/ // MTU probes - timeout_t mtutimeout; /* Probe event */ + timeout_t udp_ping_timeout; /* UDP probe event */ + struct timespec last_mtu_probe_sent; /* Time that the last MTU probe was sent */ + struct timespec last_udp_probe_sent; /* Time that the last UDP probe was sent */ int mtuprobes; /* Number of probes */ + uint16_t last_mtu_len; /* Size of the last sent probe */ uint16_t mtu; /* Maximum size of packets to send to this node */ uint16_t maxmtu; /* Probed maximum MTU */ diff --git a/src/pmtu.c b/src/pmtu.c new file mode 100644 index 00000000..770f1828 --- /dev/null +++ b/src/pmtu.c @@ -0,0 +1,407 @@ +/* + pmtu.c -- PMTU probing + Copyright (C) 2020 Guus Sliepen + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +*/ + +#include "system.h" + +#include "crypto.h" +#include "logger.h" +#include "net.h" +#include "netutl.h" +#include "node.h" +#include "pmtu.h" +#include "protocol.h" +#include "utils.h" + +/* PMTU probing serves two purposes: + * + * - establishing a working UDP connection between two peers + * - determining the path MTU (PMTU) between two peers + * + * Establishing a working UDP connection requires NAT hole punching and regular + * packets to keep the NAT mappings alive. For this, we can use very small UDP + * packets, and send them rather frequently (once every 10 seconds). 
This also + * allows us to detect connection loss rather quickly. + * + * For PMTU discovery, we need to send packets of various size, and determine + * which ones are received by the other end. Once the PMTU is established, we + * want to keep monitoring that the discovered PMTU value is still valid. + * However, we assume PMTU changes are unlikely, so they do not have to be done + * very often. + * + * To keep track of how far we are in the PMTU probing process, the variable + * mtuprobes is used. The meaning of its value is: + * + * - mtuprobes == -4: maxmtu no longer valid, reset minmtu and maxmtu and go to 0 + * - mtuprobes ==-2..-3: send one maxmtu probe every second + * - mtuprobes == -1: send one maxmtu and one maxmtu + 1 probe every pinginterval + * - mtuprobes == 0..19: initial discovery, send three packets per second, mtuprobes++ + * - mtuprobes == 20: fix PMTU, and go to -1 + * + * The first probe is always the maximum MTU supported by the interface, + * then a binary search is done until the minimum and maximum converge, + * or until 20 packets have been sent. + * + * After the initial discovery, PMTU probing only sends two packets; one with + * the same size as the discovered PMTU, and one which has a size slightly + * larger than the currently known PMTU, to test if the PMTU has increased. 
+ */ + +static void try_fix_mtu(meshlink_handle_t *mesh, node_t *n) { + if(n->mtuprobes < 0) { + return; + } + + if(n->mtuprobes == 20 || n->minmtu >= n->maxmtu) { + if(n->minmtu > n->maxmtu) { + n->minmtu = n->maxmtu; + } else { + n->maxmtu = n->minmtu; + } + + n->mtu = n->minmtu; + logger(mesh, MESHLINK_INFO, "Fixing PMTU of %s to %d after %d probes", n->name, n->mtu, n->mtuprobes); + n->mtuprobes = -1; + } +} + +static void udp_probe_timeout_handler(event_loop_t *loop, void *data) { + node_t *n = data; + meshlink_handle_t *mesh = loop->data; + + if(!n->status.udp_confirmed) { + return; + } + + logger(mesh, MESHLINK_INFO, "Too much time has elapsed since last UDP ping response from %s, stopping UDP communication", n->name); + n->status.udp_confirmed = false; + n->mtuprobes = 0; + n->minmtu = 0; + n->maxmtu = MTU; +} + +static void send_udp_probe_reply(meshlink_handle_t *mesh, node_t *n, vpn_packet_t *packet, uint16_t len) { + if(!n->status.validkey) { + logger(mesh, MESHLINK_INFO, "Trying to send UDP probe reply to %s but we don't have his key yet", n->name); + return; + } + + packet->data[0] = 1; + + if(packet->data[1]) { + packet->data[1] = 1; + memcpy(packet->data + 2, &len, 2); + len = MIN_PROBE_SIZE; + } + + /* Temporarily set udp_confirmed, so that the reply is sent + back exactly the way it came in. 
*/ + + bool udp_confirmed = n->status.udp_confirmed; + n->status.udp_confirmed = true; + logger(mesh, MESHLINK_DEBUG, "Sending UDP reply length %d to %s", packet->len, n->name); + n->out_meta += packet->len; + send_udppacket(mesh, n, packet); + n->status.udp_confirmed = udp_confirmed; +} + +void udp_probe_h(meshlink_handle_t *mesh, node_t *n, vpn_packet_t *packet, uint16_t len) { + if(len < MIN_PROBE_SIZE) { + logger(mesh, MESHLINK_WARNING, "Got too short PMTU probe length %d from %s", packet->len, n->name); + return; + } + + n->in_meta += packet->len; + + if(!packet->data[0]) { + /* It's a probe request, send back a reply */ + logger(mesh, MESHLINK_DEBUG, "Got PMTU probe length %d from %s", packet->len, n->name); + send_udp_probe_reply(mesh, n, packet, len); + return; + } + + if(packet->data[1]) { + memcpy(&len, packet->data + 2, 2); + } + + logger(mesh, MESHLINK_DEBUG, "Got PMTU reply length %d from %s", len, n->name); + + /* It's a valid reply: now we know bidirectional communication + is possible using the address and socket that the reply + packet used. */ + if(!n->status.udp_confirmed) { + char *address, *port; + sockaddr2str(&n->address, &address, &port); + send_request(mesh, n->nexthop->connection, NULL, "%d %s %s . -1 -1 -1 0 %s %s", ANS_KEY, n->name, n->name, address, port); + + free(address); + free(port); + n->status.udp_confirmed = true; + } + + // Reset the UDP ping timer. + + timeout_del(&mesh->loop, &n->udp_ping_timeout); + timeout_add(&mesh->loop, &n->udp_ping_timeout, &udp_probe_timeout_handler, n, &(struct timespec) { + 30, 0 + }); + + if(len > n->maxmtu) { + logger(mesh, MESHLINK_INFO, "Increase in PMTU to %s detected, restarting PMTU discovery", n->name); + n->minmtu = len; + n->maxmtu = MTU; + /* Set mtuprobes to 1 so that try_pmtu() doesn't reset maxmtu */ + n->mtuprobes = 1; + return; + } else if(n->mtuprobes < 0 && len == n->maxmtu) { + /* We got a maxmtu sized packet, confirming the PMTU is still valid. 
*/ + n->mtuprobes = -1; + n->last_mtu_probe_sent = mesh->loop.now; + } + + /* If applicable, raise the minimum supported PMTU */ + + try_fix_mtu(mesh, n); + + if(n->minmtu < len) { + n->minmtu = len; + update_node_pmtu(mesh, n); + } +} + +static void send_udp_probe_packet(meshlink_handle_t *mesh, node_t *n, int len) { + if(len < MIN_PROBE_SIZE) { + len = MIN_PROBE_SIZE; + } + + vpn_packet_t packet; + memset(packet.data, 0, 4); + packet.probe = true; + packet.data[0] = 0; + packet.data[1] = 1; + packet.data[2] = 0; + packet.data[3] = 0; + + if(len > 4) { + randomize(packet.data + 4, len - 4); + } + + packet.len = len; + + logger(mesh, MESHLINK_DEBUG, "Sending UDP probe length %d to %s", len, n->name); + + n->out_meta += packet.len; + send_udppacket(mesh, n, &packet); +} + +static void try_udp(meshlink_handle_t *mesh, node_t *n) { + /* Probe request */ + + struct timespec elapsed; + timespec_sub(&mesh->loop.now, &n->last_udp_probe_sent, &elapsed); + + int interval = n->status.udp_confirmed ? 
10 : 2; + + logger(mesh, MESHLINK_DEBUG, "try_udp(%s) %d %d\n", n->name, (int)elapsed.tv_sec, interval); + + if(elapsed.tv_sec >= interval) { + n->last_udp_probe_sent = mesh->loop.now; + send_udp_probe_packet(mesh, n, MIN_PROBE_SIZE); + + if(!n->status.udp_confirmed && n->prevedge) { + n->status.broadcast = true; + send_udp_probe_packet(mesh, n, MIN_PROBE_SIZE); + n->status.broadcast = false; + } + } +} + +static uint16_t choose_initial_maxmtu(meshlink_handle_t *mesh, node_t *n) { +#ifdef IP_MTU + + int sock = -1; + + sockaddr_t sa_buf; + const sockaddr_t *sa; + int sockindex; + choose_udp_address(mesh, n, &sa, &sockindex, &sa_buf); + + if(!sa) { + return MTU; + } + + sock = socket(sa->sa.sa_family, SOCK_DGRAM, IPPROTO_UDP); + + if(sock < 0) { + logger(mesh, MESHLINK_ERROR, "Creating MTU assessment socket for %s failed: %s", n->name, sockstrerror(sockerrno)); + return MTU; + } + + if(connect(sock, &sa->sa, SALEN(sa->sa))) { + logger(mesh, MESHLINK_ERROR, "Connecting MTU assessment socket for %s failed: %s", n->name, sockstrerror(sockerrno)); + close(sock); + return MTU; + } + + int ip_mtu; + socklen_t ip_mtu_len = sizeof(ip_mtu); + + if(getsockopt(sock, IPPROTO_IP, IP_MTU, &ip_mtu, &ip_mtu_len)) { + logger(mesh, MESHLINK_ERROR, "getsockopt(IP_MTU) on %s failed: %s", n->name, sockstrerror(sockerrno)); + close(sock); + return MTU; + } + + close(sock); + + /* Calculate the maximum SPTPS payload based on the interface MTU */ + uint16_t mtu = ip_mtu; + mtu -= (sa->sa.sa_family == AF_INET6) ? 40 : 20; /* IPv6 or IPv4 */ + mtu -= 8; /* UDP */ + mtu -= 21; /* SPTPS */ + + if(mtu < 512) { + logger(mesh, MESHLINK_ERROR, "getsockopt(IP_MTU) on %s returned absurdly small value: %d", n->name, ip_mtu); + return MTU; + } + + if(mtu > MTU) { + return MTU; + } + + logger(mesh, MESHLINK_INFO, "Using system-provided maximum MTU for %s: %hd", n->name, mtu); + return mtu; + +#else + (void)n; + return MTU; +#endif +} + +/* This function tries to determines the PMTU of a node. 
+ By calling this function repeatedly, n->minmtu will be progressively + increased, and at some point, n->mtu will be fixed to n->minmtu. If the PMTU + is already fixed, this function checks if it can be increased. +*/ + +static void try_pmtu(meshlink_handle_t *mesh, node_t *n) { + logger(mesh, MESHLINK_DEBUG, "try_pmtu(%s) %d %d\n", n->name, n->mtuprobes, n->status.udp_confirmed); + + if(!n->status.udp_confirmed) { + n->mtuprobes = 0; + n->minmtu = 0; + n->maxmtu = MTU; + return; + } + + struct timespec elapsed; + + timespec_sub(&mesh->loop.now, &n->last_mtu_probe_sent, &elapsed); + + if(n->mtuprobes >= 0) { + /* Fast probing, send three packets per second */ + if(n->mtuprobes != 0 && elapsed.tv_sec == 0 && elapsed.tv_nsec < 333333333) { + return; + } + } else { + if(n->mtuprobes < -1) { + /* We didn't get an answer to the last probe, try again once every second */ + if(elapsed.tv_sec < 1) { + return; + } + } else { + /* Slow probing, send one packet every pinginterval */ + int pinginterval = mesh->dev_class_traits[n->devclass].pinginterval; + + if(elapsed.tv_sec < pinginterval) { + return; + } + } + } + + n->last_mtu_probe_sent = mesh->loop.now; + + try_fix_mtu(mesh, n); + + if(n->mtuprobes < -3) { + /* We lost three PMTU probes, restart discovery */ + logger(mesh, MESHLINK_INFO, "Decrease in PMTU to %s detected, restarting PMTU discovery", n->name); + n->mtuprobes = 0; + n->minmtu = 0; + } + + if(n->mtuprobes < 0) { + /* After the initial discovery, we only send one maxmtu and one + maxmtu + 1 probe to detect PMTU increases. */ + send_udp_probe_packet(mesh, n, n->maxmtu); + + if(n->mtuprobes == -1 && n->maxmtu + 1 < MTU) { + send_udp_probe_packet(mesh, n, n->maxmtu + 1); + } + + n->mtuprobes--; + } else { + /* Starting parameters. 
*/ + uint16_t len; + + if(n->mtuprobes == 0) { + /* First packet is always the maximum MTU size */ + n->maxmtu = choose_initial_maxmtu(mesh, n); + len = n->maxmtu; + } else { + if(n->last_mtu_len == n->minmtu) { + /* The previous probe was succesful, increase the size */ + len = n->minmtu + (n->maxmtu - n->minmtu + 1) / 2; + } else { + /* The previous probe was unsuccesful, decrease the size */ + len = n->minmtu + (n->last_mtu_len - n->minmtu) / 2; + } + } + + n->last_mtu_len = len; + send_udp_probe_packet(mesh, n, len); + n->mtuprobes++; + } +} + +/* Keep the connection to the given node alive. + * Ensure we have a valid key, and check whether UDP is working. + */ + +void keepalive(meshlink_handle_t *mesh, node_t *n, bool traffic) { + logger(mesh, MESHLINK_DEBUG, "keepalive(%s) %d %d\n", n->name, n->status.reachable, n->status.validkey); + + if(!n->status.reachable || !n->status.validkey) { + return; + } + + try_udp(mesh, n); + + if(traffic) { + try_pmtu(mesh, n); + } + + /* If we want to send traffic but we don't have a working UDP + * connection, we are going to forward the traffic to the nexthop, so + * try to keep that one alive as well. */ + + if(traffic && !n->status.udp_confirmed && n != n->nexthop) { + keepalive(mesh, n->nexthop, traffic); + } +} + diff --git a/src/pmtu.h b/src/pmtu.h new file mode 100644 index 00000000..3e4c5ac6 --- /dev/null +++ b/src/pmtu.h @@ -0,0 +1,28 @@ +#ifndef MESHLINK_PMTU_H +#define MESHLINK_PMTU_H + +/* + pmtu.h -- header for pmtu.c + Copyright (C) 2020 Guus Sliepen + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +*/ + +#define MIN_PROBE_SIZE 4 + +extern void keepalive(struct meshlink_handle *mesh, struct node_t *n, bool traffic); +extern void udp_probe_h(struct meshlink_handle *mesh, struct node_t *n, struct vpn_packet_t *packet, uint16_t len); + +#endif diff --git a/src/protocol_key.c b/src/protocol_key.c index 745845a6..be81ec4a 100644 --- a/src/protocol_key.c +++ b/src/protocol_key.c @@ -26,6 +26,7 @@ #include "netutl.h" #include "node.h" #include "prf.h" +#include "pmtu.h" #include "protocol.h" #include "sptps.h" #include "utils.h" @@ -496,7 +497,7 @@ bool ans_key_h(meshlink_handle_t *mesh, connection_t *c, const char *request) { update_node_udp(mesh, from, &sa); } - send_mtu_probe(mesh, from); + keepalive(mesh, from, true); } return true; diff --git a/test/channels-udp.c b/test/channels-udp.c index 6b71ebdd..be1ced64 100644 --- a/test/channels-udp.c +++ b/test/channels-udp.c @@ -169,13 +169,17 @@ int main(void) { fprintf(stderr, "%s received %zu\n", clients[i].mesh->name, clients[i].received); } + bool got_large_packet = false; + for(int i = 0; i < 3; i++) { size_t max_received = SMALL_SIZE * SMALL_COUNT + LARGE_SIZE; assert(clients[i].received >= max_received / 2); assert(clients[i].received <= max_received); - assert(clients[i].got_large_packet); + got_large_packet |= clients[i].got_large_packet; } + assert(got_large_packet); + // Clean up. for(int i = 0; i < 3; i++) {