2 net.c -- most of the network code
3 Copyright (C) 2014-2017 Guus Sliepen <guus@meshlink.io>
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License along
16 with this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 #include "connection.h"
28 #include "meshlink_internal.h"
/* Integer helper taking two ints and returning an int.
   Used by periodic_handler() to cap retry_timeout at 60.
   Presumably returns the smaller of a and b -- TODO confirm against the body. */
40 static inline int min(int a, int b) {
/* Fallback ping timeout in seconds: used by timeout_handler() when a connection
   has no node yet, as the initial re-arm delay of periodic_handler(), and as
   the per-node factor of its retry_timeout. */
45 static const int default_timeout = 5;
/* Fallback ping interval in seconds, used by timeout_handler() when a
   connection has no node yet. */
46 static const int default_interval = 60;
49 Terminate a connection:
51 - Remove the edge representing this connection
53 - Check if we need to retry making an outgoing connection
/* Close meta connection c and clean up the state that refers to it.
   mesh   - mesh handle that owns the connection
   c      - connection to close; it is passed to connection_del() here, so
            callers should treat it as gone afterwards
   report - gates the clean-up of the reverse edge below; callers in this file
            pass c->status.active or true */
55 void terminate_connection(meshlink_handle_t *mesh, connection_t *c, bool report) {
56 if(c->status.active) {
57 logger(mesh, MESHLINK_INFO, "Closing connection with %s", c->name);
// Only detach from the node if this is the connection the node currently points at.
60 if(c->node && c->node->connection == c) {
// Tell the application the meta connection is gone (third argument false), but only if it was active.
61 if(c->status.active && mesh->meta_status_cb) {
62 mesh->meta_status_cb(mesh, (meshlink_node_t *)c->node, false);
65 c->node->connection = NULL;
68 c->status.active = false;
// Announce removal of the edge for this connection to mesh->everyone, then delete it locally.
72 send_del_edge(mesh, mesh->everyone, c->edge, 0);
75 edge_del(mesh, c->edge);
78 /* Run MST and SSSP algorithms */
82 /* If the node is not reachable anymore but we remember it had an edge to us, clean it up */
84 if(report && c->node && !c->node->status.reachable) {
// Reverse edge: from the peer node towards ourselves.
86 e = lookup_edge(c->node, mesh->self);
89 send_del_edge(mesh, mesh->everyone, e, 0);
// Read c->outgoing before connection_del(); c is not touched again after that call.
95 outgoing_t *outgoing = c->outgoing;
96 connection_del(mesh, c);
98 /* Check if this was our outgoing connection */
// Hand the outgoing record back to do_outgoing_connection() (presumably to retry -- confirm).
101 do_outgoing_connection(mesh, outgoing);
106 Check if the other end is active.
107 If we have sent packets, but didn't receive any,
108 then possibly the other end is dead. We send a
109 PING request over the meta connection. If the other
110 end does not reply in time, we consider them dead
111 and close the connection.
/* Timer callback that checks every meta connection for key and ping timeouts.
   loop - event loop; loop->data is the mesh handle
   data - the timer to re-arm at the end (main_loop() passes &mesh->pingtimer)
   The timer is re-armed for 1 second plus a prng(mesh, TIMER_FUDGE) component. */
113 static void timeout_handler(event_loop_t *loop, void *data) {
116 meshlink_handle_t *mesh = loop->data;
117 logger(mesh, MESHLINK_DEBUG, "timeout_handler()");
119 for list_each(connection_t, c, mesh->connections) {
// Limits come from the device class of the peer; the file-level defaults apply while the peer node is unknown.
120 int pingtimeout = c->node ? mesh->dev_class_traits[c->node->devclass].pingtimeout : default_timeout;
121 int pinginterval = c->node ? mesh->dev_class_traits[c->node->devclass].pinginterval : default_interval;
// Special case: outgoing connection that is not active yet and whose outgoing->timeout is still below 5.
123 if(c->outgoing && !c->status.active && c->outgoing->timeout < 5) {
127 // Also make sure that if outstanding key requests for the UDP counterpart of a connection have timed out, we restart them.
129 if(c->node->status.waitingforkey && c->node->last_req_key + pingtimeout < mesh->loop.now.tv_sec) {
130 send_req_key(mesh, c->node);
133 keepalive(mesh, c->node, false);
// Renew the SPTPS keys of an active connection once the last renewal is more than 3600 seconds old.
136 if(c->status.active && c->last_key_renewal + 3600 < mesh->loop.now.tv_sec) {
137 devtool_sptps_renewal_probe((meshlink_node_t *)c->node);
139 if(!sptps_force_kex(&c->sptps)) {
140 logger(mesh, MESHLINK_ERROR, "SPTPS key renewal for connection with %s failed", c->name);
141 terminate_connection(mesh, c, true);
144 c->last_key_renewal = mesh->loop.now.tv_sec;
// Nothing heard for longer than pingtimeout.
148 if(c->last_ping_time + pingtimeout < mesh->loop.now.tv_sec) {
149 if(c->status.active) {
150 if(c->status.pinged) {
// Cast the whole difference so the argument really is a long for %ld, even where time_t is wider than long.
151 logger(mesh, MESHLINK_INFO, "%s didn't respond to PING in %ld seconds", c->name, (long)(mesh->loop.now.tv_sec - c->last_ping_time));
// Not pinged yet: check whether a full ping interval has elapsed.
152 } else if(c->last_ping_time + pinginterval <= mesh->loop.now.tv_sec) {
// Connection never became active: distinguish a connect timeout from an authentication timeout in the log.
159 if(c->status.connecting) {
160 logger(mesh, MESHLINK_WARNING, "Timeout while connecting to %s", c->name);
162 logger(mesh, MESHLINK_WARNING, "Timeout from %s during authentication", c->name);
// Only report the termination to the mesh if the connection had been active.
166 terminate_connection(mesh, c, c->status.active);
// Re-arm this timer.
171 timeout_set(&mesh->loop, data, &(struct timespec) {
172 1, prng(mesh, TIMER_FUDGE)
176 // devclass asc, last_successfull_connection desc
/* Comparator over node_t, passed to splay_alloc_tree() by periodic_handler()
   for the initial-connect and partition-healing candidate sets.
   a, b - pointers to node_t
   Compares devclass first, then last_successfull_connection, with a zero
   timestamp on either side special-cased.
   Presumably returns negative/zero/positive in the usual comparator manner --
   TODO confirm the value returned by each branch. */
177 static int node_compare_devclass_asc_lsc_desc(const void *a, const void *b) {
178 const node_t *na = a, *nb = b;
// Primary key: device class.
180 if(na->devclass < nb->devclass) {
184 if(na->devclass > nb->devclass) {
// Secondary key: time of the last successful connection.
188 if(na->last_successfull_connection == nb->last_successfull_connection) {
// A zero timestamp on a is handled together with "a is more recent than b".
192 if(na->last_successfull_connection == 0 || na->last_successfull_connection > nb->last_successfull_connection) {
// A zero timestamp on b is handled together with "a is older than b".
196 if(nb->last_successfull_connection == 0 || na->last_successfull_connection < nb->last_successfull_connection) {
211 // last_successfull_connection desc
/* Comparator over node_t, passed to splay_alloc_tree() by periodic_handler()
   when looking for a better node within one device class.
   a, b - pointers to node_t
   Compares last_successfull_connection only, with a zero timestamp on either
   side special-cased.
   Presumably returns negative/zero/positive in the usual comparator manner --
   TODO confirm the value returned by each branch. */
212 static int node_compare_lsc_desc(const void *a, const void *b) {
213 const node_t *na = a, *nb = b;
215 if(na->last_successfull_connection == nb->last_successfull_connection) {
// A zero timestamp on a is handled together with "a is more recent than b".
219 if(na->last_successfull_connection == 0 || na->last_successfull_connection > nb->last_successfull_connection) {
// A zero timestamp on b is handled together with "a is older than b".
223 if(nb->last_successfull_connection == 0 || na->last_successfull_connection < nb->last_successfull_connection) {
/* Comparator over node_t, passed to splay_alloc_tree() by periodic_handler()
   when choosing a node to disconnect from.
   a, b - pointers to node_t
   Compares devclass only; the name suggests descending order.
   Presumably returns negative/zero/positive in the usual comparator manner --
   TODO confirm the value returned by each branch. */
239 static int node_compare_devclass_desc(const void *a, const void *b) {
240 const node_t *na = a, *nb = b;
242 if(na->devclass < nb->devclass) {
246 if(na->devclass > nb->devclass) {
268 // find the best one for initial connect
273 where dclass <= my.dclass and !connection and (timestamp - last_retry) > retry_timeout
274 order by dclass asc, last_connection desc
280 // find better nodes to connect to: in case we have less than min connections within [BACKBONE, i] and there are nodes which we are not connected to within the range
284 for i = BACKBONE to my.dclass
285 j += count(from connections where node.dclass = i)
289 where dclass = i and !connection and (timestamp - last_retry) > retry_timeout
290 order by last_connection desc
302 where dclass <= my.dclass and !reachable and (timestamp - last_retry) > retry_timeout
303 order by dclass asc, last_connection desc
315 // disconnect outgoing connections in case we have more than min connections within [BACKBONE, i] and there are nodes which we are connected to within the range [i, PORTABLE]
319 for i = BACKBONE to my.dclass
320 j += count(from connections where node.dclass = i)
324 where dclass >= i and outgoing_connection
332 // disconnect connections in case we have more than enough connections
337 where outgoing_connection
349 next (timeout, autoconnect)
/* Periodic housekeeping timer callback.
   loop - event loop; loop->data is the mesh handle
   data - the timer to re-arm at the end (main_loop() passes &mesh->periodictimer)

   Steps:
   1. Throttle when both contradicting_add_edge and contradicting_del_edge
      exceed 100: log a warning, sleep mesh->sleeptime seconds and double
      sleeptime. sleeptime is also halved with a floor of 10 (presumably on the
      non-throttled path -- confirm). Both counters are reset.
   2. Autoconnect, only when more than one node is known: pick at most one node
      to connect to and at most one node to disconnect from.
   3. For every node: write back dirty configuration, and renew SPTPS keys of
      reachable nodes with a valid key whose last_req_key is over 3600 seconds old.
   4. Re-arm the timer with the local timeout plus a prng(mesh, TIMER_FUDGE) component.

   NOTE(review): "sleeptime *= 2" followed by a "< 0" test depends on signed
   overflow wrapping, which is undefined in C if sleeptime is a signed int (it
   is logged with %d) -- consider bounding the value before doubling.
   NOTE(review): nanosleep() blocks inside a timer callback, so it presumably
   stalls the whole event loop for sleeptime seconds -- confirm this is intended.
   NOTE(review): cur_connects, min_connects and max_connects are unsigned int
   but are logged with %d.
   NOTE(review): in the too-many-connections case any active connection can be
   chosen, so connection->outgoing handed to list_delete() may be NULL --
   confirm list_delete() tolerates that. */
356 static void periodic_handler(event_loop_t *loop, void *data) {
357 meshlink_handle_t *mesh = loop->data;
359 /* Check if there are too many contradicting ADD_EDGE and DEL_EDGE messages.
360 This usually only happens when another node has the same Name as this node.
361 If so, sleep for a short while to prevent a storm of contradicting messages.
364 if(mesh->contradicting_del_edge > 100 && mesh->contradicting_add_edge > 100) {
365 logger(mesh, MESHLINK_WARNING, "Possible node with same Name as us! Sleeping %d seconds.", mesh->sleeptime);
366 struct timespec ts = {mesh->sleeptime, 0};
367 nanosleep(&ts, NULL);
368 mesh->sleeptime *= 2;
370 if(mesh->sleeptime < 0) {
371 mesh->sleeptime = 3600;
374 mesh->sleeptime /= 2;
376 if(mesh->sleeptime < 10) {
377 mesh->sleeptime = 10;
381 mesh->contradicting_add_edge = 0;
382 mesh->contradicting_del_edge = 0;
384 int timeout = default_timeout;
386 /* Check if we need to make or break connections. */
// Autoconnect only makes sense when nodes other than ourselves are known.
388 if(mesh->nodes->count > 1) {
390 logger(mesh, MESHLINK_DEBUG, "--- autoconnect begin ---");
// Per-node retry back-off: grows with the number of known nodes, capped at 60 seconds.
392 int retry_timeout = min(mesh->nodes->count * default_timeout, 60);
394 logger(mesh, MESHLINK_DEBUG, "* devclass = %d", mesh->devclass);
395 logger(mesh, MESHLINK_DEBUG, "* nodes = %d", mesh->nodes->count);
396 logger(mesh, MESHLINK_DEBUG, "* retry_timeout = %d", retry_timeout);
399 // connect disconnect nodes
// At most one node is selected for connecting and one for disconnecting per run.
401 node_t *connect_to = NULL;
402 node_t *disconnect_from = NULL;
// cur_connects is derived from the active entries of mesh->connections.
407 unsigned int cur_connects = 0;
409 for list_each(connection_t, c, mesh->connections) {
410 if(c->status.active) {
415 logger(mesh, MESHLINK_DEBUG, "* cur_connects = %d", cur_connects);
416 logger(mesh, MESHLINK_DEBUG, "* outgoings = %d", mesh->outgoings->count);
418 // get min_connects and max_connects
// Both limits come from the traits of our own device class.
420 unsigned int min_connects = mesh->dev_class_traits[mesh->devclass].min_connects;
421 unsigned int max_connects = mesh->dev_class_traits[mesh->devclass].max_connects;
423 logger(mesh, MESHLINK_DEBUG, "* min_connects = %d", min_connects);
424 logger(mesh, MESHLINK_DEBUG, "* max_connects = %d", max_connects);
426 // find the best one for initial connect
// Below min_connects. Candidates: not ourselves, devclass <= ours, no connection, not blacklisted,
// and either never tried or last tried more than retry_timeout seconds ago.
428 if(cur_connects < min_connects) {
429 splay_tree_t *nodes = splay_alloc_tree(node_compare_devclass_asc_lsc_desc, NULL);
431 for splay_each(node_t, n, mesh->nodes) {
432 logger(mesh, MESHLINK_DEBUG, "* %s->devclass = %d", n->name, n->devclass);
434 if(n != mesh->self && n->devclass <= mesh->devclass && !n->connection && !n->status.blacklisted && (n->last_connect_try == 0 || (mesh->loop.now.tv_sec - n->last_connect_try) > retry_timeout)) {
435 splay_insert(nodes, n);
// The head of the sorted candidate tree is the preferred node.
441 connect_to = (node_t *)nodes->head->data;
443 logger(mesh, MESHLINK_DEBUG, "* found best one for initial connect: %s", connect_to->name);
445 logger(mesh, MESHLINK_DEBUG, "* could not find node for initial connect");
448 splay_delete_tree(nodes);
452 // find better nodes to connect to
// Between min_connects and max_connects: walk the device classes from 0 up to our own and look
// for an unconnected node in a class where the connection tally is still below min_connects.
454 if(!connect_to && min_connects <= cur_connects && cur_connects < max_connects) {
455 unsigned int connects = 0;
457 for(dev_class_t devclass = 0; devclass <= mesh->devclass; ++devclass) {
458 for list_each(connection_t, c, mesh->connections) {
459 if(c->status.active && c->node && c->node->devclass == devclass) {
464 if(connects < min_connects) {
465 splay_tree_t *nodes = splay_alloc_tree(node_compare_lsc_desc, NULL);
467 for splay_each(node_t, n, mesh->nodes) {
468 if(n != mesh->self && n->devclass == devclass && !n->connection && !n->status.blacklisted && (n->last_connect_try == 0 || (mesh->loop.now.tv_sec - n->last_connect_try) > retry_timeout)) {
469 splay_insert(nodes, n);
474 logger(mesh, MESHLINK_DEBUG, "* found better node");
475 connect_to = (node_t *)nodes->head->data;
477 splay_delete_tree(nodes);
481 splay_delete_tree(nodes);
488 logger(mesh, MESHLINK_DEBUG, "* could not find better nodes");
// Partition healing: same range check, but candidates are nodes that are currently unreachable.
495 if(!connect_to && min_connects <= cur_connects && cur_connects < max_connects) {
496 splay_tree_t *nodes = splay_alloc_tree(node_compare_devclass_asc_lsc_desc, NULL);
498 for splay_each(node_t, n, mesh->nodes) {
499 if(n != mesh->self && n->devclass <= mesh->devclass && !n->status.reachable && !n->status.blacklisted && (n->last_connect_try == 0 || (mesh->loop.now.tv_sec - n->last_connect_try) > retry_timeout)) {
500 splay_insert(nodes, n);
505 logger(mesh, MESHLINK_DEBUG, "* try to heal partition");
506 connect_to = (node_t *)nodes->head->data;
508 logger(mesh, MESHLINK_DEBUG, "* could not find nodes for partition healing");
511 splay_delete_tree(nodes);
// Perform the connect: record the attempt time first so the retry back-off applies to this node.
517 if(connect_to && !connect_to->connection) {
518 connect_to->last_connect_try = mesh->loop.now.tv_sec;
519 logger(mesh, MESHLINK_DEBUG, "Autoconnect trying to connect to %s", connect_to->name);
521 /* check if there is already a connection attempt to this node */
524 for list_each(outgoing_t, outgoing, mesh->outgoings) {
525 if(outgoing->node == connect_to) {
526 logger(mesh, MESHLINK_DEBUG, "* skip autoconnect since it is an outgoing connection already");
// For an unreachable node the public key has to be readable via node_read_public_key().
532 if(!connect_to->status.reachable && !node_read_public_key(mesh, connect_to)) {
533 logger(mesh, MESHLINK_DEBUG, "* skip autoconnect since we don't know this node's public key");
// Create a zeroed outgoing record, queue it on mesh->outgoings and start the connection.
538 logger(mesh, MESHLINK_DEBUG, "Autoconnecting to %s", connect_to->name);
539 outgoing_t *outgoing = xzalloc(sizeof(outgoing_t));
540 outgoing->node = connect_to;
541 list_insert_tail(mesh->outgoings, outgoing);
542 setup_outgoing_connection(mesh, outgoing);
547 // disconnect suboptimal outgoing connections
// Above min_connects: walk the device classes again; once the tally exceeds min_connects,
// candidates are nodes of outgoing connections whose devclass is >= the current class.
549 if(min_connects < cur_connects /*&& cur_connects <= max_connects*/) {
550 unsigned int connects = 0;
552 for(dev_class_t devclass = 0; devclass <= mesh->devclass; ++devclass) {
553 for list_each(connection_t, c, mesh->connections) {
554 if(c->status.active && c->node && c->node->devclass == devclass) {
559 if(min_connects < connects) {
560 splay_tree_t *nodes = splay_alloc_tree(node_compare_devclass_desc, NULL);
562 for list_each(connection_t, c, mesh->connections) {
563 if(c->outgoing && c->node && c->node->devclass >= devclass) {
564 splay_insert(nodes, c->node);
569 logger(mesh, MESHLINK_DEBUG, "* disconnect suboptimal outgoing connection");
570 disconnect_from = (node_t *)nodes->head->data;
573 splay_delete_tree(nodes);
578 if(!disconnect_from) {
579 logger(mesh, MESHLINK_DEBUG, "* no suboptimal outgoing connections");
584 // disconnect connections (too many connections)
// Above max_connects: every active connection with a node is a candidate, outgoing or not.
586 if(!disconnect_from && max_connects < cur_connects) {
587 splay_tree_t *nodes = splay_alloc_tree(node_compare_devclass_desc, NULL);
589 for list_each(connection_t, c, mesh->connections) {
590 if(c->status.active && c->node) {
591 splay_insert(nodes, c->node);
596 logger(mesh, MESHLINK_DEBUG, "* disconnect connection (too many connections)");
599 disconnect_from = (node_t *)nodes->head->data;
601 logger(mesh, MESHLINK_DEBUG, "* no node we want to disconnect, even though we have too many connections");
604 splay_delete_tree(nodes);
608 // perform disconnect
// The outgoing record is removed and cleared first, so terminate_connection() sees c->outgoing == NULL.
610 if(disconnect_from && disconnect_from->connection) {
611 logger(mesh, MESHLINK_DEBUG, "Autodisconnecting from %s", disconnect_from->connection->name);
612 list_delete(mesh->outgoings, disconnect_from->connection->outgoing);
613 disconnect_from->connection->outgoing = NULL;
614 terminate_connection(mesh, disconnect_from->connection, disconnect_from->connection->status.active);
617 // reduce timeout if we don't have enough connections + outgoings
618 if(cur_connects + mesh->outgoings->count < 3) {
624 logger(mesh, MESHLINK_DEBUG, "--- autoconnect end ---");
// Per-node maintenance, independent of autoconnect.
627 for splay_each(node_t, n, mesh->nodes) {
// Persist nodes flagged dirty; a failed write is only logged.
628 if(n->status.dirty) {
629 if(!node_write_config(mesh, n, false)) {
630 logger(mesh, MESHLINK_DEBUG, "Could not update %s", n->name);
// Renew the SPTPS keys of a reachable node with a valid key once last_req_key is over 3600 seconds old.
634 if(n->status.reachable && n->status.validkey && n->last_req_key + 3600 < mesh->loop.now.tv_sec) {
635 logger(mesh, MESHLINK_DEBUG, "SPTPS key renewal for node %s", n->name);
636 devtool_sptps_renewal_probe((meshlink_node_t *)n);
// On failure the key is invalidated and the SPTPS session of the node is stopped.
638 if(!sptps_force_kex(&n->sptps)) {
639 logger(mesh, MESHLINK_ERROR, "SPTPS key renewal for node %s failed", n->name);
640 n->status.validkey = false;
641 sptps_stop(&n->sptps);
642 n->status.waitingforkey = false;
643 n->last_req_key = -3600;
645 n->last_req_key = mesh->loop.now.tv_sec;
// Re-arm this timer.
650 timeout_set(&mesh->loop, data, &(struct timespec) {
651 timeout, prng(mesh, TIMER_FUDGE)
/* Process pending data on meta connection c.
   mesh - mesh handle
   c    - connection to read from
   If receive_meta() reports failure the connection is terminated; the
   termination is reported only if the connection was active. */
655 void handle_meta_connection_data(meshlink_handle_t *mesh, connection_t *c) {
656 if(!receive_meta(mesh, c)) {
657 terminate_connection(mesh, c, c->status.active);
/* Reset reconnection and ping timers and check that active connections still
   have a valid local address.
   mesh - mesh handle
   NOTE(review): the probe socket opened below has to be closed again on every
   path -- confirm. */
662 void retry(meshlink_handle_t *mesh) {
663 /* Reset the reconnection timers for all outgoing connections */
664 for list_each(outgoing_t, outgoing, mesh->outgoings) {
665 outgoing->timeout = 0;
// Only touch the timer of an outgoing record that has a callback installed.
667 if(outgoing->ev.cb) {
668 timeout_set(&mesh->loop, &outgoing->ev, &(struct timespec) {
674 /* For active connections, check if their addresses are still valid.
675 * If yes, reset their ping timers, otherwise terminate them. */
676 for list_each(connection_t, c, mesh->connections) {
677 if(!c->status.active) {
// No ping outstanding: move last_ping_time far into the past.
681 if(!c->status.pinged) {
682 c->last_ping_time = -3600;
// Fetch the local address the connection socket is bound to.
686 socklen_t salen = sizeof(sa);
688 if(getsockname(c->socket, &sa.sa, &salen)) {
// The port is cleared so that only the address itself is tested by bind() below.
692 switch(sa.sa.sa_family) {
698 sa.in6.sin6_port = 0;
// Probe: try to bind a fresh TCP socket of the same family to that local address.
705 int sock = socket(sa.sa.sa_family, SOCK_STREAM, IPPROTO_TCP);
// EADDRNOTAVAIL means the local address is gone, so the connection is terminated.
711 if(bind(sock, &sa.sa, salen) && errno == EADDRNOTAVAIL) {
712 logger(mesh, MESHLINK_DEBUG, "Local address for connection to %s no longer valid, terminating", c->name);
713 terminate_connection(mesh, c, c->status.active);
719 /* Kick the ping timeout handler */
720 if(mesh->pingtimer.cb) {
721 timeout_set(&mesh->loop, &mesh->pingtimer, &(struct timespec) {
728 this is where it all happens...
/* Run the event loop of the mesh until event_loop_run() returns.
   mesh - mesh handle
   Registers the ping timer, the periodic timer and the data-from-application
   signal before running the loop, and removes all three afterwards. */
730 void main_loop(meshlink_handle_t *mesh) {
// Ping timer: handled by timeout_handler(), which receives the timer itself as data so it can re-arm it.
731 timeout_add(&mesh->loop, &mesh->pingtimer, timeout_handler, &mesh->pingtimer, &(struct timespec) {
732 1, prng(mesh, TIMER_FUDGE)
// Periodic timer: handled by periodic_handler(), again with the timer itself as data.
734 timeout_add(&mesh->loop, &mesh->periodictimer, periodic_handler, &mesh->periodictimer, &(struct timespec) {
// Signal through which queued application data reaches meshlink_send_from_queue().
739 mesh->datafromapp.signum = 0;
740 signal_add(&mesh->loop, &mesh->datafromapp, meshlink_send_from_queue, mesh, mesh->datafromapp.signum);
// A false return from event_loop_run() is logged with strerror(errno) and reported as MESHLINK_ENETWORK.
742 if(!event_loop_run(&mesh->loop, mesh)) {
743 logger(mesh, MESHLINK_ERROR, "Error while waiting for input: %s", strerror(errno));
744 call_error_cb(mesh, MESHLINK_ENETWORK);
// Unregister everything that was added above.
747 signal_del(&mesh->loop, &mesh->datafromapp);
748 timeout_del(&mesh->loop, &mesh->periodictimer);
749 timeout_del(&mesh->loop, &mesh->pingtimer);