2 net.c -- most of the network code
3 Copyright (C) 2014 Guus Sliepen <guus@meshlink.io>
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License along
16 with this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 #include "connection.h"
27 #include "meshlink_internal.h"
/*
  Small integer helper. The only call site visible in this file is
  retry_timeout = min(mesh->nodes->count * 5, 60) in periodic_handler().
  Presumably returns the smaller of a and b -- the body is not visible
  in this view, confirm.
  NOTE(review): the `const` qualifier on a by-value return type has no
  effect in C and only triggers an ignored-qualifier warning.
*/
36 static const int min(int a, int b) {
41 Terminate a connection:
43 - Remove the edge representing this connection
45 - Check if we need to retry making an outgoing connection
/*
  Tear down connection c of mesh.

  mesh   - handle owning the connection, edge and outgoing lists.
  c      - connection to close; it is handed to connection_del() below.
  report - consulted before the remaining reverse edge (node -> self) is
           announced as deleted. Callers in this file pass
           c->status.active for it.

  NOTE(review): several guarding conditions of this function are not
  visible in this view; the comments below describe only the visible calls.
*/
47 void terminate_connection(meshlink_handle_t *mesh, connection_t *c, bool report) {
48 logger(mesh, MESHLINK_INFO, "Closing connection with %s (%s)", c->name, c->hostname);
50 c->status.active = false;
/* Clear the node's back-pointer only if it still refers to this connection. */
52 if(c->node && c->node->connection == c)
53 c->node->connection = NULL;
/* Announce the removal of this connection's edge to mesh->everyone, then drop it locally. */
57 send_del_edge(mesh, mesh->everyone, c->edge);
59 edge_del(mesh, c->edge);
62 /* Run MST and SSSP algorithms */
66 /* If the node is not reachable anymore but we remember it had an edge to us, clean it up */
/* NOTE(review): c->node is dereferenced here without the NULL check used
   above -- confirm an enclosing guard makes this safe. */
68 if(report && !c->node->status.reachable) {
70 e = lookup_edge(c->node, mesh->self);
72 send_del_edge(mesh, mesh->everyone, e);
/* c->outgoing is copied before connection_del(); presumably c must not be
   touched afterwards -- confirm against connection_del(). */
78 outgoing_t *outgoing = c->outgoing;
79 connection_del(mesh, c);
81 /* Check if this was our outgoing connection */
84 do_outgoing_connection(mesh, outgoing);
87 /* Clean up dead proxy processes */
/* WNOHANG keeps waitpid() from blocking; the loop ends as soon as no
   exited child is left to reap. */
89 while(waitpid(-1, NULL, WNOHANG) > 0);
94 Check if the other end is active.
95 If we have sent packets, but didn't receive any,
96 then possibly the other end is dead. We send a
97 PING request over the meta connection. If the other
98 end does not reply in time, we consider them dead
99 and close the connection.
/*
  Timer callback that walks all connections of the mesh and handles
  ping/key-request timeouts.

  loop - event loop; loop->data is used as the meshlink handle.
  data - passed straight to timeout_set() at the end to re-arm the timer;
         main_loop() registers this handler with &mesh->pingtimer as data.
*/
101 static void timeout_handler(event_loop_t *loop, void *data) {
102 meshlink_handle_t *mesh = loop->data;
103 logger(mesh, MESHLINK_DEBUG, "timeout_handler()");
105 for list_each(connection_t, c, mesh->connections) {
106 // Also make sure that if outstanding key requests for the UDP counterpart of a connection have timed out, we restart them.
108 if(c->node->status.waitingforkey && c->node->last_req_key + mesh->pingtimeout <= mesh->loop.now.tv_sec)
109 send_req_key(mesh, c->node);
// Nothing below applies until at least pingtimeout seconds have passed since last_ping_time.
111 if(c->last_ping_time + mesh->pingtimeout <= mesh->loop.now.tv_sec) {
112 if(c->status.active) {
113 if(c->status.pinged) {
// NOTE(review): the (long) cast binds to tv_sec only; the type of the %ld argument
// then depends on the type of last_ping_time -- confirm it is no wider than long.
114 logger(mesh, MESHLINK_INFO, "%s (%s) didn't respond to PING in %ld seconds", c->name, c->hostname, (long)mesh->loop.now.tv_sec - c->last_ping_time);
// Active, not yet pinged, and pinginterval has elapsed; the body of this branch is not visible here.
115 } else if(c->last_ping_time + mesh->pinginterval <= mesh->loop.now.tv_sec) {
// Two different warnings for a timeout: one while connecting, one during authentication.
122 if(c->status.connecting)
123 logger(mesh, MESHLINK_WARNING, "Timeout while connecting to %s (%s)", c->name, c->hostname);
125 logger(mesh, MESHLINK_WARNING, "Timeout from %s (%s) during authentication", c->name, c->hostname);
127 terminate_connection(mesh, c, c->status.active);
// Re-arm: pingtimeout in the first timeval field plus rand() % 100000 in the second
// (a random offset below 0.1 s if that field is tv_usec).
131 timeout_set(&mesh->loop, data, &(struct timeval){mesh->pingtimeout, rand() % 100000});
134 // devclass asc, last_successfull_connection desc
// Comparator over two node_t objects, handed to splay_alloc_tree() in periodic_handler().
// It looks at devclass first and, when neither devclass test matches, at
// last_successfull_connection; a value of 0 is tested explicitly and shares a branch
// with the plain greater-than / less-than comparison.
// NOTE(review): the value returned by each branch is not visible in this view; the
// ordering named in the function name (devclass ascending, last successful connection
// descending) should be confirmed against the full source.
135 static int node_compare_devclass_asc_lsc_desc(const void *a, const void *b)
137 const node_t *na = a, *nb = b;
139 if(na->devclass < nb->devclass)
142 if(na->devclass > nb->devclass)
// Same devclass from here on (both tests above failed).
145 if(na->last_successfull_connection == nb->last_successfull_connection)
148 if(na->last_successfull_connection == 0 || na->last_successfull_connection > nb->last_successfull_connection)
151 if(nb->last_successfull_connection == 0 || na->last_successfull_connection < nb->last_successfull_connection)
163 // last_successfull_connection desc
// Comparator over two node_t objects that looks only at last_successfull_connection.
// Handed to splay_alloc_tree() in the "find better nodes" step of periodic_handler().
// A value of 0 is tested explicitly and shares a branch with the plain
// greater-than / less-than comparison.
// NOTE(review): the value returned by each branch is not visible in this view.
164 static int node_compare_lsc_desc(const void *a, const void *b)
166 const node_t *na = a, *nb = b;
168 if(na->last_successfull_connection == nb->last_successfull_connection)
171 if(na->last_successfull_connection == 0 || na->last_successfull_connection > nb->last_successfull_connection)
174 if(nb->last_successfull_connection == 0 || na->last_successfull_connection < nb->last_successfull_connection)
// Comparator over two node_t objects that looks at devclass. Handed to
// splay_alloc_tree() in both disconnect steps of periodic_handler(), where the first
// tree entry is taken as the node to disconnect from.
// NOTE(review): the value returned by each branch is not visible in this view;
// the descending order named in the function name should be confirmed.
187 static int node_compare_devclass_desc(const void *a, const void *b)
189 const node_t *na = a, *nb = b;
191 if(na->devclass < nb->devclass)
194 if(na->devclass > nb->devclass)
213 // find the best one for initial connect
218 where dclass <= my.dclass and !connection and (timestamp - last_retry) > retry_timeout
219 order by dclass asc, last_connection desc
225 // find better nodes to connect to: in case we have less than min connections within [BACKBONE, i] and there are nodes which we are not connected to within the range
229 for i = BACKBONE to my.dclass
230 j += count(from connections where node.dclass = i)
234 where dclass = i and !connection and (timestamp - last_retry) > retry_timeout
235 order by last_connection desc
247 where dclass <= my.dclass and !reachable and (timestamp - last_retry) > retry_timeout
248 order by dclass asc, last_connection desc
260 // disconnect outgoing connections in case we have more than min connections within [BACKBONE, i] and there are nodes which we are connected to within the range [i, PORTABLE]
264 for i = BACKBONE to my.dclass
265 j += count(from connections where node.dclass = i)
269 where dclass >= i and outgoing_connection
277 // disconnect connections in case we have more than enough connections
282 where outgoing_connection
294 next (timeout, autoconnect)
// Periodic timer callback with two jobs:
//   1. throttle when many contradicting ADD_EDGE/DEL_EDGE messages were counted,
//   2. autoconnect: pick at most one node to connect to and at most one to disconnect from.
//
// loop - event loop; loop->data is used as the meshlink handle.
// data - passed to timeout_set() at the end to re-arm the timer; main_loop()
//        registers this handler with &mesh->periodictimer as data.
//
// NOTE(review): a number of lines (braces, else branches, counter increments, the
// declaration of `timeout`) are not visible in this view; comments describe visible code only.
301 static void periodic_handler(event_loop_t *loop, void *data) {
302 meshlink_handle_t *mesh = loop->data;
// Back-off summary for the block that follows: when both contradiction counters exceed
// 100 the handler sleeps for mesh->sleeptime seconds and doubles sleeptime (reset to 3600
// if the doubled value is negative). The halving with a floor of 10 is presumably the
// else branch -- confirm. Both counters are then reset to 0.
// NOTE(review): the `< 0` test relies on signed overflow wrapping, which C does not
// guarantee (assuming sleeptime is a signed int, as the %d format suggests).
// NOTE(review): POSIX allows usleep() to fail with EINVAL for arguments of 1000000 or
// more, which is always the case here; confirm the target platforms accept it.
304 /* Check if there are too many contradicting ADD_EDGE and DEL_EDGE messages.
305 This usually only happens when another node has the same Name as this node.
306 If so, sleep for a short while to prevent a storm of contradicting messages.
309 if(mesh->contradicting_del_edge > 100 && mesh->contradicting_add_edge > 100) {
310 logger(mesh, MESHLINK_WARNING, "Possible node with same Name as us! Sleeping %d seconds.", mesh->sleeptime);
311 usleep(mesh->sleeptime * 1000000LL);
312 mesh->sleeptime *= 2;
313 if(mesh->sleeptime < 0)
314 mesh->sleeptime = 3600;
316 mesh->sleeptime /= 2;
317 if(mesh->sleeptime < 10)
318 mesh->sleeptime = 10;
321 mesh->contradicting_add_edge = 0;
322 mesh->contradicting_del_edge = 0;
326 /* Check if we need to make or break connections. */
// Autoconnect only runs when more than one node is known.
328 if(mesh->nodes->count > 1) {
330 logger(mesh, MESHLINK_INFO, "--- autoconnect begin ---");
// Minimum age of a node's last connect attempt before it is considered again:
// computed as min(5 * node count, 60).
332 int retry_timeout = min(mesh->nodes->count * 5, 60);
334 logger(mesh, MESHLINK_INFO, "* devclass = %d", mesh->devclass);
335 logger(mesh, MESHLINK_INFO, "* nodes = %d", mesh->nodes->count);
336 logger(mesh, MESHLINK_INFO, "* retry_timeout = %d", retry_timeout);
339 // nodes selected below to connect to / disconnect from (NULL = none selected)
341 node_t* connect_to = NULL;
342 node_t* disconnect_from = NULL;
// Count current connections; only connections without status.remove_unused are
// considered (the increment itself is not visible in this view).
347 int cur_connects = 0;
349 for list_each(connection_t, c, mesh->connections)
351 if(!c->status.remove_unused)
357 logger(mesh, MESHLINK_INFO, "* cur_connects = %d", cur_connects);
358 logger(mesh, MESHLINK_INFO, "* outgoings = %d", mesh->outgoings->count);
360 // get min_connects and max_connects
// The assert is the only visible bounds check before indexing dev_class_traits;
// it is compiled out when NDEBUG is defined.
362 assert(mesh->devclass >= 0 && mesh->devclass <= _DEV_CLASS_MAX);
364 int min_connects = dev_class_traits[mesh->devclass].min_connects;
365 int max_connects = dev_class_traits[mesh->devclass].max_connects;
367 logger(mesh, MESHLINK_INFO, "* min_connects = %d", min_connects);
368 logger(mesh, MESHLINK_INFO, "* max_connects = %d", max_connects);
371 // find the best one for initial connect
373 if(cur_connects < min_connects)
375 splay_tree_t *nodes = splay_alloc_tree(node_compare_devclass_asc_lsc_desc, NULL);
377 for splay_each(node_t, n, mesh->nodes)
379 logger(mesh, MESHLINK_INFO, "* n->devclass = %d", n->devclass);
// Candidate: not ourselves, devclass <= ours, no connection yet, and either never
// tried or last tried more than retry_timeout seconds ago.
380 if(n != mesh->self && n->devclass <= mesh->devclass && !n->connection && (n->last_connect_try == 0 || (time(NULL) - n->last_connect_try) > retry_timeout))
381 { splay_insert(nodes, n); }
386 logger(mesh, MESHLINK_INFO, "* found best one for initial connect");
// First tree entry is taken as the pick; presumably guarded by a check that the
// tree is non-empty on a line not visible here -- confirm.
389 connect_to = (node_t*)nodes->head->data;
392 { logger(mesh, MESHLINK_INFO, "* could not find node for initial connect"); }
394 splay_free_tree(nodes);
398 // find better nodes to connect to
400 if(!connect_to && min_connects <= cur_connects && cur_connects < max_connects)
// `connects` is declared outside the devclass loop, so it is not reset per devclass.
402 unsigned int connects = 0;
404 for(int devclass = 0; devclass <= mesh->devclass; ++devclass)
406 for list_each(connection_t, c, mesh->connections)
408 if(!c->status.remove_unused && c->node && c->node->devclass == devclass)
// NOTE(review): unsigned `connects` is compared with int `min_connects`;
// min_connects is converted to unsigned for the comparison.
412 if( connects < min_connects )
414 splay_tree_t *nodes = splay_alloc_tree(node_compare_lsc_desc, NULL);
416 for splay_each(node_t, n, mesh->nodes)
// Same candidate filter as the initial connect, but restricted to exactly this devclass.
418 if(n != mesh->self && n->devclass == devclass && !n->connection && (n->last_connect_try == 0 || (time(NULL) - n->last_connect_try) > retry_timeout))
419 { splay_insert(nodes, n); }
424 logger(mesh, MESHLINK_INFO, "* found better node");
425 connect_to = (node_t*)nodes->head->data;
// Two splay_free_tree() calls are visible; presumably they sit on different
// control-flow paths (found / not found) -- confirm no double free.
427 splay_free_tree(nodes);
431 splay_free_tree(nodes);
438 { logger(mesh, MESHLINK_INFO, "* could not find better nodes"); }
// Partition healing: same outer condition as the step above, but candidates are
// nodes currently marked as not reachable.
444 if(!connect_to && min_connects <= cur_connects && cur_connects < max_connects)
446 splay_tree_t *nodes = splay_alloc_tree(node_compare_devclass_asc_lsc_desc, NULL);
448 for splay_each(node_t, n, mesh->nodes)
450 if(n != mesh->self && n->devclass <= mesh->devclass && !n->status.reachable && (n->last_connect_try == 0 || (time(NULL) - n->last_connect_try) > retry_timeout))
451 { splay_insert(nodes, n); }
456 logger(mesh, MESHLINK_INFO, "* try to heal partition");
457 connect_to = (node_t*)nodes->head->data;
460 { logger(mesh, MESHLINK_INFO, "* could not find nodes for partition healing"); }
462 splay_free_tree(nodes);
// Perform the connect: stamp the attempt time, then create and start a new outgoing_t
// for the node's name. The outgoings list is scanned by name first (see the existing
// comment below); how a match is recorded is not visible in this view.
468 if(connect_to && !connect_to->connection)
470 connect_to->last_connect_try = time(NULL);
472 /* check if there is already a connection attempt to this node */
474 for list_each(outgoing_t, outgoing, mesh->outgoings)
476 if(!strcmp(outgoing->name, connect_to->name))
485 logger(mesh, MESHLINK_INFO, "Autoconnecting to %s", connect_to->name);
486 outgoing_t *outgoing = xzalloc(sizeof(outgoing_t));
487 outgoing->mesh = mesh;
488 outgoing->name = xstrdup(connect_to->name);
489 list_insert_tail(mesh->outgoings, outgoing);
490 setup_outgoing_connection(mesh, outgoing);
493 { logger(mesh, MESHLINK_INFO, "* skip autoconnect since it is an outgoing connection already"); }
497 // disconnect suboptimal outgoing connections
499 if(min_connects < cur_connects && cur_connects <= max_connects)
501 unsigned int connects = 0;
504 for(int devclass = 0; devclass <= mesh->devclass; ++devclass)
505 for list_each(connection_t, c, mesh->connections)
507 if(!c->status.remove_unused && c->node && c->node->devclass == devclass)
// NOTE(review): int `min_connects` versus unsigned `connects`, as in the connect step.
511 if( min_connects < connects )
513 splay_tree_t *nodes = splay_alloc_tree(node_compare_devclass_desc, NULL);
515 for list_each(connection_t, c, mesh->connections)
// Only connections that have an outgoing_t and whose node devclass is >= the
// current devclass are candidates here.
517 if(!c->status.remove_unused && c->outgoing && c->node && c->node->devclass >= devclass)
518 { splay_insert(nodes, c->node); }
523 logger(mesh, MESHLINK_INFO, "* disconnect suboptimal outgoing connection");
524 disconnect_from = (node_t*)nodes->head->data;
527 splay_free_tree(nodes);
533 { logger(mesh, MESHLINK_INFO, "* no suboptimal outgoing connections"); }
537 // disconnect connections (too many connections)
539 if(!disconnect_from && max_connects < cur_connects)
541 splay_tree_t *nodes = splay_alloc_tree(node_compare_devclass_desc, NULL);
543 for list_each(connection_t, c, mesh->connections)
// Unlike the previous step, this filter does not require c->outgoing.
545 if(!c->status.remove_unused && c->node)
546 { splay_insert(nodes, c->node); }
551 logger(mesh, MESHLINK_INFO, "* disconnect connection (too many connections)");
554 disconnect_from = (node_t*)nodes->head->data;
557 { logger(mesh, MESHLINK_INFO, "* no node we want to disconnect, even though we have too many connections"); }
559 splay_free_tree(nodes);
563 // perform disconnect
565 if(disconnect_from && disconnect_from->connection)
567 logger(mesh, MESHLINK_INFO, "Autodisconnecting from %s", disconnect_from->connection->name);
// NOTE(review): a node picked by the "too many connections" step may have no
// outgoing_t (that filter does not test c->outgoing); confirm list_delete()
// tolerates a NULL / absent element, unless a guard exists on a line not shown.
568 list_delete(mesh->outgoings, disconnect_from->connection->outgoing);
// Cleared before terminate_connection(), which calls do_outgoing_connection() with
// the connection's outgoing pointer.
569 disconnect_from->connection->outgoing = NULL;
570 terminate_connection(mesh, disconnect_from->connection, disconnect_from->connection->status.active);
576 logger(mesh, MESHLINK_INFO, "--- autoconnect end ---");
// Re-arm with `timeout` in the first timeval field (its declaration and value are not
// visible in this view) plus rand() % 100000 in the second.
579 timeout_set(&mesh->loop, data, &(struct timeval){timeout, rand() % 100000});
/*
  Process pending data on meta connection c.
  Calls receive_meta(); when it returns false the connection is torn down via
  terminate_connection(), passing c->status.active as the report flag.
*/
582 void handle_meta_connection_data(meshlink_handle_t *mesh, connection_t *c) {
583 if (!receive_meta(mesh, c)) {
584 terminate_connection(mesh, c, c->status.active);
/*
  Force an immediate reconnection attempt:
  - zero the timeout of every outgoing_t and schedule its event with a zero delay,
  - zero last_ping_time on connections that have an outgoing_t but no node yet,
  - schedule the ping timer (mesh->pingtimer) with a zero delay.
  NOTE(review): the timeout_set() call inside the first loop may be guarded by a
  condition on a line not visible in this view.
*/
589 void retry(meshlink_handle_t *mesh) {
590 /* Reset the reconnection timers for all outgoing connections */
591 for list_each(outgoing_t, outgoing, mesh->outgoings) {
592 outgoing->timeout = 0;
594 timeout_set(&mesh->loop, &outgoing->ev, &(struct timeval){0, 0});
597 /* Check for outgoing connections that are in progress, and reset their ping timers */
598 for list_each(connection_t, c, mesh->connections) {
/* With last_ping_time at 0, the last_ping_time + pingtimeout <= now test in
   timeout_handler() is met on its next run (for non-negative pingtimeout). */
599 if(c->outgoing && !c->node)
600 c->last_ping_time = 0;
603 /* Kick the ping timeout handler */
604 timeout_set(&mesh->loop, &mesh->pingtimer, &(struct timeval){0, 0});
608 this is where it all happens...
/*
  Register the timers and the application-data signal, run the event loop,
  and remove the timers again once event_loop_run() has returned.
  NOTE(review): the return statements are not visible in this view, so the
  meaning of the int result is not documented here.
*/
610 int main_loop(meshlink_handle_t *mesh) {
/* Ping timer: first expiry after pingtimeout plus a random rand() % 100000 offset.
   Each handler gets its own timer object as callback data and uses it to re-arm itself. */
611 timeout_add(&mesh->loop, &mesh->pingtimer, timeout_handler, &mesh->pingtimer, &(struct timeval){mesh->pingtimeout, rand() % 100000});
/* Periodic timer: registered with a zero delay. */
612 timeout_add(&mesh->loop, &mesh->periodictimer, periodic_handler, &mesh->periodictimer, &(struct timeval){0, 0});
/* Register meshlink_send_from_queue as the callback for the datafromapp signal (signum 0), with mesh as its data. */
615 mesh->datafromapp.signum = 0;
616 signal_add(&(mesh->loop),&(mesh->datafromapp), (signal_cb_t)meshlink_send_from_queue,mesh, mesh->datafromapp.signum);
/* A false result from event_loop_run() is logged together with strerror(errno). */
618 if(!event_loop_run(&(mesh->loop), &(mesh->mesh_mutex))) {
619 logger(mesh, MESHLINK_ERROR, "Error while waiting for input: %s", strerror(errno));
623 timeout_del(&mesh->loop, &mesh->periodictimer);
624 timeout_del(&mesh->loop, &mesh->pingtimer);