From: Guus Sliepen Date: Mon, 12 Aug 2019 10:42:37 +0000 (+0200) Subject: Improve the autoconnect algorithm. X-Git-Url: https://git.meshlink.io/?a=commitdiff_plain;h=93e77a457c257a2e98bc6309702fce36eb6f536b;p=meshlink Improve the autoconnect algorithm. --- diff --git a/src/Makefile.am b/src/Makefile.am index b3c1dd10..084193bd 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -36,6 +36,7 @@ pkginclude_HEADERS = meshlink++.h meshlink.h libmeshlink_la_LDFLAGS = -export-symbols $(srcdir)/meshlink.sym libmeshlink_la_SOURCES = \ + autoconnect.c autoconnect.h \ buffer.c buffer.h \ conf.c conf.h \ connection.c connection.h \ diff --git a/src/autoconnect.c b/src/autoconnect.c new file mode 100644 index 00000000..3308a0f7 --- /dev/null +++ b/src/autoconnect.c @@ -0,0 +1,259 @@ +/* + autoconnect.c -- automatic connection establishment + Copyright (C) 2019 Guus Sliepen + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +*/ + +#include "system.h" + +#include "connection.h" +#include "list.h" +#include "logger.h" +#include "net.h" +#include "node.h" +#include "xalloc.h" + +/* Make an outgoing connection if possible */ +static bool make_outgoing(meshlink_handle_t *mesh, node_t *n) { + if(!n || n->connection) { + return false; + } + + n->last_connect_try = mesh->loop.now.tv_sec; + logger(mesh, MESHLINK_DEBUG, "Autoconnect trying to connect to %s", n->name); + + /* check if there is already a connection attempt to this node */ + for list_each(outgoing_t, outgoing, mesh->outgoings) { + if(outgoing->node == n) { + logger(mesh, MESHLINK_DEBUG, "* skip autoconnect since it is an outgoing connection already"); + return false; + } + } + + if(!n->status.reachable && !node_read_public_key(mesh, n)) { + logger(mesh, MESHLINK_DEBUG, "* skip autoconnect since we don't know this node's public key"); + return false; + } + + logger(mesh, MESHLINK_DEBUG, "Autoconnecting to %s", n->name); + outgoing_t *outgoing = xzalloc(sizeof(outgoing_t)); + outgoing->node = n; + list_insert_tail(mesh->outgoings, outgoing); + setup_outgoing_connection(mesh, outgoing); + return true; +} + +/* Determine if node n is a better candidate for making an early connection to than node m. */ +static bool compare_candidates(node_t *n, node_t *m) { + /* Check if the last connection attempt was successful */ + bool n_successful = n->last_successful_connection > n->last_connect_try; + bool m_successful = n->last_successful_connection > n->last_connect_try; + + if(n_successful != m_successful) { + return n_successful; + } else { + if(n_successful) { + /* If both were successfully connected to, prefer the most recent one */ + return n->last_successful_connection > m->last_successful_connection; + } else { + /* If the last connections were not successful, prefer the one we least recently tried to connect to. */ + return n->last_connect_try < m->last_connect_try; + } + } +} + +/* Try to connect to any candidate in the same or better device class. Prefer recently connected-to nodes first. */ +static bool make_eager_connection(meshlink_handle_t *mesh) { + node_t *candidate = NULL; + + for splay_each(node_t, n, mesh->nodes) { + if(n == mesh->self || n->devclass > mesh->devclass || n->connection || n->status.blacklisted) { + continue; + } + + if(!candidate) { + candidate = n; + continue; + } + + if(compare_candidates(n, candidate)) { + candidate = n; + } + } + + return make_outgoing(mesh, candidate); +} + +/* Try to connect to balance connections to different device classes. Prefer recently connected-to nodes first. */ +static bool make_better_connection(meshlink_handle_t *mesh) { + const unsigned int min_connects = mesh->dev_class_traits[mesh->devclass].min_connects; + + for(dev_class_t devclass = 0; devclass <= mesh->devclass; ++devclass) { + unsigned int connects = 0; + + for list_each(connection_t, c, mesh->connections) { + if(c->status.active && c->node && c->node->devclass == devclass) { + connects += 1; + + if(connects >= min_connects) { + break; + } + } + } + + if(connects >= min_connects) { + continue; + } + + node_t *candidate = NULL; + + for splay_each(node_t, n, mesh->nodes) { + if(n == mesh->self || n->devclass != devclass || n->connection || n->status.blacklisted) { + continue; + } + + if(!candidate) { + candidate = n; + continue; + } + + if(compare_candidates(n, candidate)) { + candidate = n; + } + } + + if(make_outgoing(mesh, candidate)) { + return true; + } + } + + return false; +} + +/* Disconnect from a random node that doesn't weaken the graph, and cancel redundant outgoings */ +static void disconnect_redundant(meshlink_handle_t *mesh) { + int count = 0; + + for list_each(connection_t, c, mesh->connections) { + if(!c->status.active || !c->outgoing || !c->node) { + continue; + } + + if(c->node->edge_tree->count < 2) { + continue; + } + + count++; + } + + if(!count) { + return; + } + + int r = rand() % count; + + for list_each(connection_t, c, mesh->connections) { + if(!c->status.active || !c->outgoing || !c->node) { + continue; + } + + if(c->node->edge_tree->count < 2) { + continue; + } + + if(r--) { + continue; + } + + logger(mesh, MESHLINK_DEBUG, "Autodisconnecting from %s", c->name); + list_delete(mesh->outgoings, c->outgoing); + c->outgoing = NULL; + terminate_connection(mesh, c, c->status.active); + break; + } + + for list_each(outgoing_t, o, mesh->outgoings) { + if(!o->node->connection) { + list_delete_node(mesh->outgoings, node); + } + } +} + +static void heal_partitions(meshlink_handle_t *mesh) { + /* Select a random known node. The rationale is that if there are many + * reachable nodes, and only a few unreachable nodes, we don't want all + * reachable nodes to try to connect to the unreachable ones at the + * same time. This way, we back off automatically. Conversely, if there + * are only a few reachable nodes, and many unreachable ones, we're + * going to try harder to connect to them. */ + + int r = rand() % mesh->nodes->count; + + for splay_each(node_t, n, mesh->nodes) { + if(r--) { + continue; + } + + if(n == mesh->self || n->connection || n->status.reachable || n->status.blacklisted) { + return; + } + + /* Are we already trying to make an outgoing connection to it? If so, return. */ + for list_each(outgoing_t, outgoing, mesh->outgoings) { + if(outgoing->node == n) { + return; + } + } + + make_outgoing(mesh, n); + return; + } + +} + +unsigned int do_autoconnect(meshlink_handle_t *mesh) { + /* Count the number of active connections. */ + + unsigned int cur_connects = 0; + + for list_each(connection_t, c, mesh->connections) { + if(c->status.active) { + cur_connects += 1; + } + } + + /* We don't have the minimum number of connections? Eagerly try to make a new one. */ + + const unsigned int max_connects = mesh->dev_class_traits[mesh->devclass].max_connects; + const unsigned int min_connects = mesh->dev_class_traits[mesh->devclass].min_connects; + + logger(mesh, MESHLINK_DEBUG, "do_autoconnect() %d %d %d\n", cur_connects, min_connects, max_connects); + + if(cur_connects < min_connects) { + make_eager_connection(mesh); + } else if(cur_connects < max_connects) { + /* Otherwise, try to improve. */ + make_better_connection(mesh); + } + + if (cur_connects >= max_connects) { + disconnect_redundant(mesh); + } + + heal_partitions(mesh); + + return cur_connects; +} diff --git a/src/net.c b/src/net.c index b5705a47..6cf6f630 100644 --- a/src/net.c +++ b/src/net.c @@ -19,7 +19,7 @@ #include "system.h" -#include "utils.h" +#include "autoconnect.h" #include "conf.h" #include "connection.h" #include "graph.h" @@ -29,6 +29,7 @@ #include "net.h" #include "netutl.h" #include "protocol.h" +#include "utils.h" #include "xalloc.h" #include @@ -148,186 +149,6 @@ static void timeout_handler(event_loop_t *loop, void *data) { }); } -// devclass asc, last_successfull_connection desc -static int node_compare_devclass_asc_lsc_desc(const void *a, const void *b) { - const node_t *na = a, *nb = b; - - if(na->devclass < nb->devclass) { - return -1; - } - - if(na->devclass > nb->devclass) { - return 1; - } - - if(na->last_successfull_connection == nb->last_successfull_connection) { - return 0; - } - - if(na->last_successfull_connection == 0 || na->last_successfull_connection > nb->last_successfull_connection) { - return -1; - } - - if(nb->last_successfull_connection == 0 || na->last_successfull_connection < nb->last_successfull_connection) { - return 1; - } - - if(na < nb) { - return -1; - } - - if(na > nb) { - return 1; - } - - return 0; -} - -// last_successfull_connection desc -static int node_compare_lsc_desc(const void *a, const void *b) { - const node_t *na = a, *nb = b; - - if(na->last_successfull_connection == nb->last_successfull_connection) { - return 0; - } - - if(na->last_successfull_connection == 0 || na->last_successfull_connection > nb->last_successfull_connection) { - return -1; - } - - if(nb->last_successfull_connection == 0 || na->last_successfull_connection < nb->last_successfull_connection) { - return 1; - } - - if(na < nb) { - return -1; - } - - if(na > nb) { - return 1; - } - - return 0; -} - -// devclass desc -static int node_compare_devclass_desc(const void *a, const void *b) { - const node_t *na = a, *nb = b; - - if(na->devclass < nb->devclass) { - return -1; - } - - if(na->devclass > nb->devclass) { - return 1; - } - - if(na < nb) { - return -1; - } - - if(na > nb) { - return 1; - } - - return 0; -} - - -/* - -autoconnect() -{ - timeout = 5 - - // find the best one for initial connect - - if cur < min - newcon = - first from nodes - where dclass <= my.dclass and !connection and (timestamp - last_retry) > retry_timeout - order by dclass asc, last_connection desc - if newcon - timeout = 0 - goto connect - - - // find better nodes to connect to: in case we have less than min connections within [BACKBONE, i] and there are nodes which we are not connected to within the range - - if min <= cur < max - j = 0 - for i = BACKBONE to my.dclass - j += count(from connections where node.dclass = i) - if j < min - newcon = - first from nodes - where dclass = i and !connection and (timestamp - last_retry) > retry_timeout - order by last_connection desc - if newcon - goto connect - else - break - - - // heal partitions - - if min <= cur < max - newcon = - first from nodes - where dclass <= my.dclass and !reachable and (timestamp - last_retry) > retry_timeout - order by dclass asc, last_connection desc - if newcon - goto connect - - - // connect - -connect: - if newcon - connect newcon - - - // disconnect outgoing connections in case we have more than min connections within [BACKBONE, i] and there are nodes which we are connected to within the range [i, PORTABLE] - - if min < cur <= max - j = 0 - for i = BACKBONE to my.dclass - j += count(from connections where node.dclass = i) - if min < j - delcon = - first from nodes - where dclass >= i and outgoing_connection - order by dclass desc - if disconnect - goto disconnect - else - break - - - // disconnect connections in case we have more than enough connections - - if max < cur - delcon = - first from nodes - where outgoing_connection - order by dclass desc - goto disconnect - - // disconnect - -disconnect - if delcon - disconnect delcon - - - // next iteration - next (timeout, autoconnect) - -} - -*/ - - static void periodic_handler(event_loop_t *loop, void *data) { meshlink_handle_t *mesh = loop->data; @@ -360,244 +181,16 @@ static void periodic_handler(event_loop_t *loop, void *data) { /* Check if we need to make or break connections. */ if(mesh->nodes->count > 1) { - - logger(mesh, MESHLINK_DEBUG, "--- autoconnect begin ---"); - - int retry_timeout = min(mesh->nodes->count * default_timeout, 60); - - logger(mesh, MESHLINK_DEBUG, "* devclass = %d", mesh->devclass); - logger(mesh, MESHLINK_DEBUG, "* nodes = %d", mesh->nodes->count); - logger(mesh, MESHLINK_DEBUG, "* retry_timeout = %d", retry_timeout); - - - // connect disconnect nodes - - node_t *connect_to = NULL; - node_t *disconnect_from = NULL; - - - // get cur_connects - - unsigned int cur_connects = 0; - - for list_each(connection_t, c, mesh->connections) { - if(c->status.active) { - cur_connects += 1; - } - } - - logger(mesh, MESHLINK_DEBUG, "* cur_connects = %d", cur_connects); - logger(mesh, MESHLINK_DEBUG, "* outgoings = %d", mesh->outgoings->count); - - // get min_connects and max_connects - - unsigned int min_connects = mesh->dev_class_traits[mesh->devclass].min_connects; - unsigned int max_connects = mesh->dev_class_traits[mesh->devclass].max_connects; - - logger(mesh, MESHLINK_DEBUG, "* min_connects = %d", min_connects); - logger(mesh, MESHLINK_DEBUG, "* max_connects = %d", max_connects); - - // find the best one for initial connect - - if(cur_connects < min_connects) { - splay_tree_t *nodes = splay_alloc_tree(node_compare_devclass_asc_lsc_desc, NULL); - - for splay_each(node_t, n, mesh->nodes) { - logger(mesh, MESHLINK_DEBUG, "* %s->devclass = %d", n->name, n->devclass); - - if(n != mesh->self && n->devclass <= mesh->devclass && !n->connection && !n->status.blacklisted && (n->last_connect_try == 0 || (mesh->loop.now.tv_sec - n->last_connect_try) > retry_timeout)) { - splay_insert(nodes, n); - } - } - - if(nodes->head) { - //timeout = 0; - connect_to = (node_t *)nodes->head->data; - - logger(mesh, MESHLINK_DEBUG, "* found best one for initial connect: %s", connect_to->name); - } else { - logger(mesh, MESHLINK_DEBUG, "* could not find node for initial connect"); - } - - splay_free_tree(nodes); - } - - - // find better nodes to connect to - - if(!connect_to && min_connects <= cur_connects && cur_connects < max_connects) { - unsigned int connects = 0; - - for(dev_class_t devclass = 0; devclass <= mesh->devclass; ++devclass) { - for list_each(connection_t, c, mesh->connections) { - if(c->status.active && c->node && c->node->devclass == devclass) { - connects += 1; - } - } - - if(connects < min_connects) { - splay_tree_t *nodes = splay_alloc_tree(node_compare_lsc_desc, NULL); - - for splay_each(node_t, n, mesh->nodes) { - if(n != mesh->self && n->devclass == devclass && !n->connection && !n->status.blacklisted && (n->last_connect_try == 0 || (mesh->loop.now.tv_sec - n->last_connect_try) > retry_timeout)) { - splay_insert(nodes, n); - } - } - - if(nodes->head) { - logger(mesh, MESHLINK_DEBUG, "* found better node"); - connect_to = (node_t *)nodes->head->data; - - splay_free_tree(nodes); - break; - } - - splay_free_tree(nodes); - } else { - break; - } - } - - if(!connect_to) { - logger(mesh, MESHLINK_DEBUG, "* could not find better nodes"); - } - } - - - // heal partitions - - if(!connect_to && min_connects <= cur_connects && cur_connects < max_connects) { - splay_tree_t *nodes = splay_alloc_tree(node_compare_devclass_asc_lsc_desc, NULL); - - for splay_each(node_t, n, mesh->nodes) { - if(n != mesh->self && n->devclass <= mesh->devclass && !n->status.reachable && !n->status.blacklisted && (n->last_connect_try == 0 || (mesh->loop.now.tv_sec - n->last_connect_try) > retry_timeout)) { - splay_insert(nodes, n); - } - } - - if(nodes->head) { - logger(mesh, MESHLINK_DEBUG, "* try to heal partition"); - connect_to = (node_t *)nodes->head->data; - } else { - logger(mesh, MESHLINK_DEBUG, "* could not find nodes for partition healing"); - } - - splay_free_tree(nodes); - } - - - // perform connect - - if(connect_to && !connect_to->connection) { - connect_to->last_connect_try = mesh->loop.now.tv_sec; - logger(mesh, MESHLINK_DEBUG, "Autoconnect trying to connect to %s", connect_to->name); - - /* check if there is already a connection attempt to this node */ - bool skip = false; - - for list_each(outgoing_t, outgoing, mesh->outgoings) { - if(outgoing->node == connect_to) { - logger(mesh, MESHLINK_DEBUG, "* skip autoconnect since it is an outgoing connection already"); - skip = true; - break; - } - } - - if(!connect_to->status.reachable && !node_read_public_key(mesh, connect_to)) { - logger(mesh, MESHLINK_DEBUG, "* skip autoconnect since we don't know this node's public key"); - skip = true; - } - - if(!skip) { - logger(mesh, MESHLINK_DEBUG, "Autoconnecting to %s", connect_to->name); - outgoing_t *outgoing = xzalloc(sizeof(outgoing_t)); - outgoing->node = connect_to; - list_insert_tail(mesh->outgoings, outgoing); - setup_outgoing_connection(mesh, outgoing); - } - } - - - // disconnect suboptimal outgoing connections - - if(min_connects < cur_connects /*&& cur_connects <= max_connects*/) { - unsigned int connects = 0; - - for(dev_class_t devclass = 0; devclass <= mesh->devclass; ++devclass) { - for list_each(connection_t, c, mesh->connections) { - if(c->status.active && c->node && c->node->devclass == devclass) { - connects += 1; - } - } - - if(min_connects < connects) { - splay_tree_t *nodes = splay_alloc_tree(node_compare_devclass_desc, NULL); - - for list_each(connection_t, c, mesh->connections) { - if(c->outgoing && c->node && c->node->devclass >= devclass) { - splay_insert(nodes, c->node); - } - } - - if(nodes->head) { - logger(mesh, MESHLINK_DEBUG, "* disconnect suboptimal outgoing connection"); - disconnect_from = (node_t *)nodes->head->data; - } - - splay_free_tree(nodes); - break; - } - } - - if(!disconnect_from) { - logger(mesh, MESHLINK_DEBUG, "* no suboptimal outgoing connections"); - } - } - - - // disconnect connections (too many connections) - - if(!disconnect_from && max_connects < cur_connects) { - splay_tree_t *nodes = splay_alloc_tree(node_compare_devclass_desc, NULL); - - for list_each(connection_t, c, mesh->connections) { - if(c->status.active && c->node) { - splay_insert(nodes, c->node); - } - } - - if(nodes->head) { - logger(mesh, MESHLINK_DEBUG, "* disconnect connection (too many connections)"); - - //timeout = 0; - disconnect_from = (node_t *)nodes->head->data; - } else { - logger(mesh, MESHLINK_DEBUG, "* no node we want to disconnect, even though we have too many connections"); - } - - splay_free_tree(nodes); - } - - - // perform disconnect - - if(disconnect_from && disconnect_from->connection) { - logger(mesh, MESHLINK_DEBUG, "Autodisconnecting from %s", disconnect_from->connection->name); - list_delete(mesh->outgoings, disconnect_from->connection->outgoing); - disconnect_from->connection->outgoing = NULL; - terminate_connection(mesh, disconnect_from->connection, disconnect_from->connection->status.active); - } + unsigned int cur_connects = do_autoconnect(mesh); // reduce timeout if we don't have enough connections + outgoings - if(cur_connects + mesh->outgoings->count < 3) { + if (cur_connects + mesh->outgoings->count < 3) { timeout = 1; } - - // done! - - logger(mesh, MESHLINK_DEBUG, "--- autoconnect end ---"); } + /* Write dirty config files out to disk */ + for splay_each(node_t, n, mesh->nodes) { if(n->status.dirty) { node_write_config(mesh, n); diff --git a/src/node.h b/src/node.h index e42aac4a..75a8ced7 100644 --- a/src/node.h +++ b/src/node.h @@ -79,7 +79,7 @@ typedef struct node_t { struct connection_t *connection; /* Connection associated with this node (if a direct connection exists) */ time_t last_connect_try; - time_t last_successfull_connection; + time_t last_successful_connection; char *canonical_address; /* The canonical address of this node, if known */ sockaddr_t recent[5]; /* Recently seen addresses */ diff --git a/src/protocol_auth.c b/src/protocol_auth.c index 1193b43c..9fda9065 100644 --- a/src/protocol_auth.c +++ b/src/protocol_auth.c @@ -432,7 +432,7 @@ bool ack_h(meshlink_handle_t *mesh, connection_t *c, const char *request) { n->devclass = devclass; n->status.dirty = true; - n->last_successfull_connection = mesh->loop.now.tv_sec; + n->last_successful_connection = mesh->loop.now.tv_sec; n->connection = c; c->node = n;