X-Git-Url: http://git.meshlink.io/?a=blobdiff_plain;f=src%2Fnet.c;h=c615b893031a63cf727dfe89ccd2d17b1edaaeef;hb=326a86ef927e1f161a6742edfca041a945c7a547;hp=25cc15042f0b2a74d030acc78d625531e757fee8;hpb=e16463732db2045c884fa47aafa36a07c55b0c5c;p=meshlink diff --git a/src/net.c b/src/net.c index 25cc1504..c615b893 100644 --- a/src/net.c +++ b/src/net.c @@ -31,6 +31,10 @@ #include "protocol.h" #include "xalloc.h" +static const int min(int a, int b) { + return a < b ? a : b; +} + /* Terminate a connection: - Mark it as inactive @@ -39,7 +43,7 @@ - Check if we need to retry making an outgoing connection */ void terminate_connection(meshlink_handle_t *mesh, connection_t *c, bool report) { - logger(DEBUG_CONNECTIONS, LOG_NOTICE, "Closing connection with %s (%s)", c->name, c->hostname); + logger(mesh, MESHLINK_INFO, "Closing connection with %s (%s)", c->name, c->hostname); c->status.active = false; @@ -94,12 +98,18 @@ void terminate_connection(meshlink_handle_t *mesh, connection_t *c, bool report) */ static void timeout_handler(event_loop_t *loop, void *data) { meshlink_handle_t *mesh = loop->data; + logger(mesh, MESHLINK_DEBUG, "timeout_handler()"); for list_each(connection_t, c, mesh->connections) { + // Also make sure that if outstanding key requests for the UDP counterpart of a connection has timed out, we restart it. + if(c->node) { + if(c->node->status.waitingforkey && c->node->last_req_key + mesh->pingtimeout <= mesh->loop.now.tv_sec) + send_req_key(mesh, c->node); + } if(c->last_ping_time + mesh->pingtimeout <= mesh->loop.now.tv_sec) { if(c->status.active) { if(c->status.pinged) { - logger(DEBUG_CONNECTIONS, LOG_INFO, "%s (%s) didn't respond to PING in %ld seconds", c->name, c->hostname, (long)mesh->loop.now.tv_sec - c->last_ping_time); + logger(mesh, MESHLINK_INFO, "%s (%s) didn't respond to PING in %ld seconds", c->name, c->hostname, (long)mesh->loop.now.tv_sec - c->last_ping_time); } else if(c->last_ping_time + mesh->pinginterval <= mesh->loop.now.tv_sec) { send_ping(mesh, c); continue; @@ -108,9 +118,9 @@ static void timeout_handler(event_loop_t *loop, void *data) { } } else { if(c->status.connecting) - logger(DEBUG_CONNECTIONS, LOG_WARNING, "Timeout while connecting to %s (%s)", c->name, c->hostname); + logger(mesh, MESHLINK_WARNING, "Timeout while connecting to %s (%s)", c->name, c->hostname); else - logger(DEBUG_CONNECTIONS, LOG_WARNING, "Timeout from %s (%s) during authentication", c->name, c->hostname); + logger(mesh, MESHLINK_WARNING, "Timeout from %s (%s) during authentication", c->name, c->hostname); } terminate_connection(mesh, c, c->status.active); } @@ -119,6 +129,75 @@ static void timeout_handler(event_loop_t *loop, void *data) { timeout_set(&mesh->loop, data, &(struct timeval){mesh->pingtimeout, rand() % 100000}); } +/// Utility function to establish connections based on condition check +/** The function iterates over all nodes, but skips those that do + * not pass the condition check. + * + * The condition check function is passed + * a pointer to a random number r between 0 and rand_modulo, a pointer to the + * current node index i, and the node pointer n. This function should return true + * if a connection attempt to the node should be made. + * + * @param mesh A pointer to the mesh structure + * @param rand_modulo Random index is selected between 0 and rand_modulo + * @cond_check A function pointer. This function should return true + * if a connection attempt to the node should be made + */ +static void cond_add_connection(meshlink_handle_t *mesh, int rand_modulo, bool (*cond_check)(int*, int*, node_t*)) { + int r = rand() % rand_modulo; + int i = 0; + + for splay_each(node_t, n, mesh->nodes) { + /* skip nodes that do not pass condition check */ + if(!(*cond_check)(&i, &r, n)) + continue; + + /* check if there is already a connection attempt to this node */ + bool found = false; + for list_each(outgoing_t, outgoing, mesh->outgoings) { + if(!strcmp(outgoing->name, n->name)) { + found = true; + break; + } + } + + if(!found) { + //TODO: if the node is blacklisted the connection will not happen, but + //the user will read this debug message "Autoconnecting to %s" that is misleading + logger(mesh, MESHLINK_INFO, "Autoconnecting to %s", n->name); + outgoing_t *outgoing = xzalloc(sizeof *outgoing); + outgoing->mesh = mesh; + outgoing->name = xstrdup(n->name); + list_insert_tail(mesh->outgoings, outgoing); + setup_outgoing_connection(mesh, outgoing); + } + break; + } +} + +static bool found_random_node(int *i, int *r, node_t *n) { + if((*i)++ != *r) + return false; + + if(n->connection) + return false; + + return true; +} + +static bool found_random_unreachable_node(int *i, int *r, node_t *n) { + if(n->status.reachable) + return false; + + if((*i)++ != *r) + return false; + + if(n->connection) + return false; + + return true; +} + static void periodic_handler(event_loop_t *loop, void *data) { meshlink_handle_t *mesh = loop->data; @@ -128,7 +207,7 @@ static void periodic_handler(event_loop_t *loop, void *data) { */ if(mesh->contradicting_del_edge > 100 && mesh->contradicting_add_edge > 100) { - logger(DEBUG_ALWAYS, LOG_WARNING, "Possible node with same Name as us! Sleeping %d seconds.", mesh->sleeptime); + logger(mesh, MESHLINK_WARNING, "Possible node with same Name as us! Sleeping %d seconds.", mesh->sleeptime); usleep(mesh->sleeptime * 1000000LL); mesh->sleeptime *= 2; if(mesh->sleeptime < 0) @@ -142,6 +221,8 @@ static void periodic_handler(event_loop_t *loop, void *data) { mesh->contradicting_add_edge = 0; mesh->contradicting_del_edge = 0; + int timeout = 5; + /* If AutoConnect is set, check if we need to make or break connections. */ if(autoconnect && mesh->nodes->count > 1) { @@ -152,62 +233,29 @@ static void periodic_handler(event_loop_t *loop, void *data) { nc++; } + /* Count number of unreachable nodes */ + int num_unreachable = 0; + for splay_each(node_t, n, mesh->nodes) { + if(!n->status.reachable) + num_unreachable++; + } + if(nc < autoconnect) { /* Not enough active connections, try to add one. Choose a random node, if we don't have a connection to it, and we are not already trying to make one, create an outgoing connection to this node. */ - int r = rand() % mesh->nodes->count; - int i = 0; - - for splay_each(node_t, n, mesh->nodes) { - bool trying_unreachable = false; - - if(i++ != r) { - if(n->status.reachable) { - continue; - } else { - /* If we see an unreachable node - before node i, try it anyway. - */ - trying_unreachable = true; - } - } - - if(n->connection) - break; - - bool found = false; - - for list_each(outgoing_t, outgoing, mesh->outgoings) { - if(!strcmp(outgoing->name, n->name)) { - found = true; - break; - } - } - - if(!found) { - //TODO: if the node is blacklisted the connection will not happen, but - //the user will read this debug message "Autoconnecting to %s" that is misleading - logger(DEBUG_CONNECTIONS, LOG_INFO, "Autoconnecting to %s", n->name); - outgoing_t *outgoing = xzalloc(sizeof *outgoing); - outgoing->name = xstrdup(n->name); - list_insert_tail(mesh->outgoings, outgoing); - setup_outgoing_connection(mesh, outgoing); - } else if(trying_unreachable) { - /* We're trying an unreachable node instead - of node i. We already have an outgoing - to it. Try the next node rather than - breaking here, to avoid churning on a - connection attempt to the first - unreachable node. - */ - continue; - } - break; - } - } else if(nc > autoconnect) { + cond_add_connection(mesh, mesh->nodes->count, &found_random_node); + } else if(num_unreachable > 0) { + /* Min number of connections established. Now try + to connect to some unreachable nodes to attempt + to heal possible partitions. + */ + cond_add_connection(mesh, num_unreachable, &found_random_unreachable_node); + } + + if(nc > autoconnect) { /* Too many active connections, try to remove one. Choose a random outgoing connection to a node that has at least one other connection. @@ -225,7 +273,7 @@ static void periodic_handler(event_loop_t *loop, void *data) { if(!c->outgoing || !c->node || c->node->edge_tree->count < 2) break; - logger(DEBUG_CONNECTIONS, LOG_INFO, "Autodisconnecting from %s", c->name); + logger(mesh, MESHLINK_INFO, "Autodisconnecting from %s", c->name); list_delete(mesh->outgoings, c->outgoing); c->outgoing = NULL; terminate_connection(mesh, c, c->status.active); @@ -236,8 +284,16 @@ static void periodic_handler(event_loop_t *loop, void *data) { if(nc >= autoconnect) { /* If we have enough active connections, remove any pending outgoing connections. + Do not remove pending connections to unreachable + nodes. */ + node_t *o_node = NULL; for list_each(outgoing_t, o, mesh->outgoings) { + o_node = lookup_node(mesh, o->name); + /* o_node is NULL if it is not part of the graph yet */ + if(!o_node || !o_node->status.reachable) + continue; + bool found = false; for list_each(connection_t, c, mesh->connections) { if(c->outgoing == o) { @@ -246,14 +302,21 @@ static void periodic_handler(event_loop_t *loop, void *data) { } } if(!found) { - logger(DEBUG_CONNECTIONS, LOG_INFO, "Cancelled outgoing connection to %s", o->name); + logger(mesh, MESHLINK_INFO, "Cancelled outgoing connection to %s", o->name); + /* The node variable is leaked in from using the list_each macro. + The o variable could be used, but using node directly + is more efficient. + */ list_delete_node(mesh->outgoings, node); } } } + + if (nc + mesh->outgoings->count < min(autoconnect, mesh->nodes->count - 1)) + timeout = 0; } - timeout_set(&mesh->loop, data, &(struct timeval){5, rand() % 100000}); + timeout_set(&mesh->loop, data, &(struct timeval){timeout, rand() % 100000}); } void handle_meta_connection_data(meshlink_handle_t *mesh, connection_t *c) { @@ -286,14 +349,14 @@ void retry(meshlink_handle_t *mesh) { */ int main_loop(meshlink_handle_t *mesh) { timeout_add(&mesh->loop, &mesh->pingtimer, timeout_handler, &mesh->pingtimer, &(struct timeval){mesh->pingtimeout, rand() % 100000}); - timeout_add(&mesh->loop, &mesh->periodictimer, periodic_handler, &mesh->periodictimer, &(struct timeval){mesh->pingtimeout, rand() % 100000}); + timeout_add(&mesh->loop, &mesh->periodictimer, periodic_handler, &mesh->periodictimer, &(struct timeval){0, 0}); //Add signal handler mesh->datafromapp.signum = 0; signal_add(&(mesh->loop),&(mesh->datafromapp), (signal_cb_t)meshlink_send_from_queue,mesh, mesh->datafromapp.signum); - if(!event_loop_run(&mesh->loop)) { - logger(DEBUG_ALWAYS, LOG_ERR, "Error while waiting for input: %s", strerror(errno)); + if(!event_loop_run(&(mesh->loop), &(mesh->mesh_mutex))) { + logger(mesh, MESHLINK_ERROR, "Error while waiting for input: %s", strerror(errno)); return 1; }