/*
net.c -- most of the network code
- Copyright (C) 1998-2005 Ivo Timmermans,
- 2000-2006 Guus Sliepen <guus@tinc-vpn.org>
+ Copyright (C) 2014 Guus Sliepen <guus@meshlink.io>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
- $Id$
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include "system.h"
-#include <openssl/rand.h>
-
#include "utils.h"
-#include "avl_tree.h"
#include "conf.h"
#include "connection.h"
-#include "device.h"
#include "graph.h"
#include "logger.h"
+#include "meshlink_internal.h"
#include "meta.h"
#include "net.h"
#include "netutl.h"
-#include "process.h"
#include "protocol.h"
-#include "route.h"
-#include "subnet.h"
#include "xalloc.h"
-volatile bool running = false;
-
-time_t now = 0;
-
-/* Purge edges and subnets of unreachable nodes. Use carefully. */
-
-static void purge(void)
-{
- avl_node_t *nnode, *nnext, *enode, *enext, *snode, *snext;
- node_t *n;
- edge_t *e;
- subnet_t *s;
-
- cp();
-
- ifdebug(PROTOCOL) logger(LOG_DEBUG, _("Purging unreachable nodes"));
-
- /* Remove all edges and subnets owned by unreachable nodes. */
-
- for(nnode = node_tree->head; nnode; nnode = nnext) {
- nnext = nnode->next;
- n = nnode->data;
-
- if(!n->status.reachable) {
- ifdebug(SCARY_THINGS) logger(LOG_DEBUG, _("Purging node %s (%s)"), n->name,
- n->hostname);
-
- for(snode = n->subnet_tree->head; snode; snode = snext) {
- snext = snode->next;
- s = snode->data;
- if(!tunnelserver)
- send_del_subnet(broadcast, s);
- subnet_del(n, s);
- }
-
- for(enode = n->edge_tree->head; enode; enode = enext) {
- enext = enode->next;
- e = enode->data;
- if(!tunnelserver)
- send_del_edge(broadcast, e);
- edge_del(e);
- }
- }
- }
-
- /* Check if anyone else claims to have an edge to an unreachable node. If not, delete node. */
-
- for(nnode = node_tree->head; nnode; nnode = nnext) {
- nnext = nnode->next;
- n = nnode->data;
-
- if(!n->status.reachable) {
- for(enode = edge_weight_tree->head; enode; enode = enext) {
- enext = enode->next;
- e = enode->data;
-
- if(e->to == n)
- break;
- }
-
- if(!enode)
- node_del(n);
- }
- }
-}
-
-/*
- put all file descriptors into events
- While we're at it, purge stuf that needs to be removed.
-*/
-static int build_fdset(void)
-{
- avl_node_t *node, *next;
- connection_t *c;
- int i, max = 0;
-
- cp();
-
- for(node = connection_tree->head; node; node = next) {
- next = node->next;
- c = node->data;
-
- if(c->status.remove) {
- connection_del(c);
- if(!connection_tree->head)
- purge();
- }
- }
-
- return 0;
-}
-
/*
Terminate a connection:
- - Close the socket
- - Remove associated edge and tell other connections about it if report = true
+ - Mark it as inactive
+ - Remove the edge representing this connection
+ - Delete the connection object (connection_del)
- Check if we need to retry making an outgoing connection
- - Deactivate the host
+
+ mesh: handle whose everyone/self members are used for edge announcements
+ c: connection to close; it is handed to connection_del() and not touched afterwards
+ report: when true, send_del_edge() goes to mesh->everyone before the edge is
+ deleted, and a leftover edge from a now-unreachable peer to mesh->self is
+ deleted and announced as well
+
+ NOTE(review): c->node is dereferenced without a NULL check inside the c->edge
+ branch - presumably an edge implies a node; confirm.
*/
-void terminate_connection(connection_t *c, bool report)
-{
- cp();
-
- if(c->status.remove)
- return;
+void terminate_connection(meshlink_handle_t *mesh, connection_t *c, bool report) {
+ logger(DEBUG_CONNECTIONS, LOG_NOTICE, "Closing connection with %s (%s)", c->name, c->hostname);
- ifdebug(CONNECTIONS) logger(LOG_NOTICE, _("Closing connection with %s (%s)"),
- c->name, c->hostname);
-
- c->status.remove = true;
c->status.active = false;
- if(c->node)
+ if(c->node && c->node->connection == c) /* only clear the back-pointer if it still refers to this connection */
c->node->connection = NULL;
- if(c->socket)
- closesocket(c->socket);
-
- event_del(&c->ev);
-
if(c->edge) {
- if(report && !tunnelserver)
- send_del_edge(broadcast, c->edge);
+ if(report)
+ send_del_edge(mesh, mesh->everyone, c->edge);
- edge_del(c->edge);
+ edge_del(mesh, c->edge);
+ c->edge = NULL; /* the edge was just passed to edge_del(); drop our reference to it */
/* Run MST and SSSP algorithms */
- graph();
+ graph(mesh);
/* If the node is not reachable anymore but we remember it had an edge to us, clean it up */
if(report && !c->node->status.reachable) {
edge_t *e;
- e = lookup_edge(c->node, myself);
+ e = lookup_edge(c->node, mesh->self);
if(e) {
- if(!tunnelserver)
- send_del_edge(broadcast, e);
- edge_del(e);
+ send_del_edge(mesh, mesh->everyone, e);
+ edge_del(mesh, e);
}
}
}
+ outgoing_t *outgoing = c->outgoing; /* read before connection_del(); c is not used after that call */
+ connection_del(mesh, c);
+
/* Check if this was our outgoing connection */
- if(c->outgoing) {
- retry_outgoing(c->outgoing);
- c->outgoing = NULL;
- }
+ if(outgoing)
+ do_outgoing_connection(mesh, outgoing);
+
+#ifndef HAVE_MINGW
+ /* Clean up dead proxy processes. waitpid(-1, ..., WNOHANG) collects every
+ already-exited child of this process without blocking.
+ NOTE(review): that includes children not started by this code - confirm
+ this is acceptable for the embedding application. */
- free(c->outbuf);
- c->outbuf = NULL;
- c->outbuflen = 0;
- c->outbufsize = 0;
- c->outbufstart = 0;
+ while(waitpid(-1, NULL, WNOHANG) > 0);
+#endif
}
/*
end does not reply in time, we consider them dead
and close the connection.
*/
-static void check_dead_connections(void)
-{
- avl_node_t *node, *next;
- connection_t *c;
-
- cp();
+/*
+ Ping timer callback: walks mesh->connections and handles every connection
+ whose last_ping_time is at least mesh->pingtimeout seconds in the past.
+ - active and already pinged: log the missing PING reply and terminate it
+ - active, not pinged, mesh->pinginterval elapsed: send a PING and keep it
+ - active otherwise: keep it
+ - not active: log a connect/authentication timeout and terminate it
+ Finally the timer passed in as data is re-armed for mesh->pingtimeout
+ seconds plus a random number of microseconds below 100000.
+
+ loop: event loop whose data member holds the mesh handle
+ data: timer to re-arm (main_loop registers &mesh->pingtimer)
+*/
+static void timeout_handler(event_loop_t *loop, void *data) {
+ meshlink_handle_t *mesh = loop->data;
- for(node = connection_tree->head; node; node = next) {
- next = node->next;
- c = node->data;
-
- if(c->last_ping_time + pingtimeout < now) {
+ for list_each(connection_t, c, mesh->connections) {
+ if(c->last_ping_time + mesh->pingtimeout <= mesh->loop.now.tv_sec) {
if(c->status.active) {
if(c->status.pinged) {
- ifdebug(CONNECTIONS) logger(LOG_INFO, _("%s (%s) didn't respond to PING in %ld seconds"),
- c->name, c->hostname, now - c->last_ping_time);
- c->status.timeout = true;
- terminate_connection(c, true);
- } else if(c->last_ping_time + pinginterval < now) {
- send_ping(c);
- }
- } else {
- if(c->status.remove) {
- logger(LOG_WARNING, _("Old connection_t for %s (%s) status %04x still lingering, deleting..."),
- c->name, c->hostname, c->status.value);
- connection_del(c);
+ /* Cast the whole difference: %ld needs a long argument, and casting only tv_sec leaves the type of the subtraction up to last_ping_time. */
+ logger(DEBUG_CONNECTIONS, LOG_INFO, "%s (%s) didn't respond to PING in %ld seconds", c->name, c->hostname, (long)(mesh->loop.now.tv_sec - c->last_ping_time));
+ } else if(c->last_ping_time + mesh->pinginterval <= mesh->loop.now.tv_sec) {
+ send_ping(mesh, c);
continue;
- }
- ifdebug(CONNECTIONS) logger(LOG_WARNING, _("Timeout from %s (%s) during authentication"),
- c->name, c->hostname);
- if(c->status.connecting) {
- c->status.connecting = false;
- closesocket(c->socket);
- do_outgoing_connection(c);
} else {
- terminate_connection(c, false);
+ continue;
}
+ } else {
+ if(c->status.connecting)
+ logger(DEBUG_CONNECTIONS, LOG_WARNING, "Timeout while connecting to %s (%s)", c->name, c->hostname);
+ else
+ logger(DEBUG_CONNECTIONS, LOG_WARNING, "Timeout from %s (%s) during authentication", c->name, c->hostname);
}
- }
-
- if(c->outbuflen > 0 && c->last_flushed_time + pingtimeout < now) {
- if(c->status.active) {
- ifdebug(CONNECTIONS) logger(LOG_INFO,
- _("%s (%s) could not flush for %ld seconds (%d bytes remaining)"),
- c->name, c->hostname, now - c->last_flushed_time, c->outbuflen);
- c->status.timeout = true;
- terminate_connection(c, true);
- }
+ /* Only reached by the two logging branches above; report is true only for connections that were active. */
+ terminate_connection(mesh, c, c->status.active);
}
}
-}
-void handle_meta_connection_data(int fd, short events, void *data)
-{
- connection_t *c = data;
- int result;
- socklen_t len = sizeof(result);
-
- if (c->status.remove)
- return;
+ timeout_set(&mesh->loop, data, &(struct timeval){mesh->pingtimeout, rand() % 100000});
+}
- if(c->status.connecting) {
- c->status.connecting = false;
- getsockopt(c->socket, SOL_SOCKET, SO_ERROR, &result, &len);
-
- if(!result)
- finish_connecting(c);
- else {
- ifdebug(CONNECTIONS) logger(LOG_DEBUG,
- _("Error while connecting to %s (%s): %s"),
- c->name, c->hostname, strerror(result));
- closesocket(c->socket);
- do_outgoing_connection(c);
- return;
+/// Utility function to establish connections based on condition check
+/** The function iterates over all nodes, but skips those that do
+ * not pass the condition check. Iteration stops at the first node that
+ * passes, whether or not a new outgoing connection was created for it.
+ *
+ * The condition check function is passed, in this order,
+ * a pointer to the current node index i, a pointer to a random number r
+ * with 0 <= r < rand_modulo, and the node pointer n. It should return true
+ * if a connection attempt to the node should be made.
+ *
+ * @param mesh A pointer to the mesh structure
+ * @param rand_modulo Exclusive upper bound of the random index (r = rand() % rand_modulo)
+ * @param cond_check A function pointer. This function should return true
+ * if a connection attempt to the node should be made
+ */
+static void cond_add_connection(meshlink_handle_t *mesh, int rand_modulo, bool (*cond_check)(int*, int*, node_t*)) {
+ /* rand() % 0 is undefined; with nothing to choose from there is nothing to do. */
+ if(rand_modulo <= 0)
+ return;
+
+ int r = rand() % rand_modulo;
+ int i = 0;
+
+ for splay_each(node_t, n, mesh->nodes) {
+ /* skip nodes that do not pass condition check */
+ if(!(*cond_check)(&i, &r, n))
+ continue;
+
+ /* check if there is already a connection attempt to this node */
+ bool found = false;
+ for list_each(outgoing_t, outgoing, mesh->outgoings) {
+ if(!strcmp(outgoing->name, n->name)) {
+ found = true;
+ break;
+ }
}
- }
- if (!receive_meta(c)) {
- terminate_connection(c, c->status.active);
- return;
+ if(!found) {
+ //TODO: if the node is blacklisted the connection will not happen, but
+ //the user will read this debug message "Autoconnecting to %s" that is misleading
+ logger(DEBUG_CONNECTIONS, LOG_INFO, "Autoconnecting to %s", n->name);
+ outgoing_t *outgoing = xzalloc(sizeof *outgoing);
+ outgoing->name = xstrdup(n->name);
+ list_insert_tail(mesh->outgoings, outgoing);
+ setup_outgoing_connection(mesh, outgoing);
+ }
+ /* At most one node is handled per call: stop at the first one that passed the check. */
+ break;
}
}
-static void dummy(int a, short b, void *c)
-{
-}
+static bool found_random_node(int *i, int *r, node_t *n) {
+ /* The running index *i is advanced on every call; only the node at index *r can match. */
+ bool at_target = (*i)++ == *r;
-static void sigterm_handler(int signal, short events, void *data) {
- logger(LOG_NOTICE, _("Got %s signal"), strsignal(signal));
- running = false;
- event_loopexit(NULL);
+
+ /* A node that already has a connection is never selected. */
+ return at_target && !n->connection;
}
-static void sigint_handler(int signal, short events, void *data) {
- static int saved_debug_level = -1;
+static bool found_random_unreachable_node(int *i, int *r, node_t *n) {
+ /* Reachable nodes are skipped without being counted, so *i indexes unreachable nodes only. */
+ if(n->status.reachable)
+ return false;
+
+ /* Advance the running index; only the unreachable node at index *r can match. */
+ bool at_target = (*i)++ == *r;
- logger(LOG_NOTICE, _("Got %s signal"), strsignal(signal));
+
- if(saved_debug_level != -1) {
- logger(LOG_NOTICE, _("Reverting to old debug level (%d)"),
- saved_debug_level);
- debug_level = saved_debug_level;
- saved_debug_level = -1;
- } else {
- logger(LOG_NOTICE,
- _("Temporarily setting debug level to 5. Kill me with SIGINT again to go back to level %d."),
- debug_level);
- saved_debug_level = debug_level;
- debug_level = 5;
- }
+ /* A node that already has a connection is never selected. */
+ return at_target && !n->connection;
}
-static void sigusr1_handler(int signal, short events, void *data) {
- logger(LOG_NOTICE, _("Got %s signal"), strsignal(signal));
- dump_connections();
-}
+/*
+ Periodic timer callback.
+ 1. If both contradicting-edge counters exceed 100, sleep mesh->sleeptime
+ seconds and double the back-off (capped at 3600); otherwise halve it
+ (never below 10). Both counters are then reset.
+ 2. If autoconnect is set and more than one node is known, add, drop or
+ cancel connections so the number of active ones moves towards autoconnect.
+ 3. Re-arm the timer passed in as data for 5 seconds plus a random number
+ of microseconds below 100000.
+
+ loop: event loop whose data member holds the mesh handle
+ data: timer to re-arm (main_loop registers &mesh->periodictimer)
+*/
+static void periodic_handler(event_loop_t *loop, void *data) {
+ meshlink_handle_t *mesh = loop->data;
-static void sigusr2_handler(int signal, short events, void *data) {
- logger(LOG_NOTICE, _("Got %s signal"), strsignal(signal));
- dump_device_stats();
- dump_nodes();
- dump_edges();
- dump_subnets();
-}
+ /* Check if there are too many contradicting ADD_EDGE and DEL_EDGE messages.
+ This usually only happens when another node has the same Name as this node.
+ If so, sleep for a short while to prevent a storm of contradicting messages.
+ */
-static void sigwinch_handler(int signal, short events, void *data) {
- logger(LOG_NOTICE, _("Got %s signal"), strsignal(signal));
- purge();
-}
+ if(mesh->contradicting_del_edge > 100 && mesh->contradicting_add_edge > 100) {
+ logger(DEBUG_ALWAYS, LOG_WARNING, "Possible node with same Name as us! Sleeping %d seconds.", mesh->sleeptime);
+ /* NOTE(review): this blocks the calling thread inside the handler; some usleep()
+ implementations reject arguments of one second or more - confirm on all targets. */
+ usleep(mesh->sleeptime * 1000000LL);
+ /* Cap before doubling instead of testing for a negative value afterwards:
+ signed overflow is undefined, and the back-off must not grow past one hour. */
+ if(mesh->sleeptime > 3600 / 2)
+ mesh->sleeptime = 3600;
+ else
+ mesh->sleeptime *= 2;
+ } else {
+ mesh->sleeptime /= 2;
+ if(mesh->sleeptime < 10)
+ mesh->sleeptime = 10;
+ }
-static void sighup_handler(int signal, short events, void *data) {
- connection_t *c;
- avl_node_t *node;
- char *fname;
- struct stat s;
- static time_t last_config_check = 0;
-
- logger(LOG_NOTICE, _("Got %s signal"), strsignal(signal));
+ mesh->contradicting_add_edge = 0;
+ mesh->contradicting_del_edge = 0;
- /* Reread our own configuration file */
+ /* If AutoConnect is set, check if we need to make or break connections. */
- exit_configuration(&config_tree);
- init_configuration(&config_tree);
+ if(autoconnect && mesh->nodes->count > 1) {
+ /* Count number of active connections */
+ int nc = 0;
+ for list_each(connection_t, c, mesh->connections) {
+ if(c->status.active)
+ nc++;
+ }
- if(!read_server_config()) {
- logger(LOG_ERR, _("Unable to reread configuration file, exitting."));
- event_loopexit(NULL);
- return;
- }
+ /* Count number of unreachable nodes */
+ int num_unreachable = 0;
+ for splay_each(node_t, n, mesh->nodes) {
+ if(!n->status.reachable)
+ num_unreachable++;
+ }
- /* Close connections to hosts that have a changed or deleted host config file */
-
- for(node = connection_tree->head; node; node = node->next) {
- c = node->data;
-
- if(c->outgoing) {
- free(c->outgoing->name);
- if(c->outgoing->ai)
- freeaddrinfo(c->outgoing->ai);
- free(c->outgoing);
- c->outgoing = NULL;
+ if(nc < autoconnect) {
+ /* Not enough active connections, try to add one.
+ Choose a random node, if we don't have a connection to it,
+ and we are not already trying to make one, create an
+ outgoing connection to this node.
+ */
+ cond_add_connection(mesh, mesh->nodes->count, &found_random_node);
+ } else if(num_unreachable > 0) {
+ /* Min number of connections established. Now try
+ to connect to some unreachable nodes to attempt
+ to heal possible partitions.
+ */
+ cond_add_connection(mesh, num_unreachable, &found_random_unreachable_node);
}
- asprintf(&fname, "%s/hosts/%s", confbase, c->name);
- if(stat(fname, &s) || s.st_mtime > last_config_check)
- terminate_connection(c, c->status.active);
- free(fname);
- }
-
- last_config_check = time(NULL);
-
- /* Try to make outgoing connections */
-
- try_outgoing_connections();
-}
-
-/*
- this is where it all happens...
-*/
-int main_loop(void)
-{
- struct timeval tv;
- int r;
- time_t last_ping_check;
- struct event timeout;
- struct event sighup_event;
- struct event sigint_event;
- struct event sigterm_event;
- struct event sigquit_event;
- struct event sigusr1_event;
- struct event sigusr2_event;
- struct event sigwinch_event;
-
- cp();
-
- signal_set(&sighup_event, SIGHUP, sighup_handler, NULL);
- signal_add(&sighup_event, NULL);
- signal_set(&sigint_event, SIGINT, sigint_handler, NULL);
- signal_add(&sigint_event, NULL);
- signal_set(&sigterm_event, SIGTERM, sigterm_handler, NULL);
- signal_add(&sigterm_event, NULL);
- signal_set(&sigquit_event, SIGQUIT, sigterm_handler, NULL);
- signal_add(&sigquit_event, NULL);
- signal_set(&sigusr1_event, SIGUSR1, sigusr1_handler, NULL);
- signal_add(&sigusr1_event, NULL);
- signal_set(&sigusr2_event, SIGUSR2, sigusr2_handler, NULL);
- signal_add(&sigusr2_event, NULL);
- signal_set(&sigwinch_event, SIGWINCH, sigwinch_handler, NULL);
- signal_add(&sigwinch_event, NULL);
-
- last_ping_check = now;
-
- srand(now);
-
- running = true;
-
- while(running) {
- now = time(NULL);
+ if(nc > autoconnect) {
+ /* Too many active connections, try to remove one.
+ Choose a random outgoing connection to a node
+ that has at least one other connection.
+ */
+ int r = rand() % nc;
+ int i = 0;
+
+ for list_each(connection_t, c, mesh->connections) {
+ if(!c->status.active)
+ continue;
- // tv.tv_sec = 1 + (rand() & 7); /* Approx. 5 seconds, randomized to prevent global synchronisation effects */
- tv.tv_sec = 1;
- tv.tv_usec = 0;
+ if(i++ != r)
+ continue;
- /* XXX: libevent transition: old timeout code in this loop */
- timeout_set(&timeout, dummy, NULL);
- timeout_add(&timeout, &tv);
+ /* The randomly chosen connection is only dropped if it is outgoing and its node has at least two edges; otherwise nothing is removed this round. */
+ if(!c->outgoing || !c->node || c->node->edge_tree->count < 2)
+ break;
- r = build_fdset();
- if(r < 0) {
- logger(LOG_ERR, _("Error building fdset: %s"), strerror(errno));
- cp_trace();
- dump_connections();
- return 1;
+ logger(DEBUG_CONNECTIONS, LOG_INFO, "Autodisconnecting from %s", c->name);
+ list_delete(mesh->outgoings, c->outgoing);
+ c->outgoing = NULL;
+ terminate_connection(mesh, c, c->status.active);
+ break;
+ }
}
- r = event_loop(EVLOOP_ONCE);
- now = time(NULL);
- if(r < 0) {
- logger(LOG_ERR, _("Error while waiting for input: %s"),
- strerror(errno));
- cp_trace();
- dump_connections();
- return 1;
- }
+ if(nc >= autoconnect) {
+ /* If we have enough active connections,
+ remove any pending outgoing connections.
+ Do not remove pending connections to unreachable
+ nodes.
+ */
+ node_t *o_node = NULL;
+ for list_each(outgoing_t, o, mesh->outgoings) {
+ o_node = lookup_node(mesh, o->name);
+ /* o_node is NULL if it is not part of the graph yet */
+ if(!o_node || !o_node->status.reachable)
+ continue;
- /* XXX: more libevent transition */
- timeout_del(&timeout);
+ bool found = false;
+ for list_each(connection_t, c, mesh->connections) {
+ if(c->outgoing == o) {
+ found = true;
+ break;
+ }
+ }
+ if(!found) {
+ logger(DEBUG_CONNECTIONS, LOG_INFO, "Cancelled outgoing connection to %s", o->name);
+ /* The node variable is leaked in from using the list_each macro.
+ The o variable could be used, but using node directly
+ is more efficient.
+ */
+ list_delete_node(mesh->outgoings, node);
+ }
+ }
+ }
+ }
- /* Let's check if everybody is still alive */
+ timeout_set(&mesh->loop, data, &(struct timeval){5, rand() % 100000});
+}
- if(last_ping_check + pingtimeout < now) {
- check_dead_connections();
- last_ping_check = now;
+void handle_meta_connection_data(meshlink_handle_t *mesh, connection_t *c) {
+ /* Nothing more to do while receive_meta() reports success. */
+ if (receive_meta(mesh, c))
+ return;
+
+ /* On failure the connection is closed; the edge removal is reported only if the connection was active. */
+ terminate_connection(mesh, c, c->status.active);
+}
- if(routing_mode == RMODE_SWITCH)
- age_subnets();
+void retry(meshlink_handle_t *mesh) {
+ /* Zero the back-off of every outgoing entry; entries whose timer has a callback set are re-armed to fire immediately. */
+ for list_each(outgoing_t, pending, mesh->outgoings) {
+ pending->timeout = 0;
+ if(!pending->ev.cb)
+ continue;
+ timeout_set(&mesh->loop, &pending->ev, &(struct timeval){0, 0});
+ }
- age_past_requests();
+ /* Outgoing connections that have no node yet get their ping timestamp cleared. */
+ for list_each(connection_t, conn, mesh->connections) {
+ if(!conn->outgoing || conn->node)
+ continue;
+ conn->last_ping_time = 0;
+ }
- /* Should we regenerate our key? */
+ /* Re-arm the ping timer with a zero delay. */
+ timeout_set(&mesh->loop, &mesh->pingtimer, &(struct timeval){0, 0});
+}
- if(keyexpires < now) {
- ifdebug(STATUS) logger(LOG_INFO, _("Regenerating symmetric key"));
+/*
+ this is where it all happens...
+
+ Registers the ping and periodic timers (first expiry after mesh->pingtimeout
+ seconds plus a random number of microseconds below 100000), registers
+ meshlink_send_from_queue as callback for mesh->datafromapp with signum 0,
+ and runs the event loop.
+
+ Returns 0 when event_loop_run() succeeds and 1 when it fails. Both timers
+ are removed from the loop before returning in either case.
+
+ NOTE(review): no matching removal of the datafromapp signal is visible
+ here - confirm it is released elsewhere.
+*/
+int main_loop(meshlink_handle_t *mesh) {
+ timeout_add(&mesh->loop, &mesh->pingtimer, timeout_handler, &mesh->pingtimer, &(struct timeval){mesh->pingtimeout, rand() % 100000});
+ timeout_add(&mesh->loop, &mesh->periodictimer, periodic_handler, &mesh->periodictimer, &(struct timeval){mesh->pingtimeout, rand() % 100000});
- RAND_pseudo_bytes((unsigned char *)myself->key, myself->keylength);
- if(myself->cipher)
- EVP_DecryptInit_ex(&packet_ctx, myself->cipher, NULL, (unsigned char *)myself->key, (unsigned char *)myself->key + myself->cipher->key_len);
- send_key_changed(broadcast, myself);
- keyexpires = now + keylifetime;
- }
- }
+ //Add signal handler
+ mesh->datafromapp.signum = 0;
+ signal_add(&(mesh->loop),&(mesh->datafromapp), (signal_cb_t)meshlink_send_from_queue,mesh, mesh->datafromapp.signum);
- if(sigalrm) {
- logger(LOG_INFO, _("Flushing event queue"));
- // TODO: do this another way
- sigalrm = false;
- }
+ /* Remember the outcome instead of returning early, so the timers registered above are always removed again. */
+ int result = 0;
+ if(!event_loop_run(&mesh->loop)) {
+ logger(DEBUG_ALWAYS, LOG_ERR, "Error while waiting for input: %s", strerror(errno));
+ result = 1;
}
- signal_del(&sighup_event);
- signal_del(&sigint_event);
- signal_del(&sigterm_event);
- signal_del(&sigquit_event);
- signal_del(&sigusr1_event);
- signal_del(&sigusr2_event);
- signal_del(&sigwinch_event);
+ timeout_del(&mesh->loop, &mesh->periodictimer);
+ timeout_del(&mesh->loop, &mesh->pingtimer);
- return 0;
+ return result;
}