2 net.c -- most of the network code
3 Copyright (C) 2014 Guus Sliepen <guus@meshlink.io>
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License along
16 with this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 #include "connection.h"
27 #include "meshlink_internal.h"
// Integer helper taking two ints. Its body is not visible in this excerpt;
// presumably it returns the smaller argument - TODO confirm.
// NOTE(review): const on a by-value return type has no effect in C, and
// compilers may warn about the ignored qualifier.
34 static const int min(int a, int b) {
39 Terminate a connection:
41 - Remove the edge representing this connection
43 - Check if we need to retry making an outgoing connection
// Closes meta connection c: logs the closure, marks the connection inactive,
// detaches it from its node, removes its edge, deletes the connection and
// passes the saved outgoing_t to do_outgoing_connection().
//   mesh   - mesh instance the connection belongs to
//   c      - connection to close; it is handed to connection_del() below and
//            is not referenced afterwards (presumably freed there - confirm)
//   report - gates the reverse-edge cleanup below; NOTE(review): it probably
//            also gates the first send_del_edge(), but that guard is on a
//            line not visible in this excerpt
45 void terminate_connection(meshlink_handle_t *mesh, connection_t *c, bool report) {
46 logger(mesh, MESHLINK_INFO, "Closing connection with %s (%s)", c->name, c->hostname);
48 c->status.active = false;
// Clear the node back-pointer only if it still refers to this connection.
50 if(c->node && c->node->connection == c)
51 c->node->connection = NULL;
// Announce removal of the edge of this connection to mesh->everyone, then
// drop the edge locally.
55 send_del_edge(mesh, mesh->everyone, c->edge);
57 edge_del(mesh, c->edge);
60 /* Run MST and SSSP algorithms */
64 /* If the node is not reachable anymore but we remember it had an edge to us, clean it up */
// NOTE(review): c->node is dereferenced here without the NULL check used
// above; confirm an enclosing condition (not visible here) guarantees it.
66 if(report && !c->node->status.reachable) {
// Look up the edge running from that node back to ourselves.
68 e = lookup_edge(c->node, mesh->self);
70 send_del_edge(mesh, mesh->everyone, e);
// Save the outgoing pointer first: c is not touched after connection_del().
76 outgoing_t *outgoing = c->outgoing;
77 connection_del(mesh, c);
79 /* Check if this was our outgoing connection */
82 do_outgoing_connection(mesh, outgoing);
85 /* Clean up dead proxy processes */
// Reap every already-exited child without blocking (WNOHANG); the loop body
// is intentionally empty.
87 while(waitpid(-1, NULL, WNOHANG) > 0);
92 Check if the other end is active.
93 If we have sent packets, but didn't receive any,
94 then possibly the other end is dead. We send a
95 PING request over the meta connection. If the other
96 end does not reply in time, we consider them dead
97 and close the connection.
// Timer callback that walks all meta connections and handles ping timeouts.
//   loop - event loop that fired; loop->data holds the meshlink_handle_t
//   data - passed back to timeout_set() to re-arm this same timer
//          (main_loop registers this handler with &mesh->pingtimer as data)
99 static void timeout_handler(event_loop_t *loop, void *data) {
100 meshlink_handle_t *mesh = loop->data;
101 logger(mesh, MESHLINK_DEBUG, "timeout_handler()");
103 for list_each(connection_t, c, mesh->connections) {
104 // Also make sure that if outstanding key requests for the UDP counterpart of a connection have timed out, we restart them.
// NOTE(review): c->node is dereferenced here; any NULL guard would sit on a
// line not visible in this excerpt - confirm.
106 if(c->node->status.waitingforkey && c->node->last_req_key + mesh->pingtimeout <= mesh->loop.now.tv_sec)
107 send_req_key(mesh, c->node);
// Only act on connections whose last ping is at least pingtimeout seconds old.
109 if(c->last_ping_time + mesh->pingtimeout <= mesh->loop.now.tv_sec) {
110 if(c->status.active) {
111 if(c->status.pinged) {
// NOTE(review): the (long) cast binds to tv_sec only, so the argument matches
// %ld only if last_ping_time is no wider than long - confirm its type.
112 logger(mesh, MESHLINK_INFO, "%s (%s) didn't respond to PING in %ld seconds", c->name, c->hostname, (long)mesh->loop.now.tv_sec - c->last_ping_time);
// Not pinged yet: test whether pinginterval has elapsed since the last ping
// (the branch body is not visible in this excerpt).
113 } else if(c->last_ping_time + mesh->pinginterval <= mesh->loop.now.tv_sec) {
// Connection is not active: log which phase timed out.
120 if(c->status.connecting)
121 logger(mesh, MESHLINK_WARNING, "Timeout while connecting to %s (%s)", c->name, c->hostname);
123 logger(mesh, MESHLINK_WARNING, "Timeout from %s (%s) during authentication", c->name, c->hostname);
// Tear the connection down; the report argument is its current active flag.
125 terminate_connection(mesh, c, c->status.active);
// Re-arm: fire again in pingtimeout seconds plus a random offset of up to
// 99999 microseconds.
129 timeout_set(&mesh->loop, data, &(struct timeval){mesh->pingtimeout, rand() % 100000});
132 /// Utility function to establish connections based on condition check
133 /** The function iterates over all nodes, but skips those that do
134 * not pass the condition check.
136 * The condition check function is passed
137 * a pointer to the current node index i, a pointer to a random number r
138 * in the range [0, rand_modulo), and the node pointer n. This function should return true
139 * if a connection attempt to the node should be made.
141 * @param mesh A pointer to the mesh structure
142 * @param rand_modulo Random index is selected between 0 and rand_modulo
143 * @param cond_check A function pointer. This function should return true
144 * if a connection attempt to the node should be made
// Starts outgoing connection attempts to nodes accepted by cond_check.
// rand_modulo is the divisor of rand() below and therefore must be non-zero.
146 static void cond_add_connection(meshlink_handle_t *mesh, int rand_modulo, bool (*cond_check)(int*, int*, node_t*)) {
// Random value in [0, rand_modulo), handed to cond_check by pointer.
147 int r = rand() % rand_modulo;
150 for splay_each(node_t, n, mesh->nodes) {
151 /* skip nodes that do not pass condition check */
// Arguments are (&i, &r, n); i is declared on a line not visible here.
152 if(!(*cond_check)(&i, &r, n))
155 /* check if there is already a connection attempt to this node */
// An outgoing entry with the same name counts as an existing attempt.
157 for list_each(outgoing_t, outgoing, mesh->outgoings) {
158 if(!strcmp(outgoing->name, n->name)) {
165 //TODO: if the node is blacklisted the connection will not happen, but
166 //the user will read this debug message "Autoconnecting to %s" that is misleading
167 logger(mesh, MESHLINK_INFO, "Autoconnecting to %s", n->name);
// Allocate an outgoing_t for this node, give it its own copy of the name,
// append it to mesh->outgoings and hand it to setup_outgoing_connection().
168 outgoing_t *outgoing = xzalloc(sizeof *outgoing);
169 outgoing->mesh = mesh;
170 outgoing->name = xstrdup(n->name);
171 list_insert_tail(mesh->outgoings, outgoing);
172 setup_outgoing_connection(mesh, outgoing);
// Condition callback that periodic_handler passes to cond_add_connection()
// when it wants to add a connection to a random node.
// The body is not visible in this excerpt.
178 static bool found_random_node(int *i, int *r, node_t *n) {
// Condition callback that periodic_handler passes to cond_add_connection()
// when it tries to connect to unreachable nodes.
188 static bool found_random_unreachable_node(int *i, int *r, node_t *n) {
// Reachable nodes take this branch; its body is not visible in this excerpt
// (presumably they are rejected - TODO confirm).
189 if(n->status.reachable)
// Periodic timer callback. It sleeps when contradicting edge messages pile
// up, then runs the autoconnect logic that adds or removes meta connections,
// and finally re-arms its own timer.
//   loop - event loop that fired; loop->data holds the meshlink_handle_t
//   data - passed back to timeout_set() to re-arm this same timer
// Several declarations used below (key, num_total, timeout) are on lines not
// visible in this excerpt.
201 static void periodic_handler(event_loop_t *loop, void *data) {
202 meshlink_handle_t *mesh = loop->data;
204 /* Check if there are too many contradicting ADD_EDGE and DEL_EDGE messages.
205 This usually only happens when another node has the same Name as this node.
206 If so, sleep for a short while to prevent a storm of contradicting messages.
209 if(mesh->contradicting_del_edge > 100 && mesh->contradicting_add_edge > 100) {
210 logger(mesh, MESHLINK_WARNING, "Possible node with same Name as us! Sleeping %d seconds.", mesh->sleeptime);
// NOTE(review): usleep() blocks the calling thread, and POSIX allows it to
// fail with EINVAL for arguments of 1000000 or more - confirm this is
// acceptable on all targets.
211 usleep(mesh->sleeptime * 1000000LL);
// Double the back-off. NOTE(review): the < 0 test below only fires after
// signed overflow, which is undefined behavior if sleeptime is a signed int
// (it is printed with %d above) - confirm its type.
212 mesh->sleeptime *= 2;
213 if(mesh->sleeptime < 0)
214 mesh->sleeptime = 3600;
// Halve the back-off with a floor of 10 (presumably the else branch of the
// check above; the else line is not visible in this excerpt).
216 mesh->sleeptime /= 2;
217 if(mesh->sleeptime < 10)
218 mesh->sleeptime = 10;
// Reset both counters for the next period.
221 mesh->contradicting_add_edge = 0;
222 mesh->contradicting_del_edge = 0;
226 /* Check if we need to make or break connections. */
// Autoconnect runs only when mesh->nodes holds more than one entry.
228 if(mesh->nodes->count > 1) {
230 logger(mesh, MESHLINK_INFO, "--- autoconnect begin ---");
// Per-device-class connection counters, ordered by dclass_ccount_compare and
// released with splay_free_tree() near the end of this function.
232 splay_tree_t* ccounts = splay_alloc_tree(dclass_ccount_compare, NULL);
234 /* Count number of active connections per device class */
236 for list_each(connection_t, c, mesh->connections) {
// Look up the counter for the device class of this connection.
// NOTE(review): c->node is dereferenced; any guard is on a line not visible.
240 key.dclass = c->node->dclass;
242 dclass_ccount_t* ccount = splay_search(ccounts, &key);
// Allocate and insert a counter for this class (presumably only when the
// search above found none; that check is not visible here).
246 ccount = dclass_ccount_alloc();
247 ccount->dclass = c->node->dclass;
248 splay_insert(ccounts, ccount);
256 /* Count number of unreachable nodes */
257 int num_unreachable = 0;
258 for splay_each(node_t, n, mesh->nodes) {
259 if(!n->status.reachable)
// Ask the device-class helpers whether the current counts are sufficient and
// what the maximum connection count is for our own class.
263 bool satisfied = dclass_ccounts_satisfied(mesh->self->dclass, ccounts, num_total);
264 int maxcc = max_ccount_from_dclass(mesh->self->dclass);
266 logger(mesh, MESHLINK_INFO, "* num_total = %d, satisfied = %d, maxcc = %d", num_total, satisfied, maxcc);
269 logger(mesh, MESHLINK_INFO, "* Not enough active connections, try to add one.");
270 /* Not enough active connections, try to add one.
271 Choose a random node, if we don't have a connection to it,
272 and we are not already trying to make one, create an
273 outgoing connection to this node.
// The modulo passed here is the node count, checked to be > 1 above.
275 cond_add_connection(mesh, mesh->nodes->count, &found_random_node);
278 if(satisfied && num_unreachable > 0) {
279 logger(mesh, MESHLINK_INFO, "* Min number of connections established. Now heal possible partitions.");
280 /* Min number of connections established. Now try
281 to connect to some unreachable nodes to attempt
282 to heal possible partitions.
// The modulo passed here is num_unreachable, checked to be > 0 above.
284 cond_add_connection(mesh, num_unreachable, &found_random_unreachable_node);
287 if(num_total > maxcc) {
288 logger(mesh, MESHLINK_INFO, "* Too many active connections, try to remove one.");
289 /* Too many active connections, try to remove one.
290 Choose a random outgoing connection to a node
291 that has at least one other connection.
// Random index below num_total; num_total is non-zero here provided maxcc is
// not negative - confirm.
293 int r = rand() % num_total;
296 for list_each(connection_t, c, mesh->connections) {
// Connections that are inactive, not outgoing, without a node, or whose node
// has fewer than two edges are presumably skipped (the statements after
// these conditions are not visible here).
297 if(!c->status.active)
303 if(!c->outgoing || !c->node || c->node->edge_tree->count < 2)
306 logger(mesh, MESHLINK_INFO, "Autodisconnecting from %s", c->name);
// Remove the outgoing entry from mesh->outgoings, then close the connection.
307 list_delete(mesh->outgoings, c->outgoing);
309 terminate_connection(mesh, c, c->status.active);
315 logger(mesh, MESHLINK_INFO, "* We have enough active connections, remove pending outgoing connections.");
316 /* If we have enough active connections,
317 remove any pending outgoing connections.
318 Do not remove pending connections to unreachable
321 node_t *o_node = NULL;
322 for list_each(outgoing_t, o, mesh->outgoings) {
323 o_node = lookup_node(mesh, o->name);
324 /* o_node is NULL if it is not part of the graph yet */
325 if(!o_node || !o_node->status.reachable)
// Check whether some connection is currently using this outgoing entry.
329 for list_each(connection_t, c, mesh->connections) {
330 if(c->outgoing == o) {
336 logger(mesh, MESHLINK_INFO, "Cancelled outgoing connection to %s", o->name);
337 /* The node variable is leaked in from using the list_each macro.
338 The o variable could be used, but using node directly
341 list_delete_node(mesh->outgoings, node);
// True when we are not satisfied and connections plus pending outgoing
// entries number fewer than the known nodes; the statement this condition
// controls is not visible in this excerpt.
346 if (!satisfied && (num_total + mesh->outgoings->count) < mesh->nodes->count)
348 logger(mesh, MESHLINK_INFO, "* No timeout.");
352 splay_free_tree(ccounts);
354 logger(mesh, MESHLINK_INFO, "--- autoconnect end ---");
// Re-arm with the timeout chosen above (its declaration is not visible here)
// plus a random offset of up to 99999 microseconds.
357 timeout_set(&mesh->loop, data, &(struct timeval){timeout, rand() % 100000});
// Processes incoming meta data for connection c through receive_meta().
// If receive_meta() returns false the connection is terminated, and the
// termination is reported only when the connection was active.
360 void handle_meta_connection_data(meshlink_handle_t *mesh, connection_t *c) {
361 if (!receive_meta(mesh, c)) {
362 terminate_connection(mesh, c, c->status.active);
// Forces immediate retries on mesh: zeroes the timeout of every outgoing
// entry and re-arms its event with no delay, resets the ping time of
// outgoing connections that have no node yet, and fires the ping timer at
// once.
367 void retry(meshlink_handle_t *mesh) {
368 /* Reset the reconnection timers for all outgoing connections */
369 for list_each(outgoing_t, outgoing, mesh->outgoings) {
370 outgoing->timeout = 0;
// Zero delay; a guard may sit on the line not visible before this call.
372 timeout_set(&mesh->loop, &outgoing->ev, &(struct timeval){0, 0});
375 /* Check for outgoing connections that are in progress, and reset their ping timers */
376 for list_each(connection_t, c, mesh->connections) {
// An outgoing connection without a node is treated as still in progress.
// A last_ping_time of 0 makes the timeout_handler age check succeed.
377 if(c->outgoing && !c->node)
378 c->last_ping_time = 0;
381 /* Kick the ping timeout handler */
382 timeout_set(&mesh->loop, &mesh->pingtimer, &(struct timeval){0, 0});
386 this is where it all happens...
// Runs the event loop for mesh: registers the ping and periodic timers and
// the application-data signal, calls event_loop_run(), logs an error if it
// fails, and removes both timers afterwards.
// The return statements are not visible in this excerpt.
388 int main_loop(meshlink_handle_t *mesh) {
// The ping timer first fires after pingtimeout seconds plus a random offset
// of up to 99999 microseconds; each timer is passed to its handler as data.
389 timeout_add(&mesh->loop, &mesh->pingtimer, timeout_handler, &mesh->pingtimer, &(struct timeval){mesh->pingtimeout, rand() % 100000});
// The periodic timer is scheduled with zero delay.
390 timeout_add(&mesh->loop, &mesh->periodictimer, periodic_handler, &mesh->periodictimer, &(struct timeval){0, 0});
// Register meshlink_send_from_queue as the callback for the datafromapp
// signal, using signum 0 and mesh as callback data.
393 mesh->datafromapp.signum = 0;
394 signal_add(&(mesh->loop),&(mesh->datafromapp), (signal_cb_t)meshlink_send_from_queue,mesh, mesh->datafromapp.signum);
396 if(!event_loop_run(&mesh->loop)) {
// NOTE(review): assumes event_loop_run() leaves errno set on failure - confirm.
397 logger(mesh, MESHLINK_ERROR, "Error while waiting for input: %s", strerror(errno));
401 timeout_del(&mesh->loop, &mesh->periodictimer);
402 timeout_del(&mesh->loop, &mesh->pingtimer);