]> git.meshlink.io Git - meshlink/blob - src/net.c
converging auto connect algorithm
[meshlink] / src / net.c
1 /*
2     net.c -- most of the network code
3     Copyright (C) 2014 Guus Sliepen <guus@meshlink.io>
4
5     This program is free software; you can redistribute it and/or modify
6     it under the terms of the GNU General Public License as published by
7     the Free Software Foundation; either version 2 of the License, or
8     (at your option) any later version.
9
10     This program is distributed in the hope that it will be useful,
11     but WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13     GNU General Public License for more details.
14
15     You should have received a copy of the GNU General Public License along
16     with this program; if not, write to the Free Software Foundation, Inc.,
17     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20 #include "system.h"
21
22 #include "utils.h"
23 #include "conf.h"
24 #include "connection.h"
25 #include "graph.h"
26 #include "logger.h"
27 #include "meshlink_internal.h"
28 #include "meta.h"
29 #include "net.h"
30 #include "netutl.h"
31 #include "protocol.h"
32 #include "xalloc.h"
33
34
/* Return the smaller of the two integers a and b. */
static int min(int a, int b) {
	return a < b ? a : b;
}
38
39 /*
40   Terminate a connection:
41   - Mark it as inactive
42   - Remove the edge representing this connection
43   - Kill it with fire
44   - Check if we need to retry making an outgoing connection
45 */
46 void terminate_connection(meshlink_handle_t *mesh, connection_t *c, bool report) {
47         logger(mesh, MESHLINK_INFO, "Closing connection with %s (%s)", c->name, c->hostname);
48
49         c->status.active = false;
50
51         if(c->node && c->node->connection == c)
52                 c->node->connection = NULL;
53
54         if(c->edge) {
55                 if(report)
56                         send_del_edge(mesh, mesh->everyone, c->edge);
57
58                 edge_del(mesh, c->edge);
59                 c->edge = NULL;
60
61                 /* Run MST and SSSP algorithms */
62
63                 graph(mesh);
64
65                 /* If the node is not reachable anymore but we remember it had an edge to us, clean it up */
66
67                 if(report && !c->node->status.reachable) {
68                         edge_t *e;
69                         e = lookup_edge(c->node, mesh->self);
70                         if(e) {
71                                 send_del_edge(mesh, mesh->everyone, e);
72                                 edge_del(mesh, e);
73                         }
74                 }
75         }
76
77         outgoing_t *outgoing = c->outgoing;
78         connection_del(mesh, c);
79
80         /* Check if this was our outgoing connection */
81
82         if(outgoing)
83                 do_outgoing_connection(mesh, outgoing);
84
85 #ifndef HAVE_MINGW
86         /* Clean up dead proxy processes */
87
88         while(waitpid(-1, NULL, WNOHANG) > 0);
89 #endif
90 }
91
92 /*
93   Check if the other end is active.
94   If we have sent packets, but didn't receive any,
95   then possibly the other end is dead. We send a
96   PING request over the meta connection. If the other
97   end does not reply in time, we consider them dead
98   and close the connection.
99 */
100 static void timeout_handler(event_loop_t *loop, void *data) {
101         meshlink_handle_t *mesh = loop->data;
102         logger(mesh, MESHLINK_DEBUG, "timeout_handler()");
103
104         for list_each(connection_t, c, mesh->connections) {
105                 // Also make sure that if outstanding key requests for the UDP counterpart of a connection has timed out, we restart it.
106                 if(c->node) {
107                         if(c->node->status.waitingforkey && c->node->last_req_key + mesh->pingtimeout <= mesh->loop.now.tv_sec)
108                                 send_req_key(mesh, c->node);
109                 }
110                 if(c->last_ping_time + mesh->pingtimeout <= mesh->loop.now.tv_sec) {
111                         if(c->status.active) {
112                                 if(c->status.pinged) {
113                                         logger(mesh, MESHLINK_INFO, "%s (%s) didn't respond to PING in %ld seconds", c->name, c->hostname, (long)mesh->loop.now.tv_sec - c->last_ping_time);
114                                 } else if(c->last_ping_time + mesh->pinginterval <= mesh->loop.now.tv_sec) {
115                                         send_ping(mesh, c);
116                                         continue;
117                                 } else {
118                                         continue;
119                                 }
120                         } else {
121                                 if(c->status.connecting)
122                                         logger(mesh, MESHLINK_WARNING, "Timeout while connecting to %s (%s)", c->name, c->hostname);
123                                 else
124                                         logger(mesh, MESHLINK_WARNING, "Timeout from %s (%s) during authentication", c->name, c->hostname);
125                         }
126                         terminate_connection(mesh, c, c->status.active);
127                 }
128         }
129
130         timeout_set(&mesh->loop, data, &(struct timeval){mesh->pingtimeout, rand() % 100000});
131 }
132
133 // devclass asc, last_connect_try desc
134 static int node_compare_devclass_asc_last_connect_try_desc(const void *a, const void *b)
135 {
136         const node_t *na = a, *nb = b;
137
138         if(na->devclass < nb->devclass)
139                 { return -1; }
140
141         if(na->devclass > nb->devclass)
142                 { return 1; }
143
144         if(na->last_connect_try == nb->last_connect_try)
145                 return 0;
146
147         if(nb->last_connect_try == 0 || na->last_connect_try < nb->last_connect_try)
148                 return -1;
149
150         if(na->last_connect_try == 0 || na->last_connect_try > nb->last_connect_try)
151                 return 1;
152
153         return 0;
154 }
155
156 // last_connect_try desc
157 static int node_compare_last_connect_try_desc(const void *a, const void *b)
158 {
159         const node_t *na = a, *nb = b;
160
161         if(na->last_connect_try == nb->last_connect_try)
162                 return 0;
163
164         if(nb->last_connect_try == 0 || na->last_connect_try < nb->last_connect_try)
165                 return -1;
166
167         if(na->last_connect_try == 0 || na->last_connect_try > nb->last_connect_try)
168                 return 1;
169
170         return 0;
171 }
172
173 // devclass desc
174 static int node_compare_devclass_desc(const void *a, const void *b)
175 {
176         const node_t *na = a, *nb = b;
177
178         if(na->devclass < nb->devclass)
179                 { return -1; }
180
181         if(na->devclass > nb->devclass)
182                 { return 1; }
183
184         return 0;
185 }
186
187
188 static void periodic_handler(event_loop_t *loop, void *data) {
189         meshlink_handle_t *mesh = loop->data;
190
191         /* Check if there are too many contradicting ADD_EDGE and DEL_EDGE messages.
192            This usually only happens when another node has the same Name as this node.
193            If so, sleep for a short while to prevent a storm of contradicting messages.
194         */
195
196         if(mesh->contradicting_del_edge > 100 && mesh->contradicting_add_edge > 100) {
197                 logger(mesh, MESHLINK_WARNING, "Possible node with same Name as us! Sleeping %d seconds.", mesh->sleeptime);
198                 usleep(mesh->sleeptime * 1000000LL);
199                 mesh->sleeptime *= 2;
200                 if(mesh->sleeptime < 0)
201                         mesh->sleeptime = 3600;
202         } else {
203                 mesh->sleeptime /= 2;
204                 if(mesh->sleeptime < 10)
205                         mesh->sleeptime = 10;
206         }
207
208         mesh->contradicting_add_edge = 0;
209         mesh->contradicting_del_edge = 0;
210
211         int timeout = 5;
212
213         /* Check if we need to make or break connections. */
214
215         if(mesh->nodes->count > 1) {
216
217                 logger(mesh, MESHLINK_INFO, "--- autoconnect begin ---");
218
219
220                 int retry_timeout = min(mesh->nodes->count * 5, 60);
221
222                 // connect disconnect nodes
223
224                 node_t* connect_to = NULL;
225                 node_t* disconnect_from = NULL;
226
227
228                 // get cur_connects
229
230                 int cur_connects = 0;
231
232                 for list_each(connection_t, c, mesh->connections)
233                 {
234                         if(!c->status.remove_unused)
235                         {
236                                 cur_connects += 1;
237                         }
238                 }
239
240                 logger(mesh, MESHLINK_INFO, "* cur_connects = %d", cur_connects);
241
242
243                 // get min_connects and max_connects
244
245                 int min_connects = dev_class_traits[mesh->devclass].min_connects;
246                 int max_connects = dev_class_traits[mesh->devclass].max_connects;
247
248                 logger(mesh, MESHLINK_INFO, "* min_connects = %d", min_connects);
249                 logger(mesh, MESHLINK_INFO, "* max_connects = %d", max_connects);
250
251
252                 // find the best one for initial connect
253
254                 if(cur_connects < min_connects)
255                 {
256                         splay_tree_t *nodes = splay_alloc_tree(node_compare_devclass_asc_last_connect_try_desc, NULL);
257
258                         for splay_each(node_t, n, mesh->nodes)
259                         {
260                                 if(n->devclass <= mesh->devclass && !n->connection && (n->last_connect_try == 0 || (time(NULL) - n->last_connect_try) > retry_timeout))
261                                         { splay_insert(nodes, n); }
262                         }
263
264                         if(nodes->head)
265                         {
266                                 logger(mesh, MESHLINK_INFO, "* found best one for initial connect");
267
268                                 timeout = 0;
269                                 connect_to = (node_t*)nodes->head->data;
270                         }
271
272                         splay_free_tree(nodes);
273                 }
274
275
276                 // find better nodes to connect to
277
278                 if(!connect_to && min_connects <= cur_connects < max_connects)
279                 {
280                         unsigned int connects = 0;
281
282                         for(int devclass = 0; devclass <= mesh->devclass; ++devclass)
283                         {
284                                 for list_each(connection_t, c, mesh->connections)
285                                 {
286                                         if(!c->status.remove_unused && c->node && c->node->devclass == devclass)
287                                                 { connects += 1; }
288                                 }
289
290                                 if( connects < min_connects )
291                                 {
292                                         splay_tree_t *nodes = splay_alloc_tree(node_compare_last_connect_try_desc, NULL);
293
294                                         for splay_each(node_t, n, mesh->nodes)
295                                         {
296                                                 if(n->devclass == devclass && !n->connection && (n->last_connect_try == 0 || (time(NULL) - n->last_connect_try) > retry_timeout))
297                                                         { splay_insert(nodes, n); }
298                                         }
299
300                                         if(nodes->head)
301                                         {
302                                                 logger(mesh, MESHLINK_INFO, "* found better node");
303                                                 connect_to = (node_t*)nodes->head->data;
304
305                                                 splay_free_tree(nodes);
306                                                 break;
307                                         }
308
309                                         splay_free_tree(nodes);
310                                 }
311                                 else
312                                         { break; }
313                         }
314                 }
315
316
317                 // heal partitions
318
319                 if(!connect_to && min_connects <= cur_connects < max_connects)
320                 {
321                         splay_tree_t *nodes = splay_alloc_tree(node_compare_devclass_asc_last_connect_try_desc, NULL);
322
323                         for splay_each(node_t, n, mesh->nodes)
324                         {
325                                 if(n->devclass <= mesh->devclass && !n->status.reachable && (n->last_connect_try == 0 || (time(NULL) - n->last_connect_try) > retry_timeout))
326                                         { splay_insert(nodes, n); }
327                         }
328
329                         if(nodes->head)
330                         {
331                                 logger(mesh, MESHLINK_INFO, "* try to heal partition");
332                                 connect_to = (node_t*)nodes->head->data;
333                         }
334
335                         splay_free_tree(nodes);
336                 }
337
338
339                 // perform connect
340
341                 if(connect_to && !connect_to->connection)
342                 {
343                         logger(mesh, MESHLINK_INFO, "Autoconnecting to %s", connect_to->name);
344                         outgoing_t *outgoing = xzalloc(sizeof(outgoing_t));
345                         outgoing->mesh = mesh;
346                         outgoing->name = xstrdup(connect_to->name);
347                         list_insert_tail(mesh->outgoings, outgoing);
348                         setup_outgoing_connection(mesh, outgoing);
349                 }
350
351
352                 // disconnect suboptimal outgoing connections
353
354                 if(min_connects < cur_connects <= max_connects)
355                 {
356                         unsigned int connects = 0;
357
358                         for(int devclass = 0; devclass <= mesh->devclass; ++devclass)
359                         {
360                                 for list_each(connection_t, c, mesh->connections)
361                                 {
362                                         if(!c->status.remove_unused && c->node && c->node->devclass == devclass)
363                                                 { connects += 1; }
364                                 }
365
366                                 if( min_connects < connects )
367                                 {
368                                         splay_tree_t *nodes = splay_alloc_tree(node_compare_devclass_desc, NULL);
369
370                                         for list_each(connection_t, c, mesh->connections)
371                                         {
372                                                 if(!c->status.remove_unused && c->outgoing && c->node && c->node->devclass >= devclass)
373                                                         { splay_insert(nodes, c->node); }
374                                         }
375
376                                         if(nodes->head)
377                                         {
378                                                 logger(mesh, MESHLINK_INFO, "* disconnect suboptimal outgoing connection");
379                                                 disconnect_from = (node_t*)nodes->head->data;
380                                         }
381
382                                         splay_free_tree(nodes);
383                                         break;
384                                 }
385                         }
386                 }
387
388
389                 // disconnect connections (too many connections)
390
391                 if(!disconnect_from && max_connects < cur_connects)
392                 {
393                         splay_tree_t *nodes = splay_alloc_tree(node_compare_devclass_desc, NULL);
394
395                         for list_each(connection_t, c, mesh->connections)
396                         {
397                                 if(!c->status.remove_unused && c->node)
398                                         { splay_insert(nodes, c->node); }
399                         }
400
401                         if(nodes->head)
402                         {
403                                 logger(mesh, MESHLINK_INFO, "* disconnect connection (too many connections");
404
405                                 timeout = 0;
406                                 disconnect_from = (node_t*)nodes->head->data;
407                         }
408
409                         splay_free_tree(nodes);
410                 }
411
412
413                 // perform disconnect
414
415                 if(disconnect_from && disconnect_from->connection)
416                 {
417                         logger(mesh, MESHLINK_INFO, "Autodisconnecting from %s", disconnect_from->connection->name);
418                         list_delete(mesh->outgoings, disconnect_from->connection->outgoing);
419                         disconnect_from->connection->outgoing = NULL;
420                         terminate_connection(mesh, disconnect_from->connection, disconnect_from->connection->status.active);
421                 }
422
423
424                 // done!
425
426                 logger(mesh, MESHLINK_INFO, "--- autoconnect end ---");
427         }
428
429         timeout_set(&mesh->loop, data, &(struct timeval){timeout, rand() % 100000});
430 }
431
432 void handle_meta_connection_data(meshlink_handle_t *mesh, connection_t *c) {
433         if (!receive_meta(mesh, c)) {
434                 terminate_connection(mesh, c, c->status.active);
435                 return;
436         }
437 }
438
439 void retry(meshlink_handle_t *mesh) {
440         /* Reset the reconnection timers for all outgoing connections */
441         for list_each(outgoing_t, outgoing, mesh->outgoings) {
442                 outgoing->timeout = 0;
443                 if(outgoing->ev.cb)
444                         timeout_set(&mesh->loop, &outgoing->ev, &(struct timeval){0, 0});
445         }
446
447         /* Check for outgoing connections that are in progress, and reset their ping timers */
448         for list_each(connection_t, c, mesh->connections) {
449                 if(c->outgoing && !c->node)
450                         c->last_ping_time = 0;
451         }
452
453         /* Kick the ping timeout handler */
454         timeout_set(&mesh->loop, &mesh->pingtimer, &(struct timeval){0, 0});
455 }
456
457 /*
458   this is where it all happens...
459 */
460 int main_loop(meshlink_handle_t *mesh) {
461         timeout_add(&mesh->loop, &mesh->pingtimer, timeout_handler, &mesh->pingtimer, &(struct timeval){mesh->pingtimeout, rand() % 100000});
462         timeout_add(&mesh->loop, &mesh->periodictimer, periodic_handler, &mesh->periodictimer, &(struct timeval){0, 0});
463
464         //Add signal handler
465         mesh->datafromapp.signum = 0;
466         signal_add(&(mesh->loop),&(mesh->datafromapp), (signal_cb_t)meshlink_send_from_queue,mesh, mesh->datafromapp.signum);
467
468         if(!event_loop_run(&mesh->loop)) {
469                 logger(mesh, MESHLINK_ERROR, "Error while waiting for input: %s", strerror(errno));
470                 return 1;
471         }
472
473         timeout_del(&mesh->loop, &mesh->periodictimer);
474         timeout_del(&mesh->loop, &mesh->pingtimer);
475
476         return 0;
477 }