]> git.meshlink.io Git - meshlink/blob - src/net.c
Add a configurable fast connection retry period.
[meshlink] / src / net.c
1 /*
2     net.c -- most of the network code
3     Copyright (C) 2014-2017 Guus Sliepen <guus@meshlink.io>
4
5     This program is free software; you can redistribute it and/or modify
6     it under the terms of the GNU General Public License as published by
7     the Free Software Foundation; either version 2 of the License, or
8     (at your option) any later version.
9
10     This program is distributed in the hope that it will be useful,
11     but WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13     GNU General Public License for more details.
14
15     You should have received a copy of the GNU General Public License along
16     with this program; if not, write to the Free Software Foundation, Inc.,
17     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20 #include "system.h"
21
22 #include "utils.h"
23 #include "conf.h"
24 #include "connection.h"
25 #include "graph.h"
26 #include "logger.h"
27 #include "meshlink_internal.h"
28 #include "meta.h"
29 #include "net.h"
30 #include "netutl.h"
31 #include "protocol.h"
32 #include "xalloc.h"
33
34 #include <assert.h>
35
#if !defined(min)
/* Return the smaller of two ints (only defined when the platform does not already provide min). */
static inline int min(int a, int b) {
        if(b <= a) {
                return b;
        }

        return a;
}
#endif
41
/* Seconds. Ping timeout used by timeout_handler() for connections that have no node yet;
   also the default re-arm period of periodic_handler() and the per-node factor of its retry_timeout. */
static const int default_timeout = 5;
/* Seconds. Ping interval used by timeout_handler() for connections that have no node yet. */
static const int default_interval = 60;
44
45 /*
46   Terminate a connection:
47   - Mark it as inactive
48   - Remove the edge representing this connection
49   - Kill it with fire
50   - Check if we need to retry making an outgoing connection
51 */
52 void terminate_connection(meshlink_handle_t *mesh, connection_t *c, bool report) {
53         logger(mesh, MESHLINK_INFO, "Closing connection with %s", c->name);
54
55         c->status.active = false;
56
57         if(c->node && c->node->connection == c) {
58                 c->node->connection = NULL;
59         }
60
61         if(c->edge) {
62                 if(report) {
63                         send_del_edge(mesh, mesh->everyone, c->edge, 0);
64                 }
65
66                 edge_del(mesh, c->edge);
67                 c->edge = NULL;
68
69                 /* Run MST and SSSP algorithms */
70
71                 graph(mesh);
72
73                 /* If the node is not reachable anymore but we remember it had an edge to us, clean it up */
74
75                 if(report && c->node && !c->node->status.reachable) {
76                         edge_t *e;
77                         e = lookup_edge(c->node, mesh->self);
78
79                         if(e) {
80                                 send_del_edge(mesh, mesh->everyone, e, 0);
81                                 edge_del(mesh, e);
82                         }
83                 }
84         }
85
86         outgoing_t *outgoing = c->outgoing;
87         connection_del(mesh, c);
88
89         /* Check if this was our outgoing connection */
90
91         if(outgoing) {
92                 do_outgoing_connection(mesh, outgoing);
93         }
94
95 #ifndef HAVE_MINGW
96         /* Clean up dead proxy processes */
97
98         while(waitpid(-1, NULL, WNOHANG) > 0);
99
100 #endif
101 }
102
103 /*
104   Check if the other end is active.
105   If we have sent packets, but didn't receive any,
106   then possibly the other end is dead. We send a
107   PING request over the meta connection. If the other
108   end does not reply in time, we consider them dead
109   and close the connection.
110 */
111 static void timeout_handler(event_loop_t *loop, void *data) {
112         assert(data);
113
114         meshlink_handle_t *mesh = loop->data;
115         logger(mesh, MESHLINK_DEBUG, "timeout_handler()");
116
117         for list_each(connection_t, c, mesh->connections) {
118                 int pingtimeout = c->node ? mesh->dev_class_traits[c->node->devclass].pingtimeout : default_timeout;
119                 int pinginterval = c->node ? mesh->dev_class_traits[c->node->devclass].pinginterval : default_interval;
120
121                 if(c->outgoing && c->outgoing->timeout < 5) {
122                         pingtimeout = 1;
123                 }
124
125                 // Also make sure that if outstanding key requests for the UDP counterpart of a connection has timed out, we restart it.
126                 if(c->node) {
127                         if(c->node->status.waitingforkey && c->node->last_req_key + pingtimeout <= mesh->loop.now.tv_sec) {
128                                 send_req_key(mesh, c->node);
129                         }
130                 }
131
132                 if(c->last_ping_time + pingtimeout <= mesh->loop.now.tv_sec) {
133                         if(c->status.active) {
134                                 if(c->status.pinged) {
135                                         logger(mesh, MESHLINK_INFO, "%s didn't respond to PING in %ld seconds", c->name, (long)mesh->loop.now.tv_sec - c->last_ping_time);
136                                 } else if(c->last_ping_time + pinginterval <= mesh->loop.now.tv_sec) {
137                                         send_ping(mesh, c);
138                                         continue;
139                                 } else {
140                                         continue;
141                                 }
142                         } else {
143                                 if(c->status.connecting) {
144                                         logger(mesh, MESHLINK_WARNING, "Timeout while connecting to %s", c->name);
145                                 } else {
146                                         logger(mesh, MESHLINK_WARNING, "Timeout from %s during authentication", c->name);
147                                 }
148                         }
149
150                         terminate_connection(mesh, c, c->status.active);
151                 }
152         }
153
154         timeout_set(&mesh->loop, data, &(struct timeval) {
155                 1, prng(mesh, TIMER_FUDGE)
156         });
157 }
158
159 // devclass asc, last_successfull_connection desc
160 static int node_compare_devclass_asc_lsc_desc(const void *a, const void *b) {
161         const node_t *na = a, *nb = b;
162
163         if(na->devclass < nb->devclass) {
164                 return -1;
165         }
166
167         if(na->devclass > nb->devclass) {
168                 return 1;
169         }
170
171         if(na->last_successfull_connection == nb->last_successfull_connection) {
172                 return 0;
173         }
174
175         if(na->last_successfull_connection == 0 || na->last_successfull_connection > nb->last_successfull_connection) {
176                 return -1;
177         }
178
179         if(nb->last_successfull_connection == 0 || na->last_successfull_connection < nb->last_successfull_connection) {
180                 return 1;
181         }
182
183         if(na < nb) {
184                 return -1;
185         }
186
187         if(na > nb) {
188                 return 1;
189         }
190
191         return 0;
192 }
193
194 // last_successfull_connection desc
195 static int node_compare_lsc_desc(const void *a, const void *b) {
196         const node_t *na = a, *nb = b;
197
198         if(na->last_successfull_connection == nb->last_successfull_connection) {
199                 return 0;
200         }
201
202         if(na->last_successfull_connection == 0 || na->last_successfull_connection > nb->last_successfull_connection) {
203                 return -1;
204         }
205
206         if(nb->last_successfull_connection == 0 || na->last_successfull_connection < nb->last_successfull_connection) {
207                 return 1;
208         }
209
210         if(na < nb) {
211                 return -1;
212         }
213
214         if(na > nb) {
215                 return 1;
216         }
217
218         return 0;
219 }
220
221 // devclass desc
222 static int node_compare_devclass_desc(const void *a, const void *b) {
223         const node_t *na = a, *nb = b;
224
225         if(na->devclass < nb->devclass) {
226                 return -1;
227         }
228
229         if(na->devclass > nb->devclass) {
230                 return 1;
231         }
232
233         if(na < nb) {
234                 return -1;
235         }
236
237         if(na > nb) {
238                 return 1;
239         }
240
241         return 0;
242 }
243
244
245 /*
246
247 autoconnect()
248 {
249         timeout = 5
250
251         // find the best one for initial connect
252
253         if cur < min
254                 newcon =
255                         first from nodes
256                                 where dclass <= my.dclass and !connection and (timestamp - last_retry) > retry_timeout
257                                 order by dclass asc, last_connection desc
258                 if newcon
259                         timeout = 0
260                         goto connect
261
262
263         // find better nodes to connect to: in case we have less than min connections within [BACKBONE, i] and there are nodes which we are not connected to within the range
264
265         if min <= cur < max
266                 j = 0
267                 for i = BACKBONE to my.dclass
268                         j += count(from connections where node.dclass = i)
269                         if j < min
270                                 newcon =
271                                         first from nodes
272                                                 where dclass = i and !connection and (timestamp - last_retry) > retry_timeout
273                                                 order by last_connection desc
274                                 if newcon
275                                         goto connect
276                         else
277                                 break
278
279
280         // heal partitions
281
282         if min <= cur < max
283                 newcon =
284                         first from nodes
285                                 where dclass <= my.dclass and !reachable and (timestamp - last_retry) > retry_timeout
286                                 order by dclass asc, last_connection desc
287                 if newcon
288                         goto connect
289
290
291         // connect
292
293 connect:
294         if newcon
295                 connect newcon
296
297
298         // disconnect outgoing connections in case we have more than min connections within [BACKBONE, i] and there are nodes which we are connected to within the range [i, PORTABLE]
299
300         if min < cur <= max
301                 j = 0
302                 for i = BACKBONE to my.dclass
303                         j += count(from connections where node.dclass = i)
304                         if min < j
305                                 delcon =
306                                         first from nodes
307                                                 where dclass >= i and outgoing_connection
308                                                 order by dclass desc
309                                 if delcon
310                                         goto disconnect
311                                 else
312                                         break
313
314
315         // disconnect connections in case we have more than enough connections
316
317         if max < cur
318                 delcon =
319                         first from nodes
320                                 where outgoing_connection
321                                 order by dclass desc
322                 goto disconnect
323
324         // disconnect
325
326 disconnect
327         if delcon
328                 disconnect delcon
329
330
331         // next iteration
332         next (timeout, autoconnect)
333
334 }
335
336 */
337
338
339 static void periodic_handler(event_loop_t *loop, void *data) {
340         meshlink_handle_t *mesh = loop->data;
341
342         /* Check if there are too many contradicting ADD_EDGE and DEL_EDGE messages.
343            This usually only happens when another node has the same Name as this node.
344            If so, sleep for a short while to prevent a storm of contradicting messages.
345         */
346
347         if(mesh->contradicting_del_edge > 100 && mesh->contradicting_add_edge > 100) {
348                 logger(mesh, MESHLINK_WARNING, "Possible node with same Name as us! Sleeping %d seconds.", mesh->sleeptime);
349                 usleep(mesh->sleeptime * 1000000LL);
350                 mesh->sleeptime *= 2;
351
352                 if(mesh->sleeptime < 0) {
353                         mesh->sleeptime = 3600;
354                 }
355         } else {
356                 mesh->sleeptime /= 2;
357
358                 if(mesh->sleeptime < 10) {
359                         mesh->sleeptime = 10;
360                 }
361         }
362
363         mesh->contradicting_add_edge = 0;
364         mesh->contradicting_del_edge = 0;
365
366         int timeout = default_timeout;
367
368         /* Check if we need to make or break connections. */
369
370         if(mesh->nodes->count > 1) {
371
372                 logger(mesh, MESHLINK_DEBUG, "--- autoconnect begin ---");
373
374                 int retry_timeout = min(mesh->nodes->count * default_timeout, 60);
375
376                 logger(mesh, MESHLINK_DEBUG, "* devclass = %d", mesh->devclass);
377                 logger(mesh, MESHLINK_DEBUG, "* nodes = %d", mesh->nodes->count);
378                 logger(mesh, MESHLINK_DEBUG, "* retry_timeout = %d", retry_timeout);
379
380
381                 // connect disconnect nodes
382
383                 node_t *connect_to = NULL;
384                 node_t *disconnect_from = NULL;
385
386
387                 // get cur_connects
388
389                 unsigned int cur_connects = 0;
390
391                 for list_each(connection_t, c, mesh->connections) {
392                         if(c->status.active) {
393                                 cur_connects += 1;
394                         }
395                 }
396
397                 logger(mesh, MESHLINK_DEBUG, "* cur_connects = %d", cur_connects);
398                 logger(mesh, MESHLINK_DEBUG, "* outgoings = %d", mesh->outgoings->count);
399
400                 // get min_connects and max_connects
401
402                 unsigned int min_connects = mesh->dev_class_traits[mesh->devclass].min_connects;
403                 unsigned int max_connects = mesh->dev_class_traits[mesh->devclass].max_connects;
404
405                 logger(mesh, MESHLINK_DEBUG, "* min_connects = %d", min_connects);
406                 logger(mesh, MESHLINK_DEBUG, "* max_connects = %d", max_connects);
407
408                 // find the best one for initial connect
409
410                 if(cur_connects < min_connects) {
411                         splay_tree_t *nodes = splay_alloc_tree(node_compare_devclass_asc_lsc_desc, NULL);
412
413                         for splay_each(node_t, n, mesh->nodes) {
414                                 logger(mesh, MESHLINK_DEBUG, "* %s->devclass = %d", n->name, n->devclass);
415
416                                 if(n != mesh->self && n->devclass <= mesh->devclass && !n->connection && !n->status.blacklisted && (n->last_connect_try == 0 || (mesh->loop.now.tv_sec - n->last_connect_try) > retry_timeout)) {
417                                         splay_insert(nodes, n);
418                                 }
419                         }
420
421                         if(nodes->head) {
422                                 //timeout = 0;
423                                 connect_to = (node_t *)nodes->head->data;
424
425                                 logger(mesh, MESHLINK_DEBUG, "* found best one for initial connect: %s", connect_to->name);
426                         } else {
427                                 logger(mesh, MESHLINK_DEBUG, "* could not find node for initial connect");
428                         }
429
430                         splay_delete_tree(nodes);
431                 }
432
433
434                 // find better nodes to connect to
435
436                 if(!connect_to && min_connects <= cur_connects && cur_connects < max_connects) {
437                         unsigned int connects = 0;
438
439                         for(dev_class_t devclass = 0; devclass <= mesh->devclass; ++devclass) {
440                                 for list_each(connection_t, c, mesh->connections) {
441                                         if(c->status.active && c->node && c->node->devclass == devclass) {
442                                                 connects += 1;
443                                         }
444                                 }
445
446                                 if(connects < min_connects) {
447                                         splay_tree_t *nodes = splay_alloc_tree(node_compare_lsc_desc, NULL);
448
449                                         for splay_each(node_t, n, mesh->nodes) {
450                                                 if(n != mesh->self && n->devclass == devclass && !n->connection && !n->status.blacklisted && (n->last_connect_try == 0 || (mesh->loop.now.tv_sec - n->last_connect_try) > retry_timeout)) {
451                                                         splay_insert(nodes, n);
452                                                 }
453                                         }
454
455                                         if(nodes->head) {
456                                                 logger(mesh, MESHLINK_DEBUG, "* found better node");
457                                                 connect_to = (node_t *)nodes->head->data;
458
459                                                 splay_delete_tree(nodes);
460                                                 break;
461                                         }
462
463                                         splay_delete_tree(nodes);
464                                 } else {
465                                         break;
466                                 }
467                         }
468
469                         if(!connect_to) {
470                                 logger(mesh, MESHLINK_DEBUG, "* could not find better nodes");
471                         }
472                 }
473
474
475                 // heal partitions
476
477                 if(!connect_to && min_connects <= cur_connects && cur_connects < max_connects) {
478                         splay_tree_t *nodes = splay_alloc_tree(node_compare_devclass_asc_lsc_desc, NULL);
479
480                         for splay_each(node_t, n, mesh->nodes) {
481                                 if(n != mesh->self && n->devclass <= mesh->devclass && !n->status.reachable && !n->status.blacklisted && (n->last_connect_try == 0 || (mesh->loop.now.tv_sec - n->last_connect_try) > retry_timeout)) {
482                                         splay_insert(nodes, n);
483                                 }
484                         }
485
486                         if(nodes->head) {
487                                 logger(mesh, MESHLINK_DEBUG, "* try to heal partition");
488                                 connect_to = (node_t *)nodes->head->data;
489                         } else {
490                                 logger(mesh, MESHLINK_DEBUG, "* could not find nodes for partition healing");
491                         }
492
493                         splay_delete_tree(nodes);
494                 }
495
496
497                 // perform connect
498
499                 if(connect_to && !connect_to->connection) {
500                         connect_to->last_connect_try = mesh->loop.now.tv_sec;
501                         logger(mesh, MESHLINK_DEBUG, "Autoconnect trying to connect to %s", connect_to->name);
502
503                         /* check if there is already a connection attempt to this node */
504                         bool skip = false;
505
506                         for list_each(outgoing_t, outgoing, mesh->outgoings) {
507                                 if(outgoing->node == connect_to) {
508                                         logger(mesh, MESHLINK_DEBUG, "* skip autoconnect since it is an outgoing connection already");
509                                         skip = true;
510                                         break;
511                                 }
512                         }
513
514                         if(!connect_to->status.reachable && !node_read_public_key(mesh, connect_to)) {
515                                 logger(mesh, MESHLINK_DEBUG, "* skip autoconnect since we don't know this node's public key");
516                                 skip = true;
517                         }
518
519                         if(!skip) {
520                                 logger(mesh, MESHLINK_DEBUG, "Autoconnecting to %s", connect_to->name);
521                                 outgoing_t *outgoing = xzalloc(sizeof(outgoing_t));
522                                 outgoing->node = connect_to;
523                                 list_insert_tail(mesh->outgoings, outgoing);
524                                 setup_outgoing_connection(mesh, outgoing);
525                         }
526                 }
527
528
529                 // disconnect suboptimal outgoing connections
530
531                 if(min_connects < cur_connects /*&& cur_connects <= max_connects*/) {
532                         unsigned int connects = 0;
533
534                         for(dev_class_t devclass = 0; devclass <= mesh->devclass; ++devclass) {
535                                 for list_each(connection_t, c, mesh->connections) {
536                                         if(c->status.active && c->node && c->node->devclass == devclass) {
537                                                 connects += 1;
538                                         }
539                                 }
540
541                                 if(min_connects < connects) {
542                                         splay_tree_t *nodes = splay_alloc_tree(node_compare_devclass_desc, NULL);
543
544                                         for list_each(connection_t, c, mesh->connections) {
545                                                 if(c->outgoing && c->node && c->node->devclass >= devclass) {
546                                                         splay_insert(nodes, c->node);
547                                                 }
548                                         }
549
550                                         if(nodes->head) {
551                                                 logger(mesh, MESHLINK_DEBUG, "* disconnect suboptimal outgoing connection");
552                                                 disconnect_from = (node_t *)nodes->head->data;
553                                         }
554
555                                         splay_delete_tree(nodes);
556                                         break;
557                                 }
558                         }
559
560                         if(!disconnect_from) {
561                                 logger(mesh, MESHLINK_DEBUG, "* no suboptimal outgoing connections");
562                         }
563                 }
564
565
566                 // disconnect connections (too many connections)
567
568                 if(!disconnect_from && max_connects < cur_connects) {
569                         splay_tree_t *nodes = splay_alloc_tree(node_compare_devclass_desc, NULL);
570
571                         for list_each(connection_t, c, mesh->connections) {
572                                 if(c->status.active && c->node) {
573                                         splay_insert(nodes, c->node);
574                                 }
575                         }
576
577                         if(nodes->head) {
578                                 logger(mesh, MESHLINK_DEBUG, "* disconnect connection (too many connections)");
579
580                                 //timeout = 0;
581                                 disconnect_from = (node_t *)nodes->head->data;
582                         } else {
583                                 logger(mesh, MESHLINK_DEBUG, "* no node we want to disconnect, even though we have too many connections");
584                         }
585
586                         splay_delete_tree(nodes);
587                 }
588
589
590                 // perform disconnect
591
592                 if(disconnect_from && disconnect_from->connection) {
593                         logger(mesh, MESHLINK_DEBUG, "Autodisconnecting from %s", disconnect_from->connection->name);
594                         list_delete(mesh->outgoings, disconnect_from->connection->outgoing);
595                         disconnect_from->connection->outgoing = NULL;
596                         terminate_connection(mesh, disconnect_from->connection, disconnect_from->connection->status.active);
597                 }
598
599                 // reduce timeout if we don't have enough connections + outgoings
600                 if(cur_connects + mesh->outgoings->count < 3) {
601                         timeout = 1;
602                 }
603
604                 // done!
605
606                 logger(mesh, MESHLINK_DEBUG, "--- autoconnect end ---");
607         }
608
609         for splay_each(node_t, n, mesh->nodes) {
610                 if(n->status.dirty) {
611                         if(node_write_config(mesh, n)) {
612                                 logger(mesh, MESHLINK_DEBUG, "Could not update %s", n->name);
613                         }
614
615                         n->status.dirty = false;
616                 }
617         }
618
619         timeout_set(&mesh->loop, data, &(struct timeval) {
620                 timeout, prng(mesh, TIMER_FUDGE)
621         });
622 }
623
624 void handle_meta_connection_data(meshlink_handle_t *mesh, connection_t *c) {
625         if(!receive_meta(mesh, c)) {
626                 terminate_connection(mesh, c, c->status.active);
627                 return;
628         }
629 }
630
631 void retry(meshlink_handle_t *mesh) {
632         /* Reset the reconnection timers for all outgoing connections */
633         for list_each(outgoing_t, outgoing, mesh->outgoings) {
634                 outgoing->timeout = 0;
635
636                 if(outgoing->ev.cb)
637                         timeout_set(&mesh->loop, &outgoing->ev, &(struct timeval) {
638                         0, 0
639                 });
640         }
641
642 #ifdef HAVE_IFADDRS_H
643         struct ifaddrs *ifa = NULL;
644         getifaddrs(&ifa);
645 #endif
646
647         /* For active connections, check if their addresses are still valid.
648          * If yes, reset their ping timers, otherwise terminate them. */
649         for list_each(connection_t, c, mesh->connections) {
650                 if(!c->status.active) {
651                         continue;
652                 }
653
654                 if(!c->status.pinged) {
655                         c->last_ping_time = 0;
656                 }
657
658 #ifdef HAVE_IFADDRS_H
659
660                 if(!ifa) {
661                         continue;
662                 }
663
664                 sockaddr_t sa;
665                 socklen_t salen = sizeof(sa);
666
667                 if(getsockname(c->socket, &sa.sa, &salen)) {
668                         continue;
669                 }
670
671                 bool found = false;
672
673                 for(struct ifaddrs *ifap = ifa; ifap; ifap = ifap->ifa_next) {
674                         if(ifap->ifa_addr && !sockaddrcmp_noport(&sa, (sockaddr_t *)ifap->ifa_addr)) {
675                                 found = true;
676                                 break;
677                         }
678
679                 }
680
681                 if(!found) {
682                         logger(mesh, MESHLINK_DEBUG, "Local address for connection to %s no longer valid, terminating", c->name);
683                         terminate_connection(mesh, c, c->status.active);
684                 }
685
686 #endif
687         }
688
689 #ifdef HAVE_IFADDRS_H
690
691         if(ifa) {
692                 freeifaddrs(ifa);
693         }
694
695 #endif
696
697         /* Kick the ping timeout handler */
698         timeout_set(&mesh->loop, &mesh->pingtimer, &(struct timeval) {
699                 0, 0
700         });
701 }
702
703 /*
704   this is where it all happens...
705 */
706 void main_loop(meshlink_handle_t *mesh) {
707         timeout_add(&mesh->loop, &mesh->pingtimer, timeout_handler, &mesh->pingtimer, &(struct timeval) {
708                 1, prng(mesh, TIMER_FUDGE)
709         });
710         timeout_add(&mesh->loop, &mesh->periodictimer, periodic_handler, &mesh->periodictimer, &(struct timeval) {
711                 0, 0
712         });
713
714         //Add signal handler
715         mesh->datafromapp.signum = 0;
716         signal_add(&mesh->loop, &mesh->datafromapp, meshlink_send_from_queue, mesh, mesh->datafromapp.signum);
717
718         if(!event_loop_run(&mesh->loop, &mesh->mutex)) {
719                 logger(mesh, MESHLINK_ERROR, "Error while waiting for input: %s", strerror(errno));
720                 call_error_cb(mesh, MESHLINK_ENETWORK);
721         }
722
723         signal_del(&mesh->loop, &mesh->datafromapp);
724         timeout_del(&mesh->loop, &mesh->periodictimer);
725         timeout_del(&mesh->loop, &mesh->pingtimer);
726 }