git.meshlink.io Git - meshlink/blob - src/net.c
Make ping intervals and timeouts configurable for each device class.
[meshlink] / src / net.c
1 /*
2     net.c -- most of the network code
3     Copyright (C) 2014-2017 Guus Sliepen <guus@meshlink.io>
4
5     This program is free software; you can redistribute it and/or modify
6     it under the terms of the GNU General Public License as published by
7     the Free Software Foundation; either version 2 of the License, or
8     (at your option) any later version.
9
10     This program is distributed in the hope that it will be useful,
11     but WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13     GNU General Public License for more details.
14
15     You should have received a copy of the GNU General Public License along
16     with this program; if not, write to the Free Software Foundation, Inc.,
17     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20 #include "system.h"
21
22 #include "utils.h"
23 #include "conf.h"
24 #include "connection.h"
25 #include "graph.h"
26 #include "logger.h"
27 #include "meshlink_internal.h"
28 #include "meta.h"
29 #include "net.h"
30 #include "netutl.h"
31 #include "protocol.h"
32 #include "xalloc.h"
33
34 #include <assert.h>
35
#if !defined(min)
/* Return the smaller of two ints. Only provided when no min macro is already visible. */
static inline int min(int a, int b) {
        if(b < a) {
                return b;
        }

        return a;
}
#endif
41
/* Fallback period in seconds: used as the ping timeout for connections that
   have no node attached, as the re-arm period of the ping timer, as the base
   period of the periodic handler, and as the per-node factor of the
   autoconnect retry timeout. */
static const int default_timeout = 5;
43
44 /*
45   Terminate a connection:
46   - Mark it as inactive
47   - Remove the edge representing this connection
48   - Kill it with fire
49   - Check if we need to retry making an outgoing connection
50 */
51 void terminate_connection(meshlink_handle_t *mesh, connection_t *c, bool report) {
52         logger(mesh, MESHLINK_INFO, "Closing connection with %s", c->name);
53
54         c->status.active = false;
55
56         if(c->node && c->node->connection == c) {
57                 c->node->connection = NULL;
58         }
59
60         if(c->edge) {
61                 if(report) {
62                         send_del_edge(mesh, mesh->everyone, c->edge, 0);
63                 }
64
65                 edge_del(mesh, c->edge);
66                 c->edge = NULL;
67
68                 /* Run MST and SSSP algorithms */
69
70                 graph(mesh);
71
72                 /* If the node is not reachable anymore but we remember it had an edge to us, clean it up */
73
74                 if(report && c->node && !c->node->status.reachable) {
75                         edge_t *e;
76                         e = lookup_edge(c->node, mesh->self);
77
78                         if(e) {
79                                 send_del_edge(mesh, mesh->everyone, e, 0);
80                                 edge_del(mesh, e);
81                         }
82                 }
83         }
84
85         outgoing_t *outgoing = c->outgoing;
86         connection_del(mesh, c);
87
88         /* Check if this was our outgoing connection */
89
90         if(outgoing) {
91                 do_outgoing_connection(mesh, outgoing);
92         }
93
94 #ifndef HAVE_MINGW
95         /* Clean up dead proxy processes */
96
97         while(waitpid(-1, NULL, WNOHANG) > 0);
98
99 #endif
100 }
101
102 /*
103   Check if the other end is active.
104   If we have sent packets, but didn't receive any,
105   then possibly the other end is dead. We send a
106   PING request over the meta connection. If the other
107   end does not reply in time, we consider them dead
108   and close the connection.
109 */
110 static void timeout_handler(event_loop_t *loop, void *data) {
111         meshlink_handle_t *mesh = loop->data;
112         logger(mesh, MESHLINK_DEBUG, "timeout_handler()");
113
114         for list_each(connection_t, c, mesh->connections) {
115                 int pingtimeout = c->node ? mesh->dev_class_traits[c->node->devclass].pingtimeout : default_timeout;
116
117                 // Also make sure that if outstanding key requests for the UDP counterpart of a connection has timed out, we restart it.
118                 if(c->node) {
119                         if(c->node->status.waitingforkey && c->node->last_req_key + pingtimeout <= mesh->loop.now.tv_sec) {
120                                 send_req_key(mesh, c->node);
121                         }
122                 }
123
124                 if(c->last_ping_time + pingtimeout <= mesh->loop.now.tv_sec) {
125                         if(c->status.active) {
126                                 if(c->status.pinged) {
127                                         logger(mesh, MESHLINK_INFO, "%s didn't respond to PING in %ld seconds", c->name, (long)mesh->loop.now.tv_sec - c->last_ping_time);
128                                 } else if(c->last_ping_time + mesh->dev_class_traits[c->node->devclass].pinginterval <= mesh->loop.now.tv_sec) {
129                                         send_ping(mesh, c);
130                                         continue;
131                                 } else {
132                                         continue;
133                                 }
134                         } else {
135                                 if(c->status.connecting) {
136                                         logger(mesh, MESHLINK_WARNING, "Timeout while connecting to %s", c->name);
137                                 } else {
138                                         logger(mesh, MESHLINK_WARNING, "Timeout from %s during authentication", c->name);
139                                 }
140                         }
141
142                         terminate_connection(mesh, c, c->status.active);
143                 }
144         }
145
146         timeout_set(&mesh->loop, data, &(struct timeval) {
147                 default_timeout, rand() % 100000
148         });
149 }
150
151 // devclass asc, last_successfull_connection desc
152 static int node_compare_devclass_asc_lsc_desc(const void *a, const void *b) {
153         const node_t *na = a, *nb = b;
154
155         if(na->devclass < nb->devclass) {
156                 return -1;
157         }
158
159         if(na->devclass > nb->devclass) {
160                 return 1;
161         }
162
163         if(na->last_successfull_connection == nb->last_successfull_connection) {
164                 return 0;
165         }
166
167         if(na->last_successfull_connection == 0 || na->last_successfull_connection > nb->last_successfull_connection) {
168                 return -1;
169         }
170
171         if(nb->last_successfull_connection == 0 || na->last_successfull_connection < nb->last_successfull_connection) {
172                 return 1;
173         }
174
175         if(na < nb) {
176                 return -1;
177         }
178
179         if(na > nb) {
180                 return 1;
181         }
182
183         return 0;
184 }
185
186 // last_successfull_connection desc
187 static int node_compare_lsc_desc(const void *a, const void *b) {
188         const node_t *na = a, *nb = b;
189
190         if(na->last_successfull_connection == nb->last_successfull_connection) {
191                 return 0;
192         }
193
194         if(na->last_successfull_connection == 0 || na->last_successfull_connection > nb->last_successfull_connection) {
195                 return -1;
196         }
197
198         if(nb->last_successfull_connection == 0 || na->last_successfull_connection < nb->last_successfull_connection) {
199                 return 1;
200         }
201
202         if(na < nb) {
203                 return -1;
204         }
205
206         if(na > nb) {
207                 return 1;
208         }
209
210         return 0;
211 }
212
213 // devclass desc
214 static int node_compare_devclass_desc(const void *a, const void *b) {
215         const node_t *na = a, *nb = b;
216
217         if(na->devclass < nb->devclass) {
218                 return -1;
219         }
220
221         if(na->devclass > nb->devclass) {
222                 return 1;
223         }
224
225         if(na < nb) {
226                 return -1;
227         }
228
229         if(na > nb) {
230                 return 1;
231         }
232
233         return 0;
234 }
235
236
237 /*
238
239 autoconnect()
240 {
241         timeout = 5
242
243         // find the best one for initial connect
244
245         if cur < min
246                 newcon =
247                         first from nodes
248                                 where dclass <= my.dclass and !connection and (timestamp - last_retry) > retry_timeout
249                                 order by dclass asc, last_connection desc
250                 if newcon
251                         timeout = 0
252                         goto connect
253
254
255         // find better nodes to connect to: in case we have less than min connections within [BACKBONE, i] and there are nodes which we are not connected to within the range
256
257         if min <= cur < max
258                 j = 0
259                 for i = BACKBONE to my.dclass
260                         j += count(from connections where node.dclass = i)
261                         if j < min
262                                 newcon =
263                                         first from nodes
264                                                 where dclass = i and !connection and (timestamp - last_retry) > retry_timeout
265                                                 order by last_connection desc
266                                 if newcon
267                                         goto connect
268                         else
269                                 break
270
271
272         // heal partitions
273
274         if min <= cur < max
275                 newcon =
276                         first from nodes
277                                 where dclass <= my.dclass and !reachable and (timestamp - last_retry) > retry_timeout
278                                 order by dclass asc, last_connection desc
279                 if newcon
280                         goto connect
281
282
283         // connect
284
285 connect:
286         if newcon
287                 connect newcon
288
289
290         // disconnect outgoing connections in case we have more than min connections within [BACKBONE, i] and there are nodes which we are connected to within the range [i, PORTABLE]
291
292         if min < cur <= max
293                 j = 0
294                 for i = BACKBONE to my.dclass
295                         j += count(from connections where node.dclass = i)
296                         if min < j
297                                 delcon =
298                                         first from nodes
299                                                 where dclass >= i and outgoing_connection
300                                                 order by dclass desc
                                if delcon
302                                         goto disconnect
303                                 else
304                                         break
305
306
307         // disconnect connections in case we have more than enough connections
308
309         if max < cur
310                 delcon =
311                         first from nodes
312                                 where outgoing_connection
313                                 order by dclass desc
314                 goto disconnect
315
316         // disconnect
317
disconnect:
319         if delcon
320                 disconnect delcon
321
322
323         // next iteration
324         next (timeout, autoconnect)
325
326 }
327
328 */
329
330
331 static void periodic_handler(event_loop_t *loop, void *data) {
332         meshlink_handle_t *mesh = loop->data;
333
334         /* Check if there are too many contradicting ADD_EDGE and DEL_EDGE messages.
335            This usually only happens when another node has the same Name as this node.
336            If so, sleep for a short while to prevent a storm of contradicting messages.
337         */
338
339         if(mesh->contradicting_del_edge > 100 && mesh->contradicting_add_edge > 100) {
340                 logger(mesh, MESHLINK_WARNING, "Possible node with same Name as us! Sleeping %d seconds.", mesh->sleeptime);
341                 usleep(mesh->sleeptime * 1000000LL);
342                 mesh->sleeptime *= 2;
343
344                 if(mesh->sleeptime < 0) {
345                         mesh->sleeptime = 3600;
346                 }
347         } else {
348                 mesh->sleeptime /= 2;
349
350                 if(mesh->sleeptime < 10) {
351                         mesh->sleeptime = 10;
352                 }
353         }
354
355         mesh->contradicting_add_edge = 0;
356         mesh->contradicting_del_edge = 0;
357
358         int timeout = default_timeout;
359
360         /* Check if we need to make or break connections. */
361
362         if(mesh->nodes->count > 1) {
363
364                 logger(mesh, MESHLINK_DEBUG, "--- autoconnect begin ---");
365
366                 int retry_timeout = min(mesh->nodes->count * default_timeout, 60);
367
368                 logger(mesh, MESHLINK_DEBUG, "* devclass = %d", mesh->devclass);
369                 logger(mesh, MESHLINK_DEBUG, "* nodes = %d", mesh->nodes->count);
370                 logger(mesh, MESHLINK_DEBUG, "* retry_timeout = %d", retry_timeout);
371
372
373                 // connect disconnect nodes
374
375                 node_t *connect_to = NULL;
376                 node_t *disconnect_from = NULL;
377
378
379                 // get cur_connects
380
381                 unsigned int cur_connects = 0;
382
383                 for list_each(connection_t, c, mesh->connections) {
384                         if(c->status.active) {
385                                 cur_connects += 1;
386                         }
387                 }
388
389                 logger(mesh, MESHLINK_DEBUG, "* cur_connects = %d", cur_connects);
390                 logger(mesh, MESHLINK_DEBUG, "* outgoings = %d", mesh->outgoings->count);
391
392                 // get min_connects and max_connects
393
394                 unsigned int min_connects = mesh->dev_class_traits[mesh->devclass].min_connects;
395                 unsigned int max_connects = mesh->dev_class_traits[mesh->devclass].max_connects;
396
397                 logger(mesh, MESHLINK_DEBUG, "* min_connects = %d", min_connects);
398                 logger(mesh, MESHLINK_DEBUG, "* max_connects = %d", max_connects);
399
400                 // find the best one for initial connect
401
402                 if(cur_connects < min_connects) {
403                         splay_tree_t *nodes = splay_alloc_tree(node_compare_devclass_asc_lsc_desc, NULL);
404
405                         for splay_each(node_t, n, mesh->nodes) {
406                                 logger(mesh, MESHLINK_DEBUG, "* %s->devclass = %d", n->name, n->devclass);
407
408                                 if(n != mesh->self && n->devclass <= mesh->devclass && !n->connection && !n->status.blacklisted && (n->last_connect_try == 0 || (mesh->loop.now.tv_sec - n->last_connect_try) > retry_timeout)) {
409                                         splay_insert(nodes, n);
410                                 }
411                         }
412
413                         if(nodes->head) {
414                                 //timeout = 0;
415                                 connect_to = (node_t *)nodes->head->data;
416
417                                 logger(mesh, MESHLINK_DEBUG, "* found best one for initial connect: %s", connect_to->name);
418                         } else {
419                                 logger(mesh, MESHLINK_DEBUG, "* could not find node for initial connect");
420                         }
421
422                         splay_free_tree(nodes);
423                 }
424
425
426                 // find better nodes to connect to
427
428                 if(!connect_to && min_connects <= cur_connects && cur_connects < max_connects) {
429                         unsigned int connects = 0;
430
431                         for(dev_class_t devclass = 0; devclass <= mesh->devclass; ++devclass) {
432                                 for list_each(connection_t, c, mesh->connections) {
433                                         if(c->status.active && c->node && c->node->devclass == devclass) {
434                                                 connects += 1;
435                                         }
436                                 }
437
438                                 if(connects < min_connects) {
439                                         splay_tree_t *nodes = splay_alloc_tree(node_compare_lsc_desc, NULL);
440
441                                         for splay_each(node_t, n, mesh->nodes) {
442                                                 if(n != mesh->self && n->devclass == devclass && !n->connection && !n->status.blacklisted && (n->last_connect_try == 0 || (mesh->loop.now.tv_sec - n->last_connect_try) > retry_timeout)) {
443                                                         splay_insert(nodes, n);
444                                                 }
445                                         }
446
447                                         if(nodes->head) {
448                                                 logger(mesh, MESHLINK_DEBUG, "* found better node");
449                                                 connect_to = (node_t *)nodes->head->data;
450
451                                                 splay_free_tree(nodes);
452                                                 break;
453                                         }
454
455                                         splay_free_tree(nodes);
456                                 } else {
457                                         break;
458                                 }
459                         }
460
461                         if(!connect_to) {
462                                 logger(mesh, MESHLINK_DEBUG, "* could not find better nodes");
463                         }
464                 }
465
466
467                 // heal partitions
468
469                 if(!connect_to && min_connects <= cur_connects && cur_connects < max_connects) {
470                         splay_tree_t *nodes = splay_alloc_tree(node_compare_devclass_asc_lsc_desc, NULL);
471
472                         for splay_each(node_t, n, mesh->nodes) {
473                                 if(n != mesh->self && n->devclass <= mesh->devclass && !n->status.reachable && !n->status.blacklisted && (n->last_connect_try == 0 || (mesh->loop.now.tv_sec - n->last_connect_try) > retry_timeout)) {
474                                         splay_insert(nodes, n);
475                                 }
476                         }
477
478                         if(nodes->head) {
479                                 logger(mesh, MESHLINK_DEBUG, "* try to heal partition");
480                                 connect_to = (node_t *)nodes->head->data;
481                         } else {
482                                 logger(mesh, MESHLINK_DEBUG, "* could not find nodes for partition healing");
483                         }
484
485                         splay_free_tree(nodes);
486                 }
487
488
489                 // perform connect
490
491                 if(connect_to && !connect_to->connection) {
492                         connect_to->last_connect_try = mesh->loop.now.tv_sec;
493                         logger(mesh, MESHLINK_DEBUG, "Autoconnect trying to connect to %s", connect_to->name);
494
495                         /* check if there is already a connection attempt to this node */
496                         bool skip = false;
497
498                         for list_each(outgoing_t, outgoing, mesh->outgoings) {
499                                 if(outgoing->node == connect_to) {
500                                         logger(mesh, MESHLINK_DEBUG, "* skip autoconnect since it is an outgoing connection already");
501                                         skip = true;
502                                         break;
503                                 }
504                         }
505
506                         if(!connect_to->status.reachable && !node_read_public_key(mesh, connect_to)) {
507                                 logger(mesh, MESHLINK_DEBUG, "* skip autoconnect since we don't know this node's public key");
508                                 skip = true;
509                         }
510
511                         if(!skip) {
512                                 logger(mesh, MESHLINK_DEBUG, "Autoconnecting to %s", connect_to->name);
513                                 outgoing_t *outgoing = xzalloc(sizeof(outgoing_t));
514                                 outgoing->node = connect_to;
515                                 list_insert_tail(mesh->outgoings, outgoing);
516                                 setup_outgoing_connection(mesh, outgoing);
517                         }
518                 }
519
520
521                 // disconnect suboptimal outgoing connections
522
523                 if(min_connects < cur_connects /*&& cur_connects <= max_connects*/) {
524                         unsigned int connects = 0;
525
526                         for(dev_class_t devclass = 0; devclass <= mesh->devclass; ++devclass) {
527                                 for list_each(connection_t, c, mesh->connections) {
528                                         if(c->status.active && c->node && c->node->devclass == devclass) {
529                                                 connects += 1;
530                                         }
531                                 }
532
533                                 if(min_connects < connects) {
534                                         splay_tree_t *nodes = splay_alloc_tree(node_compare_devclass_desc, NULL);
535
536                                         for list_each(connection_t, c, mesh->connections) {
537                                                 if(c->outgoing && c->node && c->node->devclass >= devclass) {
538                                                         splay_insert(nodes, c->node);
539                                                 }
540                                         }
541
542                                         if(nodes->head) {
543                                                 logger(mesh, MESHLINK_DEBUG, "* disconnect suboptimal outgoing connection");
544                                                 disconnect_from = (node_t *)nodes->head->data;
545                                         }
546
547                                         splay_free_tree(nodes);
548                                         break;
549                                 }
550                         }
551
552                         if(!disconnect_from) {
553                                 logger(mesh, MESHLINK_DEBUG, "* no suboptimal outgoing connections");
554                         }
555                 }
556
557
558                 // disconnect connections (too many connections)
559
560                 if(!disconnect_from && max_connects < cur_connects) {
561                         splay_tree_t *nodes = splay_alloc_tree(node_compare_devclass_desc, NULL);
562
563                         for list_each(connection_t, c, mesh->connections) {
564                                 if(c->status.active && c->node) {
565                                         splay_insert(nodes, c->node);
566                                 }
567                         }
568
569                         if(nodes->head) {
570                                 logger(mesh, MESHLINK_DEBUG, "* disconnect connection (too many connections)");
571
572                                 //timeout = 0;
573                                 disconnect_from = (node_t *)nodes->head->data;
574                         } else {
575                                 logger(mesh, MESHLINK_DEBUG, "* no node we want to disconnect, even though we have too many connections");
576                         }
577
578                         splay_free_tree(nodes);
579                 }
580
581
582                 // perform disconnect
583
584                 if(disconnect_from && disconnect_from->connection) {
585                         logger(mesh, MESHLINK_DEBUG, "Autodisconnecting from %s", disconnect_from->connection->name);
586                         list_delete(mesh->outgoings, disconnect_from->connection->outgoing);
587                         disconnect_from->connection->outgoing = NULL;
588                         terminate_connection(mesh, disconnect_from->connection, disconnect_from->connection->status.active);
589                 }
590
591                 // reduce timeout if we don't have enough connections + outgoings
592                 if(cur_connects + mesh->outgoings->count < 3) {
593                         timeout = 1;
594                 }
595
596                 // done!
597
598                 logger(mesh, MESHLINK_DEBUG, "--- autoconnect end ---");
599         }
600
601         for splay_each(node_t, n, mesh->nodes) {
602                 if(n->status.dirty) {
603                         node_write_config(mesh, n);
604                         n->status.dirty = false;
605                 }
606         }
607
608         timeout_set(&mesh->loop, data, &(struct timeval) {
609                 timeout, rand() % 100000
610         });
611 }
612
613 void handle_meta_connection_data(meshlink_handle_t *mesh, connection_t *c) {
614         if(!receive_meta(mesh, c)) {
615                 terminate_connection(mesh, c, c->status.active);
616                 return;
617         }
618 }
619
620 void retry(meshlink_handle_t *mesh) {
621         /* Reset the reconnection timers for all outgoing connections */
622         for list_each(outgoing_t, outgoing, mesh->outgoings) {
623                 outgoing->timeout = 0;
624
625                 if(outgoing->ev.cb)
626                         timeout_set(&mesh->loop, &outgoing->ev, &(struct timeval) {
627                         0, 0
628                 });
629         }
630
631 #ifdef HAVE_IFADDRS_H
632         struct ifaddrs *ifa = NULL;
633         getifaddrs(&ifa);
634 #endif
635
636         /* For active connections, check if their addresses are still valid.
637          * If yes, reset their ping timers, otherwise terminate them. */
638         for list_each(connection_t, c, mesh->connections) {
639                 if(!c->status.active) {
640                         continue;
641                 }
642
643                 if(!c->status.pinged) {
644                         c->last_ping_time = 0;
645                 }
646
647 #ifdef HAVE_IFADDRS_H
648
649                 if(!ifa) {
650                         continue;
651                 }
652
653                 sockaddr_t sa;
654                 socklen_t salen = sizeof(sa);
655
656                 if(getsockname(c->socket, &sa.sa, &salen)) {
657                         continue;
658                 }
659
660                 bool found = false;
661
662                 for(struct ifaddrs *ifap = ifa; ifap; ifap = ifap->ifa_next) {
663                         if(ifap->ifa_addr && !sockaddrcmp_noport(&sa, (sockaddr_t *)ifap->ifa_addr)) {
664                                 found = true;
665                                 break;
666                         }
667
668                 }
669
670                 if(!found) {
671                         logger(mesh, MESHLINK_DEBUG, "Local address for connection to %s no longer valid, terminating", c->name);
672                         terminate_connection(mesh, c, c->status.active);
673                 }
674
675 #endif
676         }
677
678 #ifdef HAVE_IFADDRS_H
679
680         if(ifa) {
681                 freeifaddrs(ifa);
682         }
683
684 #endif
685
686         /* Kick the ping timeout handler */
687         timeout_set(&mesh->loop, &mesh->pingtimer, &(struct timeval) {
688                 0, 0
689         });
690 }
691
692 /*
693   this is where it all happens...
694 */
695 int main_loop(meshlink_handle_t *mesh) {
696         timeout_add(&mesh->loop, &mesh->pingtimer, timeout_handler, &mesh->pingtimer, &(struct timeval) {
697                 default_timeout, rand() % 100000
698         });
699         timeout_add(&mesh->loop, &mesh->periodictimer, periodic_handler, &mesh->periodictimer, &(struct timeval) {
700                 0, 0
701         });
702
703         //Add signal handler
704         mesh->datafromapp.signum = 0;
705         signal_add(&(mesh->loop), &(mesh->datafromapp), (signal_cb_t)meshlink_send_from_queue, mesh, mesh->datafromapp.signum);
706
707         if(!event_loop_run(&(mesh->loop), &(mesh->mesh_mutex))) {
708                 logger(mesh, MESHLINK_ERROR, "Error while waiting for input: %s", strerror(errno));
709                 abort();
710                 timeout_del(&mesh->loop, &mesh->periodictimer);
711                 timeout_del(&mesh->loop, &mesh->pingtimer);
712
713                 return 1;
714         }
715
716         timeout_del(&mesh->loop, &mesh->periodictimer);
717         timeout_del(&mesh->loop, &mesh->pingtimer);
718
719         return 0;
720 }