]> git.meshlink.io Git - meshlink/blob - src/net.c
d80df2fd84c3d6d728a79bac55301e60a43e6d1d
[meshlink] / src / net.c
1 /*
2     net.c -- most of the network code
3     Copyright (C) 2014-2017 Guus Sliepen <guus@meshlink.io>
4
5     This program is free software; you can redistribute it and/or modify
6     it under the terms of the GNU General Public License as published by
7     the Free Software Foundation; either version 2 of the License, or
8     (at your option) any later version.
9
10     This program is distributed in the hope that it will be useful,
11     but WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13     GNU General Public License for more details.
14
15     You should have received a copy of the GNU General Public License along
16     with this program; if not, write to the Free Software Foundation, Inc.,
17     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
#include "system.h"

#include "utils.h"
#include "conf.h"
#include "connection.h"
#include "graph.h"
#include "logger.h"
#include "meshlink_internal.h"
#include "meta.h"
#include "net.h"
#include "netutl.h"
#include "protocol.h"
#include "sptps.h"
#include "xalloc.h"

#include <assert.h>
#include <limits.h>
36
#if !defined(min)
/* Return the smaller of two ints. Only compiled when no min macro is already defined. */
static inline int min(int a, int b) {
	if(b < a) {
		return b;
	}

	return a;
}
#endif
42
43 static const int default_timeout = 5; /* seconds; ping timeout for connections without a node, and default period of the periodic timer */
44 static const int default_interval = 60; /* seconds; ping interval for connections without a node */
45
46 /*
47   Terminate a connection:
48   - Mark it as inactive
49   - Remove the edge representing this connection
50   - Kill it with fire
51   - Check if we need to retry making an outgoing connection
52 */
53 void terminate_connection(meshlink_handle_t *mesh, connection_t *c, bool report) {
54         logger(mesh, MESHLINK_INFO, "Closing connection with %s", c->name);
55
56         c->status.active = false;
57
58         if(c->node && c->node->connection == c) {
59                 c->node->connection = NULL;
60         }
61
62         if(c->edge) {
63                 if(report) {
64                         send_del_edge(mesh, mesh->everyone, c->edge, 0);
65                 }
66
67                 edge_del(mesh, c->edge);
68                 c->edge = NULL;
69
70                 /* Run MST and SSSP algorithms */
71
72                 graph(mesh);
73
74                 /* If the node is not reachable anymore but we remember it had an edge to us, clean it up */
75
76                 if(report && c->node && !c->node->status.reachable) {
77                         edge_t *e;
78                         e = lookup_edge(c->node, mesh->self);
79
80                         if(e) {
81                                 send_del_edge(mesh, mesh->everyone, e, 0);
82                                 edge_del(mesh, e);
83                         }
84                 }
85         }
86
87         outgoing_t *outgoing = c->outgoing;
88         connection_del(mesh, c);
89
90         /* Check if this was our outgoing connection */
91
92         if(outgoing) {
93                 do_outgoing_connection(mesh, outgoing);
94         }
95 }
96
97 /*
98   Check if the other end is active.
99   If we have sent packets, but didn't receive any,
100   then possibly the other end is dead. We send a
101   PING request over the meta connection. If the other
102   end does not reply in time, we consider them dead
103   and close the connection.
104 */
105 static void timeout_handler(event_loop_t *loop, void *data) {
106         assert(data);
107
108         meshlink_handle_t *mesh = loop->data;
109         logger(mesh, MESHLINK_DEBUG, "timeout_handler()");
110
111         for list_each(connection_t, c, mesh->connections) {
112                 int pingtimeout = c->node ? mesh->dev_class_traits[c->node->devclass].pingtimeout : default_timeout;
113                 int pinginterval = c->node ? mesh->dev_class_traits[c->node->devclass].pinginterval : default_interval;
114
115                 if(c->outgoing && c->outgoing->timeout < 5) {
116                         pingtimeout = 1;
117                 }
118
119                 // Also make sure that if outstanding key requests for the UDP counterpart of a connection has timed out, we restart it.
120                 if(c->node) {
121                         if(c->node->status.waitingforkey && c->node->last_req_key + pingtimeout <= mesh->loop.now.tv_sec) {
122                                 send_req_key(mesh, c->node);
123                         }
124                 }
125
126                 if(c->last_ping_time + pingtimeout <= mesh->loop.now.tv_sec) {
127                         if(c->status.active) {
128                                 if(c->status.pinged) {
129                                         logger(mesh, MESHLINK_INFO, "%s didn't respond to PING in %ld seconds", c->name, (long)mesh->loop.now.tv_sec - c->last_ping_time);
130                                 } else if(c->last_ping_time + pinginterval <= mesh->loop.now.tv_sec) {
131                                         send_ping(mesh, c);
132                                         continue;
133                                 } else {
134                                         continue;
135                                 }
136                         } else {
137                                 if(c->status.connecting) {
138                                         logger(mesh, MESHLINK_WARNING, "Timeout while connecting to %s", c->name);
139                                 } else {
140                                         logger(mesh, MESHLINK_WARNING, "Timeout from %s during authentication", c->name);
141                                 }
142                         }
143
144                         terminate_connection(mesh, c, c->status.active);
145                 }
146         }
147
148         timeout_set(&mesh->loop, data, &(struct timespec) {
149                 1, prng(mesh, TIMER_FUDGE)
150         });
151 }
152
153 // devclass asc, last_successfull_connection desc
154 static int node_compare_devclass_asc_lsc_desc(const void *a, const void *b) {
155         const node_t *na = a, *nb = b;
156
157         if(na->devclass < nb->devclass) {
158                 return -1;
159         }
160
161         if(na->devclass > nb->devclass) {
162                 return 1;
163         }
164
165         if(na->last_successfull_connection == nb->last_successfull_connection) {
166                 return 0;
167         }
168
169         if(na->last_successfull_connection == 0 || na->last_successfull_connection > nb->last_successfull_connection) {
170                 return -1;
171         }
172
173         if(nb->last_successfull_connection == 0 || na->last_successfull_connection < nb->last_successfull_connection) {
174                 return 1;
175         }
176
177         if(na < nb) {
178                 return -1;
179         }
180
181         if(na > nb) {
182                 return 1;
183         }
184
185         return 0;
186 }
187
188 // last_successfull_connection desc
189 static int node_compare_lsc_desc(const void *a, const void *b) {
190         const node_t *na = a, *nb = b;
191
192         if(na->last_successfull_connection == nb->last_successfull_connection) {
193                 return 0;
194         }
195
196         if(na->last_successfull_connection == 0 || na->last_successfull_connection > nb->last_successfull_connection) {
197                 return -1;
198         }
199
200         if(nb->last_successfull_connection == 0 || na->last_successfull_connection < nb->last_successfull_connection) {
201                 return 1;
202         }
203
204         if(na < nb) {
205                 return -1;
206         }
207
208         if(na > nb) {
209                 return 1;
210         }
211
212         return 0;
213 }
214
215 // devclass desc
216 static int node_compare_devclass_desc(const void *a, const void *b) {
217         const node_t *na = a, *nb = b;
218
219         if(na->devclass < nb->devclass) {
220                 return -1;
221         }
222
223         if(na->devclass > nb->devclass) {
224                 return 1;
225         }
226
227         if(na < nb) {
228                 return -1;
229         }
230
231         if(na > nb) {
232                 return 1;
233         }
234
235         return 0;
236 }
237
238
239 /*
240
241 autoconnect()
242 {
243         timeout = 5
244
245         // find the best one for initial connect
246
247         if cur < min
248                 newcon =
249                         first from nodes
250                                 where dclass <= my.dclass and !connection and (timestamp - last_retry) > retry_timeout
251                                 order by dclass asc, last_connection desc
252                 if newcon
253                         timeout = 0
254                         goto connect
255
256
257         // find better nodes to connect to: in case we have less than min connections within [BACKBONE, i] and there are nodes which we are not connected to within the range
258
259         if min <= cur < max
260                 j = 0
261                 for i = BACKBONE to my.dclass
262                         j += count(from connections where node.dclass = i)
263                         if j < min
264                                 newcon =
265                                         first from nodes
266                                                 where dclass = i and !connection and (timestamp - last_retry) > retry_timeout
267                                                 order by last_connection desc
268                                 if newcon
269                                         goto connect
270                         else
271                                 break
272
273
274         // heal partitions
275
276         if min <= cur < max
277                 newcon =
278                         first from nodes
279                                 where dclass <= my.dclass and !reachable and (timestamp - last_retry) > retry_timeout
280                                 order by dclass asc, last_connection desc
281                 if newcon
282                         goto connect
283
284
285         // connect
286
287 connect:
288         if newcon
289                 connect newcon
290
291
292         // disconnect outgoing connections in case we have more than min connections within [BACKBONE, i] and there are nodes which we are connected to within the range [i, PORTABLE]
293
294         if min < cur <= max
295                 j = 0
296                 for i = BACKBONE to my.dclass
297                         j += count(from connections where node.dclass = i)
298                         if min < j
299                                 delcon =
300                                         first from nodes
301                                                 where dclass >= i and outgoing_connection
302                                                 order by dclass desc
303                                 if delcon
304                                         goto disconnect
305                                 else
306                                         break
307
308
309         // disconnect connections in case we have more than enough connections
310
311         if max < cur
312                 delcon =
313                         first from nodes
314                                 where outgoing_connection
315                                 order by dclass desc
316                 goto disconnect
317
318         // disconnect
319
320 disconnect:
321         if delcon
322                 disconnect delcon
323
324
325         // next iteration
326         next (timeout, autoconnect)
327
328 }
329
330 */
331
332
333 static void periodic_handler(event_loop_t *loop, void *data) {
334         meshlink_handle_t *mesh = loop->data;
335
336         /* Check if there are too many contradicting ADD_EDGE and DEL_EDGE messages.
337            This usually only happens when another node has the same Name as this node.
338            If so, sleep for a short while to prevent a storm of contradicting messages.
339         */
340
341         if(mesh->contradicting_del_edge > 100 && mesh->contradicting_add_edge > 100) {
342                 logger(mesh, MESHLINK_WARNING, "Possible node with same Name as us! Sleeping %d seconds.", mesh->sleeptime);
343                 struct timespec ts = {mesh->sleeptime, 0};
344                 clock_nanosleep(CLOCK_MONOTONIC, 0, &ts, NULL);
345                 mesh->sleeptime *= 2;
346
347                 if(mesh->sleeptime < 0) {
348                         mesh->sleeptime = 3600;
349                 }
350         } else {
351                 mesh->sleeptime /= 2;
352
353                 if(mesh->sleeptime < 10) {
354                         mesh->sleeptime = 10;
355                 }
356         }
357
358         mesh->contradicting_add_edge = 0;
359         mesh->contradicting_del_edge = 0;
360
361         int timeout = default_timeout;
362
363         /* Check if we need to make or break connections. */
364
365         if(mesh->nodes->count > 1) {
366
367                 logger(mesh, MESHLINK_DEBUG, "--- autoconnect begin ---");
368
369                 int retry_timeout = min(mesh->nodes->count * default_timeout, 60);
370
371                 logger(mesh, MESHLINK_DEBUG, "* devclass = %d", mesh->devclass);
372                 logger(mesh, MESHLINK_DEBUG, "* nodes = %d", mesh->nodes->count);
373                 logger(mesh, MESHLINK_DEBUG, "* retry_timeout = %d", retry_timeout);
374
375
376                 // connect disconnect nodes
377
378                 node_t *connect_to = NULL;
379                 node_t *disconnect_from = NULL;
380
381
382                 // get cur_connects
383
384                 unsigned int cur_connects = 0;
385
386                 for list_each(connection_t, c, mesh->connections) {
387                         if(c->status.active) {
388                                 cur_connects += 1;
389                         }
390                 }
391
392                 logger(mesh, MESHLINK_DEBUG, "* cur_connects = %d", cur_connects);
393                 logger(mesh, MESHLINK_DEBUG, "* outgoings = %d", mesh->outgoings->count);
394
395                 // get min_connects and max_connects
396
397                 unsigned int min_connects = mesh->dev_class_traits[mesh->devclass].min_connects;
398                 unsigned int max_connects = mesh->dev_class_traits[mesh->devclass].max_connects;
399
400                 logger(mesh, MESHLINK_DEBUG, "* min_connects = %d", min_connects);
401                 logger(mesh, MESHLINK_DEBUG, "* max_connects = %d", max_connects);
402
403                 // find the best one for initial connect
404
405                 if(cur_connects < min_connects) {
406                         splay_tree_t *nodes = splay_alloc_tree(node_compare_devclass_asc_lsc_desc, NULL);
407
408                         for splay_each(node_t, n, mesh->nodes) {
409                                 logger(mesh, MESHLINK_DEBUG, "* %s->devclass = %d", n->name, n->devclass);
410
411                                 if(n != mesh->self && n->devclass <= mesh->devclass && !n->connection && !n->status.blacklisted && (n->last_connect_try == 0 || (mesh->loop.now.tv_sec - n->last_connect_try) > retry_timeout)) {
412                                         splay_insert(nodes, n);
413                                 }
414                         }
415
416                         if(nodes->head) {
417                                 //timeout = 0;
418                                 connect_to = (node_t *)nodes->head->data;
419
420                                 logger(mesh, MESHLINK_DEBUG, "* found best one for initial connect: %s", connect_to->name);
421                         } else {
422                                 logger(mesh, MESHLINK_DEBUG, "* could not find node for initial connect");
423                         }
424
425                         splay_delete_tree(nodes);
426                 }
427
428
429                 // find better nodes to connect to
430
431                 if(!connect_to && min_connects <= cur_connects && cur_connects < max_connects) {
432                         unsigned int connects = 0;
433
434                         for(dev_class_t devclass = 0; devclass <= mesh->devclass; ++devclass) {
435                                 for list_each(connection_t, c, mesh->connections) {
436                                         if(c->status.active && c->node && c->node->devclass == devclass) {
437                                                 connects += 1;
438                                         }
439                                 }
440
441                                 if(connects < min_connects) {
442                                         splay_tree_t *nodes = splay_alloc_tree(node_compare_lsc_desc, NULL);
443
444                                         for splay_each(node_t, n, mesh->nodes) {
445                                                 if(n != mesh->self && n->devclass == devclass && !n->connection && !n->status.blacklisted && (n->last_connect_try == 0 || (mesh->loop.now.tv_sec - n->last_connect_try) > retry_timeout)) {
446                                                         splay_insert(nodes, n);
447                                                 }
448                                         }
449
450                                         if(nodes->head) {
451                                                 logger(mesh, MESHLINK_DEBUG, "* found better node");
452                                                 connect_to = (node_t *)nodes->head->data;
453
454                                                 splay_delete_tree(nodes);
455                                                 break;
456                                         }
457
458                                         splay_delete_tree(nodes);
459                                 } else {
460                                         break;
461                                 }
462                         }
463
464                         if(!connect_to) {
465                                 logger(mesh, MESHLINK_DEBUG, "* could not find better nodes");
466                         }
467                 }
468
469
470                 // heal partitions
471
472                 if(!connect_to && min_connects <= cur_connects && cur_connects < max_connects) {
473                         splay_tree_t *nodes = splay_alloc_tree(node_compare_devclass_asc_lsc_desc, NULL);
474
475                         for splay_each(node_t, n, mesh->nodes) {
476                                 if(n != mesh->self && n->devclass <= mesh->devclass && !n->status.reachable && !n->status.blacklisted && (n->last_connect_try == 0 || (mesh->loop.now.tv_sec - n->last_connect_try) > retry_timeout)) {
477                                         splay_insert(nodes, n);
478                                 }
479                         }
480
481                         if(nodes->head) {
482                                 logger(mesh, MESHLINK_DEBUG, "* try to heal partition");
483                                 connect_to = (node_t *)nodes->head->data;
484                         } else {
485                                 logger(mesh, MESHLINK_DEBUG, "* could not find nodes for partition healing");
486                         }
487
488                         splay_delete_tree(nodes);
489                 }
490
491
492                 // perform connect
493
494                 if(connect_to && !connect_to->connection) {
495                         connect_to->last_connect_try = mesh->loop.now.tv_sec;
496                         logger(mesh, MESHLINK_DEBUG, "Autoconnect trying to connect to %s", connect_to->name);
497
498                         /* check if there is already a connection attempt to this node */
499                         bool skip = false;
500
501                         for list_each(outgoing_t, outgoing, mesh->outgoings) {
502                                 if(outgoing->node == connect_to) {
503                                         logger(mesh, MESHLINK_DEBUG, "* skip autoconnect since it is an outgoing connection already");
504                                         skip = true;
505                                         break;
506                                 }
507                         }
508
509                         if(!connect_to->status.reachable && !node_read_public_key(mesh, connect_to)) {
510                                 logger(mesh, MESHLINK_DEBUG, "* skip autoconnect since we don't know this node's public key");
511                                 skip = true;
512                         }
513
514                         if(!skip) {
515                                 logger(mesh, MESHLINK_DEBUG, "Autoconnecting to %s", connect_to->name);
516                                 outgoing_t *outgoing = xzalloc(sizeof(outgoing_t));
517                                 outgoing->node = connect_to;
518                                 list_insert_tail(mesh->outgoings, outgoing);
519                                 setup_outgoing_connection(mesh, outgoing);
520                         }
521                 }
522
523
524                 // disconnect suboptimal outgoing connections
525
526                 if(min_connects < cur_connects /*&& cur_connects <= max_connects*/) {
527                         unsigned int connects = 0;
528
529                         for(dev_class_t devclass = 0; devclass <= mesh->devclass; ++devclass) {
530                                 for list_each(connection_t, c, mesh->connections) {
531                                         if(c->status.active && c->node && c->node->devclass == devclass) {
532                                                 connects += 1;
533                                         }
534                                 }
535
536                                 if(min_connects < connects) {
537                                         splay_tree_t *nodes = splay_alloc_tree(node_compare_devclass_desc, NULL);
538
539                                         for list_each(connection_t, c, mesh->connections) {
540                                                 if(c->outgoing && c->node && c->node->devclass >= devclass) {
541                                                         splay_insert(nodes, c->node);
542                                                 }
543                                         }
544
545                                         if(nodes->head) {
546                                                 logger(mesh, MESHLINK_DEBUG, "* disconnect suboptimal outgoing connection");
547                                                 disconnect_from = (node_t *)nodes->head->data;
548                                         }
549
550                                         splay_delete_tree(nodes);
551                                         break;
552                                 }
553                         }
554
555                         if(!disconnect_from) {
556                                 logger(mesh, MESHLINK_DEBUG, "* no suboptimal outgoing connections");
557                         }
558                 }
559
560
561                 // disconnect connections (too many connections)
562
563                 if(!disconnect_from && max_connects < cur_connects) {
564                         splay_tree_t *nodes = splay_alloc_tree(node_compare_devclass_desc, NULL);
565
566                         for list_each(connection_t, c, mesh->connections) {
567                                 if(c->status.active && c->node) {
568                                         splay_insert(nodes, c->node);
569                                 }
570                         }
571
572                         if(nodes->head) {
573                                 logger(mesh, MESHLINK_DEBUG, "* disconnect connection (too many connections)");
574
575                                 //timeout = 0;
576                                 disconnect_from = (node_t *)nodes->head->data;
577                         } else {
578                                 logger(mesh, MESHLINK_DEBUG, "* no node we want to disconnect, even though we have too many connections");
579                         }
580
581                         splay_delete_tree(nodes);
582                 }
583
584
585                 // perform disconnect
586
587                 if(disconnect_from && disconnect_from->connection) {
588                         logger(mesh, MESHLINK_DEBUG, "Autodisconnecting from %s", disconnect_from->connection->name);
589                         list_delete(mesh->outgoings, disconnect_from->connection->outgoing);
590                         disconnect_from->connection->outgoing = NULL;
591                         terminate_connection(mesh, disconnect_from->connection, disconnect_from->connection->status.active);
592                 }
593
594                 // reduce timeout if we don't have enough connections + outgoings
595                 if(cur_connects + mesh->outgoings->count < 3) {
596                         timeout = 1;
597                 }
598
599                 // done!
600
601                 logger(mesh, MESHLINK_DEBUG, "--- autoconnect end ---");
602         }
603
604         for splay_each(node_t, n, mesh->nodes) {
605                 if(n->status.dirty) {
606                         if(!node_write_config(mesh, n)) {
607                                 logger(mesh, MESHLINK_DEBUG, "Could not update %s", n->name);
608                         }
609
610                         n->status.dirty = false;
611                 }
612
613                 if(n->status.validkey && n->last_req_key + 3600 < mesh->loop.now.tv_sec) {
614                         logger(mesh, MESHLINK_DEBUG, "SPTPS key renewal for node %s", n->name);
615
616                         if(!sptps_force_kex(&n->sptps)) {
617                                 logger(mesh, MESHLINK_ERROR, "SPTPS key renewal for node %s failed", n->name);
618                                 n->status.validkey = false;
619                                 sptps_stop(&n->sptps);
620                                 n->status.waitingforkey = false;
621                                 n->last_req_key = 0;
622                         } else {
623                                 n->last_req_key = mesh->loop.now.tv_sec;
624                         }
625                 }
626         }
627
628         timeout_set(&mesh->loop, data, &(struct timespec) {
629                 timeout, prng(mesh, TIMER_FUDGE)
630         });
631 }
632
633 void handle_meta_connection_data(meshlink_handle_t *mesh, connection_t *c) {
634         if(!receive_meta(mesh, c)) {
635                 terminate_connection(mesh, c, c->status.active);
636                 return;
637         }
638 }
639
640 void retry(meshlink_handle_t *mesh) {
641         /* Reset the reconnection timers for all outgoing connections */
642         for list_each(outgoing_t, outgoing, mesh->outgoings) {
643                 outgoing->timeout = 0;
644
645                 if(outgoing->ev.cb) {
646                         timeout_set(&mesh->loop, &outgoing->ev, &(struct timespec) {
647                                 0, 0
648                         });
649                 }
650         }
651
652         /* For active connections, check if their addresses are still valid.
653          * If yes, reset their ping timers, otherwise terminate them. */
654         for list_each(connection_t, c, mesh->connections) {
655                 if(!c->status.active) {
656                         continue;
657                 }
658
659                 if(!c->status.pinged) {
660                         c->last_ping_time = 0;
661                 }
662
663                 sockaddr_t sa;
664                 socklen_t salen = sizeof(sa);
665
666                 if(getsockname(c->socket, &sa.sa, &salen)) {
667                         continue;
668                 }
669
670                 switch(sa.sa.sa_family) {
671                 case AF_INET:
672                         sa.in.sin_port = 0;
673                         break;
674
675                 case AF_INET6:
676                         sa.in6.sin6_port = 0;
677                         break;
678
679                 default:
680                         continue;
681                 }
682
683                 int sock = socket(sa.sa.sa_family, SOCK_STREAM, IPPROTO_TCP);
684
685                 if(sock != -1) {
686                         continue;
687                 }
688
689                 if(bind(sock, &sa.sa, salen) && errno == EADDRNOTAVAIL) {
690                         logger(mesh, MESHLINK_DEBUG, "Local address for connection to %s no longer valid, terminating", c->name);
691                         terminate_connection(mesh, c, c->status.active);
692                 }
693
694                 closesocket(sock);
695         }
696
697         /* Kick the ping timeout handler */
698         timeout_set(&mesh->loop, &mesh->pingtimer, &(struct timespec) {
699                 0, 0
700         });
701 }
702
703 /*
704   this is where it all happens...
705 */
706 void main_loop(meshlink_handle_t *mesh) {
707         timeout_add(&mesh->loop, &mesh->pingtimer, timeout_handler, &mesh->pingtimer, &(struct timespec) {
708                 1, prng(mesh, TIMER_FUDGE)
709         });
710         timeout_add(&mesh->loop, &mesh->periodictimer, periodic_handler, &mesh->periodictimer, &(struct timespec) {
711                 0, 0
712         });
713
714         //Add signal handler
715         mesh->datafromapp.signum = 0;
716         signal_add(&mesh->loop, &mesh->datafromapp, meshlink_send_from_queue, mesh, mesh->datafromapp.signum);
717
718         if(!event_loop_run(&mesh->loop, &mesh->mutex)) {
719                 logger(mesh, MESHLINK_ERROR, "Error while waiting for input: %s", strerror(errno));
720                 call_error_cb(mesh, MESHLINK_ENETWORK);
721         }
722
723         signal_del(&mesh->loop, &mesh->datafromapp);
724         timeout_del(&mesh->loop, &mesh->periodictimer);
725         timeout_del(&mesh->loop, &mesh->pingtimer);
726 }