]> git.meshlink.io Git - meshlink/blob - src/net.c
Fix a debug message being logged incorrectly.
[meshlink] / src / net.c
1 /*
2     net.c -- most of the network code
3     Copyright (C) 2014-2017 Guus Sliepen <guus@meshlink.io>
4
5     This program is free software; you can redistribute it and/or modify
6     it under the terms of the GNU General Public License as published by
7     the Free Software Foundation; either version 2 of the License, or
8     (at your option) any later version.
9
10     This program is distributed in the hope that it will be useful,
11     but WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13     GNU General Public License for more details.
14
15     You should have received a copy of the GNU General Public License along
16     with this program; if not, write to the Free Software Foundation, Inc.,
17     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20 #include "system.h"
21
22 #include "utils.h"
23 #include "conf.h"
24 #include "connection.h"
25 #include "graph.h"
26 #include "logger.h"
27 #include "meshlink_internal.h"
28 #include "meta.h"
29 #include "net.h"
30 #include "netutl.h"
31 #include "protocol.h"
32 #include "sptps.h"
33 #include "xalloc.h"
34
35 #include <assert.h>
36
37 #if !defined(min)
/* Return the smaller of two ints; when both are equal that common value is returned. */
static inline int min(int a, int b) {
        if(b < a) {
                return b;
        }

        return a;
}
41 #endif
42
/* Ping timeout, in seconds, used for connections that have no node attached yet.
   Also the base interval of the periodic handler and the per-node factor of the autoconnect retry timeout. */
static const int default_timeout = 5;

/* Ping interval, in seconds, used for connections that have no node attached yet. */
static const int default_interval = 60;
45
46 /*
47   Terminate a connection:
48   - Mark it as inactive
49   - Remove the edge representing this connection
50   - Kill it with fire
51   - Check if we need to retry making an outgoing connection
52 */
53 void terminate_connection(meshlink_handle_t *mesh, connection_t *c, bool report) {
54         logger(mesh, MESHLINK_INFO, "Closing connection with %s", c->name);
55
56         c->status.active = false;
57
58         if(c->node && c->node->connection == c) {
59                 c->node->connection = NULL;
60         }
61
62         if(c->edge) {
63                 if(report) {
64                         send_del_edge(mesh, mesh->everyone, c->edge, 0);
65                 }
66
67                 edge_del(mesh, c->edge);
68                 c->edge = NULL;
69
70                 /* Run MST and SSSP algorithms */
71
72                 graph(mesh);
73
74                 /* If the node is not reachable anymore but we remember it had an edge to us, clean it up */
75
76                 if(report && c->node && !c->node->status.reachable) {
77                         edge_t *e;
78                         e = lookup_edge(c->node, mesh->self);
79
80                         if(e) {
81                                 send_del_edge(mesh, mesh->everyone, e, 0);
82                                 edge_del(mesh, e);
83                         }
84                 }
85         }
86
87         outgoing_t *outgoing = c->outgoing;
88         connection_del(mesh, c);
89
90         /* Check if this was our outgoing connection */
91
92         if(outgoing) {
93                 do_outgoing_connection(mesh, outgoing);
94         }
95
96 #ifndef HAVE_MINGW
97         /* Clean up dead proxy processes */
98
99         while(waitpid(-1, NULL, WNOHANG) > 0);
100
101 #endif
102 }
103
104 /*
105   Check if the other end is active.
106   If we have sent packets, but didn't receive any,
107   then possibly the other end is dead. We send a
108   PING request over the meta connection. If the other
109   end does not reply in time, we consider them dead
110   and close the connection.
111 */
112 static void timeout_handler(event_loop_t *loop, void *data) {
113         assert(data);
114
115         meshlink_handle_t *mesh = loop->data;
116         logger(mesh, MESHLINK_DEBUG, "timeout_handler()");
117
118         for list_each(connection_t, c, mesh->connections) {
119                 int pingtimeout = c->node ? mesh->dev_class_traits[c->node->devclass].pingtimeout : default_timeout;
120                 int pinginterval = c->node ? mesh->dev_class_traits[c->node->devclass].pinginterval : default_interval;
121
122                 if(c->outgoing && c->outgoing->timeout < 5) {
123                         pingtimeout = 1;
124                 }
125
126                 // Also make sure that if outstanding key requests for the UDP counterpart of a connection has timed out, we restart it.
127                 if(c->node) {
128                         if(c->node->status.waitingforkey && c->node->last_req_key + pingtimeout <= mesh->loop.now.tv_sec) {
129                                 send_req_key(mesh, c->node);
130                         }
131                 }
132
133                 if(c->last_ping_time + pingtimeout <= mesh->loop.now.tv_sec) {
134                         if(c->status.active) {
135                                 if(c->status.pinged) {
136                                         logger(mesh, MESHLINK_INFO, "%s didn't respond to PING in %ld seconds", c->name, (long)mesh->loop.now.tv_sec - c->last_ping_time);
137                                 } else if(c->last_ping_time + pinginterval <= mesh->loop.now.tv_sec) {
138                                         send_ping(mesh, c);
139                                         continue;
140                                 } else {
141                                         continue;
142                                 }
143                         } else {
144                                 if(c->status.connecting) {
145                                         logger(mesh, MESHLINK_WARNING, "Timeout while connecting to %s", c->name);
146                                 } else {
147                                         logger(mesh, MESHLINK_WARNING, "Timeout from %s during authentication", c->name);
148                                 }
149                         }
150
151                         terminate_connection(mesh, c, c->status.active);
152                 }
153         }
154
155         timeout_set(&mesh->loop, data, &(struct timespec) {
156                 1, prng(mesh, TIMER_FUDGE)
157         });
158 }
159
160 // devclass asc, last_successfull_connection desc
161 static int node_compare_devclass_asc_lsc_desc(const void *a, const void *b) {
162         const node_t *na = a, *nb = b;
163
164         if(na->devclass < nb->devclass) {
165                 return -1;
166         }
167
168         if(na->devclass > nb->devclass) {
169                 return 1;
170         }
171
172         if(na->last_successfull_connection == nb->last_successfull_connection) {
173                 return 0;
174         }
175
176         if(na->last_successfull_connection == 0 || na->last_successfull_connection > nb->last_successfull_connection) {
177                 return -1;
178         }
179
180         if(nb->last_successfull_connection == 0 || na->last_successfull_connection < nb->last_successfull_connection) {
181                 return 1;
182         }
183
184         if(na < nb) {
185                 return -1;
186         }
187
188         if(na > nb) {
189                 return 1;
190         }
191
192         return 0;
193 }
194
195 // last_successfull_connection desc
196 static int node_compare_lsc_desc(const void *a, const void *b) {
197         const node_t *na = a, *nb = b;
198
199         if(na->last_successfull_connection == nb->last_successfull_connection) {
200                 return 0;
201         }
202
203         if(na->last_successfull_connection == 0 || na->last_successfull_connection > nb->last_successfull_connection) {
204                 return -1;
205         }
206
207         if(nb->last_successfull_connection == 0 || na->last_successfull_connection < nb->last_successfull_connection) {
208                 return 1;
209         }
210
211         if(na < nb) {
212                 return -1;
213         }
214
215         if(na > nb) {
216                 return 1;
217         }
218
219         return 0;
220 }
221
222 // devclass desc
223 static int node_compare_devclass_desc(const void *a, const void *b) {
224         const node_t *na = a, *nb = b;
225
226         if(na->devclass < nb->devclass) {
227                 return -1;
228         }
229
230         if(na->devclass > nb->devclass) {
231                 return 1;
232         }
233
234         if(na < nb) {
235                 return -1;
236         }
237
238         if(na > nb) {
239                 return 1;
240         }
241
242         return 0;
243 }
244
245
246 /*
247
248 autoconnect()
249 {
250         timeout = 5
251
252         // find the best one for initial connect
253
254         if cur < min
255                 newcon =
256                         first from nodes
257                                 where dclass <= my.dclass and !connection and (timestamp - last_retry) > retry_timeout
258                                 order by dclass asc, last_connection desc
259                 if newcon
260                         timeout = 0
261                         goto connect
262
263
264         // find better nodes to connect to: in case we have less than min connections within [BACKBONE, i] and there are nodes which we are not connected to within the range
265
266         if min <= cur < max
267                 j = 0
268                 for i = BACKBONE to my.dclass
269                         j += count(from connections where node.dclass = i)
270                         if j < min
271                                 newcon =
272                                         first from nodes
273                                                 where dclass = i and !connection and (timestamp - last_retry) > retry_timeout
274                                                 order by last_connection desc
275                                 if newcon
276                                         goto connect
277                         else
278                                 break
279
280
281         // heal partitions
282
283         if min <= cur < max
284                 newcon =
285                         first from nodes
286                                 where dclass <= my.dclass and !reachable and (timestamp - last_retry) > retry_timeout
287                                 order by dclass asc, last_connection desc
288                 if newcon
289                         goto connect
290
291
292         // connect
293
294 connect:
295         if newcon
296                 connect newcon
297
298
299         // disconnect outgoing connections in case we have more than min connections within [BACKBONE, i] and there are nodes which we are connected to within the range [i, PORTABLE]
300
301         if min < cur <= max
302                 j = 0
303                 for i = BACKBONE to my.dclass
304                         j += count(from connections where node.dclass = i)
305                         if min < j
306                                 delcon =
307                                         first from nodes
308                                                 where dclass >= i and outgoing_connection
309                                                 order by dclass desc
310                                 if delcon
311                                         goto disconnect
312                                 else
313                                         break
314
315
316         // disconnect connections in case we have more than enough connections
317
318         if max < cur
319                 delcon =
320                         first from nodes
321                                 where outgoing_connection
322                                 order by dclass desc
323                 goto disconnect
324
325         // disconnect
326
327 disconnect:
328         if delcon
329                 disconnect delcon
330
331
332         // next iteration
333         next (timeout, autoconnect)
334
335 }
336
337 */
338
339
340 static void periodic_handler(event_loop_t *loop, void *data) {
341         meshlink_handle_t *mesh = loop->data;
342
343         /* Check if there are too many contradicting ADD_EDGE and DEL_EDGE messages.
344            This usually only happens when another node has the same Name as this node.
345            If so, sleep for a short while to prevent a storm of contradicting messages.
346         */
347
348         if(mesh->contradicting_del_edge > 100 && mesh->contradicting_add_edge > 100) {
349                 logger(mesh, MESHLINK_WARNING, "Possible node with same Name as us! Sleeping %d seconds.", mesh->sleeptime);
350                 struct timespec ts = {mesh->sleeptime, 0};
351                 clock_nanosleep(CLOCK_MONOTONIC, 0, &ts, NULL);
352                 mesh->sleeptime *= 2;
353
354                 if(mesh->sleeptime < 0) {
355                         mesh->sleeptime = 3600;
356                 }
357         } else {
358                 mesh->sleeptime /= 2;
359
360                 if(mesh->sleeptime < 10) {
361                         mesh->sleeptime = 10;
362                 }
363         }
364
365         mesh->contradicting_add_edge = 0;
366         mesh->contradicting_del_edge = 0;
367
368         int timeout = default_timeout;
369
370         /* Check if we need to make or break connections. */
371
372         if(mesh->nodes->count > 1) {
373
374                 logger(mesh, MESHLINK_DEBUG, "--- autoconnect begin ---");
375
376                 int retry_timeout = min(mesh->nodes->count * default_timeout, 60);
377
378                 logger(mesh, MESHLINK_DEBUG, "* devclass = %d", mesh->devclass);
379                 logger(mesh, MESHLINK_DEBUG, "* nodes = %d", mesh->nodes->count);
380                 logger(mesh, MESHLINK_DEBUG, "* retry_timeout = %d", retry_timeout);
381
382
383                 // connect disconnect nodes
384
385                 node_t *connect_to = NULL;
386                 node_t *disconnect_from = NULL;
387
388
389                 // get cur_connects
390
391                 unsigned int cur_connects = 0;
392
393                 for list_each(connection_t, c, mesh->connections) {
394                         if(c->status.active) {
395                                 cur_connects += 1;
396                         }
397                 }
398
399                 logger(mesh, MESHLINK_DEBUG, "* cur_connects = %d", cur_connects);
400                 logger(mesh, MESHLINK_DEBUG, "* outgoings = %d", mesh->outgoings->count);
401
402                 // get min_connects and max_connects
403
404                 unsigned int min_connects = mesh->dev_class_traits[mesh->devclass].min_connects;
405                 unsigned int max_connects = mesh->dev_class_traits[mesh->devclass].max_connects;
406
407                 logger(mesh, MESHLINK_DEBUG, "* min_connects = %d", min_connects);
408                 logger(mesh, MESHLINK_DEBUG, "* max_connects = %d", max_connects);
409
410                 // find the best one for initial connect
411
412                 if(cur_connects < min_connects) {
413                         splay_tree_t *nodes = splay_alloc_tree(node_compare_devclass_asc_lsc_desc, NULL);
414
415                         for splay_each(node_t, n, mesh->nodes) {
416                                 logger(mesh, MESHLINK_DEBUG, "* %s->devclass = %d", n->name, n->devclass);
417
418                                 if(n != mesh->self && n->devclass <= mesh->devclass && !n->connection && !n->status.blacklisted && (n->last_connect_try == 0 || (mesh->loop.now.tv_sec - n->last_connect_try) > retry_timeout)) {
419                                         splay_insert(nodes, n);
420                                 }
421                         }
422
423                         if(nodes->head) {
424                                 //timeout = 0;
425                                 connect_to = (node_t *)nodes->head->data;
426
427                                 logger(mesh, MESHLINK_DEBUG, "* found best one for initial connect: %s", connect_to->name);
428                         } else {
429                                 logger(mesh, MESHLINK_DEBUG, "* could not find node for initial connect");
430                         }
431
432                         splay_delete_tree(nodes);
433                 }
434
435
436                 // find better nodes to connect to
437
438                 if(!connect_to && min_connects <= cur_connects && cur_connects < max_connects) {
439                         unsigned int connects = 0;
440
441                         for(dev_class_t devclass = 0; devclass <= mesh->devclass; ++devclass) {
442                                 for list_each(connection_t, c, mesh->connections) {
443                                         if(c->status.active && c->node && c->node->devclass == devclass) {
444                                                 connects += 1;
445                                         }
446                                 }
447
448                                 if(connects < min_connects) {
449                                         splay_tree_t *nodes = splay_alloc_tree(node_compare_lsc_desc, NULL);
450
451                                         for splay_each(node_t, n, mesh->nodes) {
452                                                 if(n != mesh->self && n->devclass == devclass && !n->connection && !n->status.blacklisted && (n->last_connect_try == 0 || (mesh->loop.now.tv_sec - n->last_connect_try) > retry_timeout)) {
453                                                         splay_insert(nodes, n);
454                                                 }
455                                         }
456
457                                         if(nodes->head) {
458                                                 logger(mesh, MESHLINK_DEBUG, "* found better node");
459                                                 connect_to = (node_t *)nodes->head->data;
460
461                                                 splay_delete_tree(nodes);
462                                                 break;
463                                         }
464
465                                         splay_delete_tree(nodes);
466                                 } else {
467                                         break;
468                                 }
469                         }
470
471                         if(!connect_to) {
472                                 logger(mesh, MESHLINK_DEBUG, "* could not find better nodes");
473                         }
474                 }
475
476
477                 // heal partitions
478
479                 if(!connect_to && min_connects <= cur_connects && cur_connects < max_connects) {
480                         splay_tree_t *nodes = splay_alloc_tree(node_compare_devclass_asc_lsc_desc, NULL);
481
482                         for splay_each(node_t, n, mesh->nodes) {
483                                 if(n != mesh->self && n->devclass <= mesh->devclass && !n->status.reachable && !n->status.blacklisted && (n->last_connect_try == 0 || (mesh->loop.now.tv_sec - n->last_connect_try) > retry_timeout)) {
484                                         splay_insert(nodes, n);
485                                 }
486                         }
487
488                         if(nodes->head) {
489                                 logger(mesh, MESHLINK_DEBUG, "* try to heal partition");
490                                 connect_to = (node_t *)nodes->head->data;
491                         } else {
492                                 logger(mesh, MESHLINK_DEBUG, "* could not find nodes for partition healing");
493                         }
494
495                         splay_delete_tree(nodes);
496                 }
497
498
499                 // perform connect
500
501                 if(connect_to && !connect_to->connection) {
502                         connect_to->last_connect_try = mesh->loop.now.tv_sec;
503                         logger(mesh, MESHLINK_DEBUG, "Autoconnect trying to connect to %s", connect_to->name);
504
505                         /* check if there is already a connection attempt to this node */
506                         bool skip = false;
507
508                         for list_each(outgoing_t, outgoing, mesh->outgoings) {
509                                 if(outgoing->node == connect_to) {
510                                         logger(mesh, MESHLINK_DEBUG, "* skip autoconnect since it is an outgoing connection already");
511                                         skip = true;
512                                         break;
513                                 }
514                         }
515
516                         if(!connect_to->status.reachable && !node_read_public_key(mesh, connect_to)) {
517                                 logger(mesh, MESHLINK_DEBUG, "* skip autoconnect since we don't know this node's public key");
518                                 skip = true;
519                         }
520
521                         if(!skip) {
522                                 logger(mesh, MESHLINK_DEBUG, "Autoconnecting to %s", connect_to->name);
523                                 outgoing_t *outgoing = xzalloc(sizeof(outgoing_t));
524                                 outgoing->node = connect_to;
525                                 list_insert_tail(mesh->outgoings, outgoing);
526                                 setup_outgoing_connection(mesh, outgoing);
527                         }
528                 }
529
530
531                 // disconnect suboptimal outgoing connections
532
533                 if(min_connects < cur_connects /*&& cur_connects <= max_connects*/) {
534                         unsigned int connects = 0;
535
536                         for(dev_class_t devclass = 0; devclass <= mesh->devclass; ++devclass) {
537                                 for list_each(connection_t, c, mesh->connections) {
538                                         if(c->status.active && c->node && c->node->devclass == devclass) {
539                                                 connects += 1;
540                                         }
541                                 }
542
543                                 if(min_connects < connects) {
544                                         splay_tree_t *nodes = splay_alloc_tree(node_compare_devclass_desc, NULL);
545
546                                         for list_each(connection_t, c, mesh->connections) {
547                                                 if(c->outgoing && c->node && c->node->devclass >= devclass) {
548                                                         splay_insert(nodes, c->node);
549                                                 }
550                                         }
551
552                                         if(nodes->head) {
553                                                 logger(mesh, MESHLINK_DEBUG, "* disconnect suboptimal outgoing connection");
554                                                 disconnect_from = (node_t *)nodes->head->data;
555                                         }
556
557                                         splay_delete_tree(nodes);
558                                         break;
559                                 }
560                         }
561
562                         if(!disconnect_from) {
563                                 logger(mesh, MESHLINK_DEBUG, "* no suboptimal outgoing connections");
564                         }
565                 }
566
567
568                 // disconnect connections (too many connections)
569
570                 if(!disconnect_from && max_connects < cur_connects) {
571                         splay_tree_t *nodes = splay_alloc_tree(node_compare_devclass_desc, NULL);
572
573                         for list_each(connection_t, c, mesh->connections) {
574                                 if(c->status.active && c->node) {
575                                         splay_insert(nodes, c->node);
576                                 }
577                         }
578
579                         if(nodes->head) {
580                                 logger(mesh, MESHLINK_DEBUG, "* disconnect connection (too many connections)");
581
582                                 //timeout = 0;
583                                 disconnect_from = (node_t *)nodes->head->data;
584                         } else {
585                                 logger(mesh, MESHLINK_DEBUG, "* no node we want to disconnect, even though we have too many connections");
586                         }
587
588                         splay_delete_tree(nodes);
589                 }
590
591
592                 // perform disconnect
593
594                 if(disconnect_from && disconnect_from->connection) {
595                         logger(mesh, MESHLINK_DEBUG, "Autodisconnecting from %s", disconnect_from->connection->name);
596                         list_delete(mesh->outgoings, disconnect_from->connection->outgoing);
597                         disconnect_from->connection->outgoing = NULL;
598                         terminate_connection(mesh, disconnect_from->connection, disconnect_from->connection->status.active);
599                 }
600
601                 // reduce timeout if we don't have enough connections + outgoings
602                 if(cur_connects + mesh->outgoings->count < 3) {
603                         timeout = 1;
604                 }
605
606                 // done!
607
608                 logger(mesh, MESHLINK_DEBUG, "--- autoconnect end ---");
609         }
610
611         for splay_each(node_t, n, mesh->nodes) {
612                 if(n->status.dirty) {
613                         if(!node_write_config(mesh, n)) {
614                                 logger(mesh, MESHLINK_DEBUG, "Could not update %s", n->name);
615                         }
616
617                         n->status.dirty = false;
618                 }
619
620                 if(n->status.validkey && n->last_req_key + 3600 < mesh->loop.now.tv_sec) {
621                         logger(mesh, MESHLINK_DEBUG, "SPTPS key renewal for node %s", n->name);
622
623                         if(!sptps_force_kex(&n->sptps)) {
624                                 logger(mesh, MESHLINK_ERROR, "SPTPS key renewal for node %s failed", n->name);
625                                 n->status.validkey = false;
626                                 sptps_stop(&n->sptps);
627                                 n->status.waitingforkey = false;
628                                 n->last_req_key = 0;
629                         } else {
630                                 n->last_req_key = mesh->loop.now.tv_sec;
631                         }
632                 }
633         }
634
635         timeout_set(&mesh->loop, data, &(struct timespec) {
636                 timeout, prng(mesh, TIMER_FUDGE)
637         });
638 }
639
640 void handle_meta_connection_data(meshlink_handle_t *mesh, connection_t *c) {
641         if(!receive_meta(mesh, c)) {
642                 terminate_connection(mesh, c, c->status.active);
643                 return;
644         }
645 }
646
647 void retry(meshlink_handle_t *mesh) {
648         /* Reset the reconnection timers for all outgoing connections */
649         for list_each(outgoing_t, outgoing, mesh->outgoings) {
650                 outgoing->timeout = 0;
651
652                 if(outgoing->ev.cb) {
653                         timeout_set(&mesh->loop, &outgoing->ev, &(struct timespec) {
654                                 0, 0
655                         });
656                 }
657         }
658
659         /* For active connections, check if their addresses are still valid.
660          * If yes, reset their ping timers, otherwise terminate them. */
661         for list_each(connection_t, c, mesh->connections) {
662                 if(!c->status.active) {
663                         continue;
664                 }
665
666                 if(!c->status.pinged) {
667                         c->last_ping_time = 0;
668                 }
669
670                 sockaddr_t sa;
671                 socklen_t salen = sizeof(sa);
672
673                 if(getsockname(c->socket, &sa.sa, &salen)) {
674                         continue;
675                 }
676
677                 switch(sa.sa.sa_family) {
678                 case AF_INET:
679                         sa.in.sin_port = 0;
680                         break;
681
682                 case AF_INET6:
683                         sa.in6.sin6_port = 0;
684                         break;
685
686                 default:
687                         continue;
688                 }
689
690                 int sock = socket(sa.sa.sa_family, SOCK_STREAM, IPPROTO_TCP);
691
692                 if(sock != -1) {
693                         continue;
694                 }
695
696                 if(bind(sock, &sa.sa, salen) && errno == EADDRNOTAVAIL) {
697                         logger(mesh, MESHLINK_DEBUG, "Local address for connection to %s no longer valid, terminating", c->name);
698                         terminate_connection(mesh, c, c->status.active);
699                 }
700
701                 closesocket(sock);
702         }
703
704         /* Kick the ping timeout handler */
705         timeout_set(&mesh->loop, &mesh->pingtimer, &(struct timespec) {
706                 0, 0
707         });
708 }
709
710 /*
711   this is where it all happens...
712 */
713 void main_loop(meshlink_handle_t *mesh) {
714         timeout_add(&mesh->loop, &mesh->pingtimer, timeout_handler, &mesh->pingtimer, &(struct timespec) {
715                 1, prng(mesh, TIMER_FUDGE)
716         });
717         timeout_add(&mesh->loop, &mesh->periodictimer, periodic_handler, &mesh->periodictimer, &(struct timespec) {
718                 0, 0
719         });
720
721         //Add signal handler
722         mesh->datafromapp.signum = 0;
723         signal_add(&mesh->loop, &mesh->datafromapp, meshlink_send_from_queue, mesh, mesh->datafromapp.signum);
724
725         if(!event_loop_run(&mesh->loop, &mesh->mutex)) {
726                 logger(mesh, MESHLINK_ERROR, "Error while waiting for input: %s", strerror(errno));
727                 call_error_cb(mesh, MESHLINK_ENETWORK);
728         }
729
730         signal_del(&mesh->loop, &mesh->datafromapp);
731         timeout_del(&mesh->loop, &mesh->periodictimer);
732         timeout_del(&mesh->loop, &mesh->pingtimer);
733 }