]> git.meshlink.io Git - meshlink/blob - src/net.c
fix for autoconnect (prevent ping pong)
[meshlink] / src / net.c
1 /*
2     net.c -- most of the network code
3     Copyright (C) 2014 Guus Sliepen <guus@meshlink.io>
4
5     This program is free software; you can redistribute it and/or modify
6     it under the terms of the GNU General Public License as published by
7     the Free Software Foundation; either version 2 of the License, or
8     (at your option) any later version.
9
10     This program is distributed in the hope that it will be useful,
11     but WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13     GNU General Public License for more details.
14
15     You should have received a copy of the GNU General Public License along
16     with this program; if not, write to the Free Software Foundation, Inc.,
17     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20 #include "system.h"
21
22 #include "utils.h"
23 #include "conf.h"
24 #include "connection.h"
25 #include "graph.h"
26 #include "logger.h"
27 #include "meshlink_internal.h"
28 #include "meta.h"
29 #include "net.h"
30 #include "netutl.h"
31 #include "protocol.h"
32 #include "xalloc.h"
33
34 #include <assert.h>
35
#if !defined(min)
/*
  Return the smaller of two ints.
  Only compiled when no min() macro is already defined by an earlier header.
*/
static int min(int a, int b) {
        return a < b ? a : b;
}
#endif
41
42 /*
43   Terminate a connection:
44   - Mark it as inactive
45   - Remove the edge representing this connection
46   - Kill it with fire
47   - Check if we need to retry making an outgoing connection
48 */
49 void terminate_connection(meshlink_handle_t *mesh, connection_t *c, bool report) {
50         logger(mesh, MESHLINK_INFO, "Closing connection with %s (%s)", c->name, c->hostname);
51
52         c->status.active = false;
53
54         if(c->node && c->node->connection == c)
55                 c->node->connection = NULL;
56
57         if(c->edge) {
58                 if(report)
59                         send_del_edge(mesh, mesh->everyone, c->edge);
60
61                 edge_del(mesh, c->edge);
62                 c->edge = NULL;
63
64                 /* Run MST and SSSP algorithms */
65
66                 graph(mesh);
67
68                 /* If the node is not reachable anymore but we remember it had an edge to us, clean it up */
69
70                 if(report && !c->node->status.reachable) {
71                         edge_t *e;
72                         e = lookup_edge(c->node, mesh->self);
73                         if(e) {
74                                 send_del_edge(mesh, mesh->everyone, e);
75                                 edge_del(mesh, e);
76                         }
77                 }
78         }
79
80         outgoing_t *outgoing = c->outgoing;
81         connection_del(mesh, c);
82
83         /* Check if this was our outgoing connection */
84
85         if(outgoing)
86                 do_outgoing_connection(mesh, outgoing);
87
88 #ifndef HAVE_MINGW
89         /* Clean up dead proxy processes */
90
91         while(waitpid(-1, NULL, WNOHANG) > 0);
92 #endif
93 }
94
95 /*
96   Check if the other end is active.
97   If we have sent packets, but didn't receive any,
98   then possibly the other end is dead. We send a
99   PING request over the meta connection. If the other
100   end does not reply in time, we consider them dead
101   and close the connection.
102 */
103 static void timeout_handler(event_loop_t *loop, void *data) {
104         meshlink_handle_t *mesh = loop->data;
105         logger(mesh, MESHLINK_DEBUG, "timeout_handler()");
106
107         for list_each(connection_t, c, mesh->connections) {
108                 // Also make sure that if outstanding key requests for the UDP counterpart of a connection has timed out, we restart it.
109                 if(c->node) {
110                         if(c->node->status.waitingforkey && c->node->last_req_key + mesh->pingtimeout <= mesh->loop.now.tv_sec)
111                                 send_req_key(mesh, c->node);
112                 }
113                 if(c->last_ping_time + mesh->pingtimeout <= mesh->loop.now.tv_sec) {
114                         if(c->status.active) {
115                                 if(c->status.pinged) {
116                                         logger(mesh, MESHLINK_INFO, "%s (%s) didn't respond to PING in %ld seconds", c->name, c->hostname, (long)mesh->loop.now.tv_sec - c->last_ping_time);
117                                 } else if(c->last_ping_time + mesh->pinginterval <= mesh->loop.now.tv_sec) {
118                                         send_ping(mesh, c);
119                                         continue;
120                                 } else {
121                                         continue;
122                                 }
123                         } else {
124                                 if(c->status.connecting)
125                                         logger(mesh, MESHLINK_WARNING, "Timeout while connecting to %s (%s)", c->name, c->hostname);
126                                 else
127                                         logger(mesh, MESHLINK_WARNING, "Timeout from %s (%s) during authentication", c->name, c->hostname);
128                         }
129                         terminate_connection(mesh, c, c->status.active);
130                 }
131         }
132
133         timeout_set(&mesh->loop, data, &(struct timeval){mesh->pingtimeout, rand() % 100000});
134 }
135
136 // devclass asc, last_successfull_connection desc
137 static int node_compare_devclass_asc_lsc_desc(const void *a, const void *b)
138 {
139         const node_t *na = a, *nb = b;
140
141         if(na->devclass < nb->devclass)
142                 { return -1; }
143
144         if(na->devclass > nb->devclass)
145                 { return 1; }
146
147         if(na->last_successfull_connection == nb->last_successfull_connection)
148                 return 0;
149
150         if(na->last_successfull_connection == 0 || na->last_successfull_connection > nb->last_successfull_connection)
151                 return -1;
152
153         if(nb->last_successfull_connection == 0 || na->last_successfull_connection < nb->last_successfull_connection)
154                 return 1;
155
156         if(na < nb)
157                 return -1;
158
159         if(na > nb)
160                 return 1;
161
162         return 0;
163 }
164
165 // last_successfull_connection desc
166 static int node_compare_lsc_desc(const void *a, const void *b)
167 {
168         const node_t *na = a, *nb = b;
169
170         if(na->last_successfull_connection == nb->last_successfull_connection)
171                 return 0;
172
173         if(na->last_successfull_connection == 0 || na->last_successfull_connection > nb->last_successfull_connection)
174                 return -1;
175
176         if(nb->last_successfull_connection == 0 || na->last_successfull_connection < nb->last_successfull_connection)
177                 return 1;
178
179         if(na < nb)
180                 return -1;
181
182         if(na > nb)
183                 return 1;
184
185         return 0;
186 }
187
188 // devclass desc
189 static int node_compare_devclass_desc(const void *a, const void *b)
190 {
191         const node_t *na = a, *nb = b;
192
193         if(na->devclass < nb->devclass)
194                 { return -1; }
195
196         if(na->devclass > nb->devclass)
197                 { return 1; }
198
199         if(na < nb)
200                 return -1;
201
202         if(na > nb)
203                 return 1;
204
205         return 0;
206 }
207
208
209 /*
210
211 autoconnect()
212 {
213         timeout = 5
214
215         // find the best one for initial connect
216
217         if cur < min
218                 newcon =
219                         first from nodes
220                                 where dclass <= my.dclass and !connection and (timestamp - last_retry) > retry_timeout
221                                 order by dclass asc, last_connection desc
222                 if newcon
223                         timeout = 0
224                         goto connect
225
226
227         // find better nodes to connect to: in case we have less than min connections within [BACKBONE, i] and there are nodes which we are not connected to within the range
228
229         if min <= cur < max
230                 j = 0
231                 for i = BACKBONE to my.dclass
232                         j += count(from connections where node.dclass = i)
233                         if j < min
234                                 newcon =
235                                         first from nodes
236                                                 where dclass = i and !connection and (timestamp - last_retry) > retry_timeout
237                                                 order by last_connection desc
238                                 if newcon
239                                         goto connect
240                         else
241                                 break
242
243
244         // heal partitions
245
246         if min <= cur < max
247                 newcon =
248                         first from nodes
249                                 where dclass <= my.dclass and !reachable and (timestamp - last_retry) > retry_timeout
250                                 order by dclass asc, last_connection desc
251                 if newcon
252                         goto connect
253
254
255         // connect
256
257 connect:
258         if newcon
259                 connect newcon
260
261
262         // disconnect outgoing connections in case we have more than min connections within [BACKBONE, i] and there are nodes which we are connected to within the range [i, PORTABLE]
263
264         if min < cur <= max
265                 j = 0
266                 for i = BACKBONE to my.dclass
267                         j += count(from connections where node.dclass = i)
268                         if min < j
269                                 delcon =
270                                         first from nodes
271                                                 where dclass >= i and outgoing_connection
272                                                 order by dclass desc
                                if delcon
274                                         goto disconnect
275                                 else
276                                         break
277
278
279         // disconnect connections in case we have more than enough connections
280
281         if max < cur
282                 delcon =
283                         first from nodes
284                                 where outgoing_connection
285                                 order by dclass desc
286                 goto disconnect
287
288         // disconnect
289
disconnect:
291         if delcon
292                 disconnect delcon
293
294
295         // next iteration
296         next (timeout, autoconnect)
297
298 }
299
300 */
301
302
303 static void periodic_handler(event_loop_t *loop, void *data) {
304         meshlink_handle_t *mesh = loop->data;
305
306         /* Check if there are too many contradicting ADD_EDGE and DEL_EDGE messages.
307            This usually only happens when another node has the same Name as this node.
308            If so, sleep for a short while to prevent a storm of contradicting messages.
309         */
310
311         if(mesh->contradicting_del_edge > 100 && mesh->contradicting_add_edge > 100) {
312                 logger(mesh, MESHLINK_WARNING, "Possible node with same Name as us! Sleeping %d seconds.", mesh->sleeptime);
313                 usleep(mesh->sleeptime * 1000000LL);
314                 mesh->sleeptime *= 2;
315                 if(mesh->sleeptime < 0)
316                         mesh->sleeptime = 3600;
317         } else {
318                 mesh->sleeptime /= 2;
319                 if(mesh->sleeptime < 10)
320                         mesh->sleeptime = 10;
321         }
322
323         mesh->contradicting_add_edge = 0;
324         mesh->contradicting_del_edge = 0;
325
326         int timeout = 5;
327
328         /* Check if we need to make or break connections. */
329
330         if(mesh->nodes->count > 1) {
331
332                 logger(mesh, MESHLINK_DEBUG, "--- autoconnect begin ---");
333
334                 int retry_timeout = min(mesh->nodes->count * 5, 60);
335
336                 logger(mesh, MESHLINK_DEBUG, "* devclass = %d", mesh->devclass);
337                 logger(mesh, MESHLINK_DEBUG, "* nodes = %d", mesh->nodes->count);
338                 logger(mesh, MESHLINK_DEBUG, "* retry_timeout = %d", retry_timeout);
339
340
341                 // connect disconnect nodes
342
343                 node_t* connect_to = NULL;
344                 node_t* disconnect_from = NULL;
345
346
347                 // get cur_connects
348
349                 int cur_connects = 0;
350
351                 for list_each(connection_t, c, mesh->connections)
352                 {
353                         if(c->status.active)
354                         {
355                                 cur_connects += 1;
356                         }
357                 }
358
359                 logger(mesh, MESHLINK_DEBUG, "* cur_connects = %d", cur_connects);
360                 logger(mesh, MESHLINK_DEBUG, "* outgoings = %d", mesh->outgoings->count);
361
362                 // get min_connects and max_connects
363
364                 assert(mesh->devclass >= 0 && mesh->devclass <= _DEV_CLASS_MAX);
365
366                 int min_connects = dev_class_traits[mesh->devclass].min_connects;
367                 int max_connects = dev_class_traits[mesh->devclass].max_connects;
368
369                 logger(mesh, MESHLINK_DEBUG, "* min_connects = %d", min_connects);
370                 logger(mesh, MESHLINK_DEBUG, "* max_connects = %d", max_connects);
371
372
373                 // find the best one for initial connect
374
375                 if(cur_connects < min_connects)
376                 {
377                         splay_tree_t *nodes = splay_alloc_tree(node_compare_devclass_asc_lsc_desc, NULL);
378
379                         for splay_each(node_t, n, mesh->nodes)
380                         {
381                                 logger(mesh, MESHLINK_DEBUG, "* n->devclass = %d", n->devclass);
382                                 if(n != mesh->self && n->devclass <= mesh->devclass && !n->connection && (n->last_connect_try == 0 || (time(NULL) - n->last_connect_try) > retry_timeout))
383                                         { splay_insert(nodes, n); }
384                         }
385
386                         if(nodes->head)
387                         {
388                                 logger(mesh, MESHLINK_DEBUG, "* found best one for initial connect");
389
390                                 //timeout = 0;
391                                 connect_to = (node_t*)nodes->head->data;
392                         }
393                         else
394                                 { logger(mesh, MESHLINK_DEBUG, "* could not find node for initial connect"); }
395
396                         splay_free_tree(nodes);
397                 }
398
399
400                 // find better nodes to connect to
401
402                 if(!connect_to && min_connects <= cur_connects && cur_connects < max_connects)
403                 {
404                         unsigned int connects = 0;
405
406                         for(int devclass = 0; devclass <= mesh->devclass; ++devclass)
407                         {
408                                 for list_each(connection_t, c, mesh->connections)
409                                 {
410                                         if(c->status.active && c->node && c->node->devclass == devclass)
411                                                 { connects += 1; }
412                                 }
413
414                                 if( connects < min_connects )
415                                 {
416                                         splay_tree_t *nodes = splay_alloc_tree(node_compare_lsc_desc, NULL);
417
418                                         for splay_each(node_t, n, mesh->nodes)
419                                         {
420                                                 if(n != mesh->self && n->devclass == devclass && !n->connection && (n->last_connect_try == 0 || (time(NULL) - n->last_connect_try) > retry_timeout))
421                                                         { splay_insert(nodes, n); }
422                                         }
423
424                                         if(nodes->head)
425                                         {
426                                                 logger(mesh, MESHLINK_DEBUG, "* found better node");
427                                                 connect_to = (node_t*)nodes->head->data;
428
429                                                 splay_free_tree(nodes);
430                                                 break;
431                                         }
432
433                                         splay_free_tree(nodes);
434                                 }
435                                 else
436                                         { break; }
437                         }
438
439                         if(!connect_to)
440                                 { logger(mesh, MESHLINK_DEBUG, "* could not find better nodes"); }
441                 }
442
443
444                 // heal partitions
445
446                 if(!connect_to && min_connects <= cur_connects && cur_connects < max_connects)
447                 {
448                         splay_tree_t *nodes = splay_alloc_tree(node_compare_devclass_asc_lsc_desc, NULL);
449
450                         for splay_each(node_t, n, mesh->nodes)
451                         {
452                                 if(n != mesh->self && n->devclass <= mesh->devclass && !n->status.reachable && (n->last_connect_try == 0 || (time(NULL) - n->last_connect_try) > retry_timeout))
453                                         { splay_insert(nodes, n); }
454                         }
455
456                         if(nodes->head)
457                         {
458                                 logger(mesh, MESHLINK_DEBUG, "* try to heal partition");
459                                 connect_to = (node_t*)nodes->head->data;
460                         }
461                         else
462                                 { logger(mesh, MESHLINK_DEBUG, "* could not find nodes for partition healing"); }
463
464                         splay_free_tree(nodes);
465                 }
466
467
468                 // perform connect
469
470                 if(connect_to && !connect_to->connection)
471                 {
472                         connect_to->last_connect_try = time(NULL);
473
474                         /* check if there is already a connection attempt to this node */
475                         bool found = false;
476                         for list_each(outgoing_t, outgoing, mesh->outgoings)
477                         {
478                                 if(!strcmp(outgoing->name, connect_to->name))
479                                 {
480                                         found = true;
481                                         break;
482                                 }
483                         }
484
485                         if(!found)
486                         {
487                                 logger(mesh, MESHLINK_DEBUG, "Autoconnecting to %s", connect_to->name);
488                                 outgoing_t *outgoing = xzalloc(sizeof(outgoing_t));
489                                 outgoing->mesh = mesh;
490                                 outgoing->name = xstrdup(connect_to->name);
491                                 list_insert_tail(mesh->outgoings, outgoing);
492                                 setup_outgoing_connection(mesh, outgoing);
493                         }
494                         else
495                                 { logger(mesh, MESHLINK_DEBUG, "* skip autoconnect since it is an outgoing connection already"); }
496                 }
497
498
499                 // disconnect suboptimal outgoing connections
500
501                 if(min_connects < cur_connects /*&& cur_connects <= max_connects*/)
502                 {
503                         unsigned int connects = 0;
504
505                         for(int devclass = 0; devclass <= mesh->devclass; ++devclass)
506                         {
507                                 for list_each(connection_t, c, mesh->connections)
508                                 {
509                                         if(c->status.active && c->node && c->node->devclass == devclass)
510                                                 { connects += 1; }
511                                 }
512
513                                 if( min_connects < connects )
514                                 {
515                                         splay_tree_t *nodes = splay_alloc_tree(node_compare_devclass_desc, NULL);
516
517                                         for list_each(connection_t, c, mesh->connections)
518                                         {
519                                                 if(c->outgoing && c->node && c->node->devclass >= devclass)
520                                                         { splay_insert(nodes, c->node); }
521                                         }
522
523                                         if(nodes->head)
524                                         {
525                                                 logger(mesh, MESHLINK_DEBUG, "* disconnect suboptimal outgoing connection");
526                                                 disconnect_from = (node_t*)nodes->head->data;
527                                         }
528
529                                         splay_free_tree(nodes);
530                                         break;
531                                 }
532                         }
533
534                         if(!disconnect_from)
535                                 { logger(mesh, MESHLINK_DEBUG, "* no suboptimal outgoing connections"); }
536                 }
537
538
539                 // disconnect connections (too many connections)
540
541                 if(!disconnect_from && max_connects < cur_connects)
542                 {
543                         splay_tree_t *nodes = splay_alloc_tree(node_compare_devclass_desc, NULL);
544
545                         for list_each(connection_t, c, mesh->connections)
546                         {
547                                 if(c->status.active && c->node)
548                                         { splay_insert(nodes, c->node); }
549                         }
550
551                         if(nodes->head)
552                         {
553                                 logger(mesh, MESHLINK_DEBUG, "* disconnect connection (too many connections)");
554
555                                 //timeout = 0;
556                                 disconnect_from = (node_t*)nodes->head->data;
557                         }
558                         else
559                                 { logger(mesh, MESHLINK_DEBUG, "* no node we want to disconnect, even though we have too many connections"); }
560
561                         splay_free_tree(nodes);
562                 }
563
564
565                 // perform disconnect
566
567                 if(disconnect_from && disconnect_from->connection)
568                 {
569                         logger(mesh, MESHLINK_DEBUG, "Autodisconnecting from %s", disconnect_from->connection->name);
570                         list_delete(mesh->outgoings, disconnect_from->connection->outgoing);
571                         disconnect_from->connection->outgoing = NULL;
572                         terminate_connection(mesh, disconnect_from->connection, disconnect_from->connection->status.active);
573                 }
574
575
576                 // done!
577
578                 logger(mesh, MESHLINK_DEBUG, "--- autoconnect end ---");
579         }
580
581         timeout_set(&mesh->loop, data, &(struct timeval){timeout, rand() % 100000});
582 }
583
584 void handle_meta_connection_data(meshlink_handle_t *mesh, connection_t *c) {
585         if (!receive_meta(mesh, c)) {
586                 terminate_connection(mesh, c, c->status.active);
587                 return;
588         }
589 }
590
591 void retry(meshlink_handle_t *mesh) {
592         /* Reset the reconnection timers for all outgoing connections */
593         for list_each(outgoing_t, outgoing, mesh->outgoings) {
594                 outgoing->timeout = 0;
595                 if(outgoing->ev.cb)
596                         timeout_set(&mesh->loop, &outgoing->ev, &(struct timeval){0, 0});
597         }
598
599         /* Check for outgoing connections that are in progress, and reset their ping timers */
600         for list_each(connection_t, c, mesh->connections) {
601                 if(c->outgoing && !c->node)
602                         c->last_ping_time = 0;
603         }
604
605         /* Kick the ping timeout handler */
606         timeout_set(&mesh->loop, &mesh->pingtimer, &(struct timeval){0, 0});
607 }
608
609 /*
610   this is where it all happens...
611 */
612 int main_loop(meshlink_handle_t *mesh) {
613         timeout_add(&mesh->loop, &mesh->pingtimer, timeout_handler, &mesh->pingtimer, &(struct timeval){mesh->pingtimeout, rand() % 100000});
614         timeout_add(&mesh->loop, &mesh->periodictimer, periodic_handler, &mesh->periodictimer, &(struct timeval){0, 0});
615
616         //Add signal handler
617         mesh->datafromapp.signum = 0;
618         signal_add(&(mesh->loop),&(mesh->datafromapp), (signal_cb_t)meshlink_send_from_queue,mesh, mesh->datafromapp.signum);
619
620         if(!event_loop_run(&(mesh->loop), &(mesh->mesh_mutex))) {
621                 logger(mesh, MESHLINK_ERROR, "Error while waiting for input: %s", strerror(errno));
622                 return 1;
623         }
624
625         timeout_del(&mesh->loop, &mesh->periodictimer);
626         timeout_del(&mesh->loop, &mesh->pingtimer);
627
628         return 0;
629 }