]> git.meshlink.io Git - meshlink/blob - src/net.c
Connect to unreachable nodes after enough stable connections exit.
[meshlink] / src / net.c
1 /*
2     net.c -- most of the network code
3     Copyright (C) 2014 Guus Sliepen <guus@meshlink.io>
4
5     This program is free software; you can redistribute it and/or modify
6     it under the terms of the GNU General Public License as published by
7     the Free Software Foundation; either version 2 of the License, or
8     (at your option) any later version.
9
10     This program is distributed in the hope that it will be useful,
11     but WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13     GNU General Public License for more details.
14
15     You should have received a copy of the GNU General Public License along
16     with this program; if not, write to the Free Software Foundation, Inc.,
17     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20 #include "system.h"
21
22 #include "utils.h"
23 #include "conf.h"
24 #include "connection.h"
25 #include "graph.h"
26 #include "logger.h"
27 #include "meshlink_internal.h"
28 #include "meta.h"
29 #include "net.h"
30 #include "netutl.h"
31 #include "protocol.h"
32 #include "xalloc.h"
33
34 /*
35   Terminate a connection:
36   - Mark it as inactive
37   - Remove the edge representing this connection
38   - Kill it with fire
39   - Check if we need to retry making an outgoing connection
40 */
41 void terminate_connection(meshlink_handle_t *mesh, connection_t *c, bool report) {
42         logger(DEBUG_CONNECTIONS, LOG_NOTICE, "Closing connection with %s (%s)", c->name, c->hostname);
43
44         c->status.active = false;
45
46         if(c->node && c->node->connection == c)
47                 c->node->connection = NULL;
48
49         if(c->edge) {
50                 if(report)
51                         send_del_edge(mesh, mesh->everyone, c->edge);
52
53                 edge_del(mesh, c->edge);
54                 c->edge = NULL;
55
56                 /* Run MST and SSSP algorithms */
57
58                 graph(mesh);
59
60                 /* If the node is not reachable anymore but we remember it had an edge to us, clean it up */
61
62                 if(report && !c->node->status.reachable) {
63                         edge_t *e;
64                         e = lookup_edge(c->node, mesh->self);
65                         if(e) {
66                                 send_del_edge(mesh, mesh->everyone, e);
67                                 edge_del(mesh, e);
68                         }
69                 }
70         }
71
72         outgoing_t *outgoing = c->outgoing;
73         connection_del(mesh, c);
74
75         /* Check if this was our outgoing connection */
76
77         if(outgoing)
78                 do_outgoing_connection(mesh, outgoing);
79
80 #ifndef HAVE_MINGW
81         /* Clean up dead proxy processes */
82
83         while(waitpid(-1, NULL, WNOHANG) > 0);
84 #endif
85 }
86
87 /*
88   Check if the other end is active.
89   If we have sent packets, but didn't receive any,
90   then possibly the other end is dead. We send a
91   PING request over the meta connection. If the other
92   end does not reply in time, we consider them dead
93   and close the connection.
94 */
95 static void timeout_handler(event_loop_t *loop, void *data) {
96         meshlink_handle_t *mesh = loop->data;
97
98         for list_each(connection_t, c, mesh->connections) {
99                 if(c->last_ping_time + mesh->pingtimeout <= mesh->loop.now.tv_sec) {
100                         if(c->status.active) {
101                                 if(c->status.pinged) {
102                                         logger(DEBUG_CONNECTIONS, LOG_INFO, "%s (%s) didn't respond to PING in %ld seconds", c->name, c->hostname, (long)mesh->loop.now.tv_sec - c->last_ping_time);
103                                 } else if(c->last_ping_time + mesh->pinginterval <= mesh->loop.now.tv_sec) {
104                                         send_ping(mesh, c);
105                                         continue;
106                                 } else {
107                                         continue;
108                                 }
109                         } else {
110                                 if(c->status.connecting)
111                                         logger(DEBUG_CONNECTIONS, LOG_WARNING, "Timeout while connecting to %s (%s)", c->name, c->hostname);
112                                 else
113                                         logger(DEBUG_CONNECTIONS, LOG_WARNING, "Timeout from %s (%s) during authentication", c->name, c->hostname);
114                         }
115                         terminate_connection(mesh, c, c->status.active);
116                 }
117         }
118
119         timeout_set(&mesh->loop, data, &(struct timeval){mesh->pingtimeout, rand() % 100000});
120 }
121
122 static void periodic_handler(event_loop_t *loop, void *data) {
123         meshlink_handle_t *mesh = loop->data;
124
125         /* Check if there are too many contradicting ADD_EDGE and DEL_EDGE messages.
126            This usually only happens when another node has the same Name as this node.
127            If so, sleep for a short while to prevent a storm of contradicting messages.
128         */
129
130         if(mesh->contradicting_del_edge > 100 && mesh->contradicting_add_edge > 100) {
131                 logger(DEBUG_ALWAYS, LOG_WARNING, "Possible node with same Name as us! Sleeping %d seconds.", mesh->sleeptime);
132                 usleep(mesh->sleeptime * 1000000LL);
133                 mesh->sleeptime *= 2;
134                 if(mesh->sleeptime < 0)
135                         mesh->sleeptime = 3600;
136         } else {
137                 mesh->sleeptime /= 2;
138                 if(mesh->sleeptime < 10)
139                         mesh->sleeptime = 10;
140         }
141
142         mesh->contradicting_add_edge = 0;
143         mesh->contradicting_del_edge = 0;
144
145         /* If AutoConnect is set, check if we need to make or break connections. */
146
147         if(autoconnect && mesh->nodes->count > 1) {
148                 /* Count number of active connections */
149                 int nc = 0;
150                 for list_each(connection_t, c, mesh->connections) {
151                         if(c->status.active)
152                                 nc++;
153                 }
154
155                 /* Count number of unreachable nodes */
156                 int num_unreachable = 0;
157                 for splay_each(node_t, n, mesh->nodes) {
158                         if(!n->status.reachable)
159                                 num_unreachable++;
160                 }
161
162                 if(nc < autoconnect) {
163                         /* Not enough active connections, try to add one.
164                            Choose a random node, if we don't have a connection to it,
165                            and we are not already trying to make one, create an
166                            outgoing connection to this node.
167                         */
168                         int r = rand() % mesh->nodes->count;
169                         int i = 0;
170
171                         for splay_each(node_t, n, mesh->nodes) {
172                                 if(i++ != r)
173                                         continue;
174
175                                 if(n->connection)
176                                         break;
177
178                                 bool found = false;
179
180                                 for list_each(outgoing_t, outgoing, mesh->outgoings) {
181                                         if(!strcmp(outgoing->name, n->name)) {
182                                                 found = true;
183                                                 break;
184                                         }
185                                 }
186
187                                 if(!found) {
188                                         //TODO: if the node is blacklisted the connection will not happen, but
189                                         //the user will read this debug message "Autoconnecting to %s" that is misleading
190                                         logger(DEBUG_CONNECTIONS, LOG_INFO, "Autoconnecting to %s", n->name);
191                                         outgoing_t *outgoing = xzalloc(sizeof *outgoing);
192                                         outgoing->name = xstrdup(n->name);
193                                         list_insert_tail(mesh->outgoings, outgoing);
194                                         setup_outgoing_connection(mesh, outgoing);
195                                 }
196                                 break;
197                         }
198                 //} else if(nc > autoconnect) {
199                 } else {
200                         /* Min number of connections established. Now try
201                            to connect to some unreachable nodes to attempt
202                            to heal possible partitions.
203                         */
204                         int r = rand() % num_unreachable;
205                         int i = 0;
206
207                         for splay_each(node_t, n, mesh->nodes) {
208                                 if(n->status.reachable)
209                                         continue;
210                                 
211                                 if(i++ != r)
212                                         continue;
213
214                                 if(n->connection)
215                                         break;
216
217                                 bool found = false;
218
219                                 for list_each(outgoing_t, outgoing, mesh->outgoings) {
220                                         if(!strcmp(outgoing->name, n->name)) {
221                                                 found = true;
222                                                 break;
223                                         }
224                                 }
225
226                                 if(!found) {
227                                         //TODO: if the node is blacklisted the connection will not happen, but
228                                         //the user will read this debug message "Autoconnecting to %s" that is misleading
229                                         logger(DEBUG_CONNECTIONS, LOG_INFO, "Autoconnecting to %s", n->name);
230                                         outgoing_t *outgoing = xzalloc(sizeof *outgoing);
231                                         outgoing->name = xstrdup(n->name);
232                                         list_insert_tail(mesh->outgoings, outgoing);
233                                         setup_outgoing_connection(mesh, outgoing);
234                                 }
235                                 break;
236                         }
237                 }
238                 if(nc > autoconnect) {
239                         /* Too many active connections, try to remove one.
240                            Choose a random outgoing connection to a node
241                            that has at least one other connection.
242                         */
243                         int r = rand() % nc;
244                         int i = 0;
245
246                         for list_each(connection_t, c, mesh->connections) {
247                                 if(!c->status.active)
248                                         continue;
249
250                                 if(i++ != r)
251                                         continue;
252
253                                 if(!c->outgoing || !c->node || c->node->edge_tree->count < 2)
254                                         break;
255
256                                 logger(DEBUG_CONNECTIONS, LOG_INFO, "Autodisconnecting from %s", c->name);
257                                 list_delete(mesh->outgoings, c->outgoing);
258                                 c->outgoing = NULL;
259                                 terminate_connection(mesh, c, c->status.active);
260                                 break;
261                         }
262                 }
263
264                 if(nc >= autoconnect) {
265                         /* If we have enough active connections,
266                            remove any pending outgoing connections.
267                            Do not remove pending connections to unreachable
268                            nodes.
269                         */
270                         node_t *o_node = NULL;
271                         for list_each(outgoing_t, o, mesh->outgoings) {
272                                 o_node = lookup_node(mesh, o->name);
273                                 /* o_node is NULL if it is not part of the graph yet */
274                                 if(!o_node || !o_node->status.reachable)
275                                         continue;
276
277                                 bool found = false;
278                                 for list_each(connection_t, c, mesh->connections) {
279                                         if(c->outgoing == o) {
280                                                 found = true;
281                                                 break;
282                                         }
283                                 }
284                                 if(!found) {
285                                         logger(DEBUG_CONNECTIONS, LOG_INFO, "Cancelled outgoing connection to %s", o->name);
286                                         /* The node variable is leaked in from using the list_each macro.
287                                            The o variable could be used, but using node directly
288                                            is more efficient.
289                                         */
290                                         list_delete_node(mesh->outgoings, node);
291                                 }
292                         }
293                 }
294         }
295
296         timeout_set(&mesh->loop, data, &(struct timeval){5, rand() % 100000});
297 }
298
299 void handle_meta_connection_data(meshlink_handle_t *mesh, connection_t *c) {
300         if (!receive_meta(mesh, c)) {
301                 terminate_connection(mesh, c, c->status.active);
302                 return;
303         }
304 }
305
306 void retry(meshlink_handle_t *mesh) {
307         /* Reset the reconnection timers for all outgoing connections */
308         for list_each(outgoing_t, outgoing, mesh->outgoings) {
309                 outgoing->timeout = 0;
310                 if(outgoing->ev.cb)
311                         timeout_set(&mesh->loop, &outgoing->ev, &(struct timeval){0, 0});
312         }
313
314         /* Check for outgoing connections that are in progress, and reset their ping timers */
315         for list_each(connection_t, c, mesh->connections) {
316                 if(c->outgoing && !c->node)
317                         c->last_ping_time = 0;
318         }
319
320         /* Kick the ping timeout handler */
321         timeout_set(&mesh->loop, &mesh->pingtimer, &(struct timeval){0, 0});
322 }
323
324 /*
325   this is where it all happens...
326 */
327 int main_loop(meshlink_handle_t *mesh) {
328         timeout_add(&mesh->loop, &mesh->pingtimer, timeout_handler, &mesh->pingtimer, &(struct timeval){mesh->pingtimeout, rand() % 100000});
329         timeout_add(&mesh->loop, &mesh->periodictimer, periodic_handler, &mesh->periodictimer, &(struct timeval){mesh->pingtimeout, rand() % 100000});
330
331         //Add signal handler
332         mesh->datafromapp.signum = 0;
333         signal_add(&(mesh->loop),&(mesh->datafromapp), (signal_cb_t)meshlink_send_from_queue,mesh, mesh->datafromapp.signum);
334
335         if(!event_loop_run(&mesh->loop)) {
336                 logger(DEBUG_ALWAYS, LOG_ERR, "Error while waiting for input: %s", strerror(errno));
337                 return 1;
338         }
339
340         timeout_del(&mesh->loop, &mesh->periodictimer);
341         timeout_del(&mesh->loop, &mesh->pingtimer);
342
343         return 0;
344 }