]> git.meshlink.io Git - utcp/blob - utcp.c
Turn magic numbers into #defines.
[utcp] / utcp.c
1 /*
2     utcp.c -- Userspace TCP
3     Copyright (C) 2014 Guus Sliepen <guus@tinc-vpn.org>
4
5     This program is free software; you can redistribute it and/or modify
6     it under the terms of the GNU General Public License as published by
7     the Free Software Foundation; either version 2 of the License, or
8     (at your option) any later version.
9
10     This program is distributed in the hope that it will be useful,
11     but WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13     GNU General Public License for more details.
14
15     You should have received a copy of the GNU General Public License along
16     with this program; if not, write to the Free Software Foundation, Inc.,
17     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20 #define _GNU_SOURCE
21
22 #include <assert.h>
23 #include <errno.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <stdint.h>
27 #include <stdbool.h>
28 #include <string.h>
29 #include <unistd.h>
30 #include <sys/time.h>
31 #include <sys/socket.h>
32
33 #include "utcp_priv.h"
34
35 #ifndef EBADMSG
36 #define EBADMSG         104
37 #endif
38
39 #ifndef SHUT_RDWR
40 #define SHUT_RDWR 2
41 #endif
42
43 #ifdef poll
44 #undef poll
45 #endif
46
#ifndef timersub
// Fallback for platforms lacking timersub(): store a - b in r, borrowing
// one second whenever the microsecond difference goes negative.
#define timersub(a, b, r) do {\
	(r)->tv_sec = (a)->tv_sec - (b)->tv_sec;\
	(r)->tv_usec = (a)->tv_usec - (b)->tv_usec;\
	if((r)->tv_usec < 0) {\
		(r)->tv_sec--;\
		(r)->tv_usec += 1000000;\
	}\
} while (0)
#endif
55
56 #ifndef max
57 #define max(a, b) ((a) > (b) ? (a) : (b))
58 #endif
59
60 #ifdef UTCP_DEBUG
61 #include <stdarg.h>
62
// Write a printf-style formatted diagnostic message to stderr.
static void debug(const char *format, ...) {
	va_list args;

	va_start(args, format);
	vfprintf(stderr, format, args);
	va_end(args);
}
69
70 static void print_packet(struct utcp *utcp, const char *dir, const void *pkt, size_t len) {
71         struct hdr hdr;
72         if(len < sizeof hdr) {
73                 debug("%p %s: short packet (%zu bytes)\n", utcp, dir, len);
74                 return;
75         }
76
77         memcpy(&hdr, pkt, sizeof hdr);
78         fprintf (stderr, "%p %s: len=%zu, src=%u dst=%u seq=%u ack=%u wnd=%u ctl=", utcp, dir, len, hdr.src, hdr.dst, hdr.seq, hdr.ack, hdr.wnd);
79         if(hdr.ctl & SYN)
80                 debug("SYN");
81         if(hdr.ctl & RST)
82                 debug("RST");
83         if(hdr.ctl & FIN)
84                 debug("FIN");
85         if(hdr.ctl & ACK)
86                 debug("ACK");
87
88         if(len > sizeof hdr) {
89                 debug(" data=");
90                 for(int i = sizeof hdr; i < len; i++) {
91                         const char *data = pkt;
92                         debug("%c", data[i] >= 32 ? data[i] : '.');
93                 }
94         }
95
96         debug("\n");
97 }
98 #else
99 #define debug(...)
100 #define print_packet(...)
101 #endif
102
103 static void set_state(struct utcp_connection *c, enum state state) {
104         c->state = state;
105         if(state == ESTABLISHED)
106                 timerclear(&c->conn_timeout);
107         debug("%p new state: %s\n", c->utcp, strstate[state]);
108 }
109
110 static bool fin_wanted(struct utcp_connection *c, uint32_t seq) {
111         if(seq != c->snd.last)
112                 return false;
113         switch(c->state) {
114         case FIN_WAIT_1:
115         case CLOSING:
116         case LAST_ACK:
117                 return true;
118         default:
119                 return false;
120         }
121 }
122
123 static inline void list_connections(struct utcp *utcp) {
124         debug("%p has %d connections:\n", utcp, utcp->nconnections);
125         for(int i = 0; i < utcp->nconnections; i++)
126                 debug("  %u -> %u state %s\n", utcp->connections[i]->src, utcp->connections[i]->dst, strstate[utcp->connections[i]->state]);
127 }
128
// Signed distance from sequence number b to sequence number a. The
// subtraction is done modulo 2^32, so the result stays meaningful when the
// sequence numbers have wrapped around.
static int32_t seqdiff(uint32_t a, uint32_t b) {
	const uint32_t delta = a - b;

	return (int32_t)delta;
}
132
133 // Buffer functions
134 // TODO: convert to ringbuffers to avoid memmove() operations.
135
136 // Store data into the buffer
137 static ssize_t buffer_put_at(struct buffer *buf, size_t offset, const void *data, size_t len) {
138         if(buf->maxsize <= buf->used)
139                 return 0;
140
141         debug("buffer_put_at %zu %zu %zu\n", buf->used, offset, len);
142
143         size_t required = offset + len;
144         if(required > buf->maxsize) {
145                 if(offset >= buf->maxsize)
146                         return 0;
147                 abort();
148                 len = buf->maxsize - offset;
149                 required = buf->maxsize;
150         }
151
152         if(required > buf->size) {
153                 size_t newsize = buf->size;
154                 if(!newsize) {
155                         newsize = required;
156                 } else {
157                         do {
158                                 newsize *= 2;
159                         } while(newsize < buf->used + len);
160                 }
161                 if(newsize > buf->maxsize)
162                         newsize = buf->maxsize;
163                 char *newdata = realloc(buf->data, newsize);
164                 if(!newdata)
165                         return -1;
166                 buf->data = newdata;
167                 buf->size = newsize;
168         }
169
170         memcpy(buf->data + offset, data, len);
171         if(required > buf->used)
172                 buf->used = required;
173         return len;
174 }
175
176 static ssize_t buffer_put(struct buffer *buf, const void *data, size_t len) {
177         return buffer_put_at(buf, buf->used, data, len);
178 }
179
180 // Get data from the buffer. data can be NULL.
181 static ssize_t buffer_get(struct buffer *buf, void *data, size_t len) {
182         if(len > buf->used)
183                 len = buf->used;
184         if(data)
185                 memcpy(data, buf->data, len);
186         if(len < buf->used)
187                 memmove(buf->data, buf->data + len, buf->used - len);
188         buf->used -= len;
189         return len;
190 }
191
192 // Copy data from the buffer without removing it.
193 static ssize_t buffer_copy(struct buffer *buf, void *data, size_t offset, size_t len) {
194         if(offset >= buf->used)
195                 return 0;
196         if(offset + len > buf->used)
197                 len = buf->used - offset;
198         memcpy(data, buf->data + offset, len);
199         return len;
200 }
201
202 static bool buffer_init(struct buffer *buf, uint32_t len, uint32_t maxlen) {
203         memset(buf, 0, sizeof *buf);
204         if(len) {
205                 buf->data = malloc(len);
206                 if(!buf->data)
207                         return false;
208         }
209         buf->size = len;
210         buf->maxsize = maxlen;
211         return true;
212 }
213
214 static void buffer_exit(struct buffer *buf) {
215         free(buf->data);
216         memset(buf, 0, sizeof *buf);
217 }
218
219 static uint32_t buffer_free(const struct buffer *buf) {
220         return buf->maxsize - buf->used;
221 }
222
223 // Connections are stored in a sorted list.
224 // This gives O(log(N)) lookup time, O(N log(N)) insertion time and O(N) deletion time.
225
226 static int compare(const void *va, const void *vb) {
227         assert(va && vb);
228
229         const struct utcp_connection *a = *(struct utcp_connection **)va;
230         const struct utcp_connection *b = *(struct utcp_connection **)vb;
231
232         assert(a && b);
233         assert(a->src && b->src);
234
235         int c = (int)a->src - (int)b->src;
236         if(c)
237                 return c;
238         c = (int)a->dst - (int)b->dst;
239         return c;
240 }
241
242 static struct utcp_connection *find_connection(const struct utcp *utcp, uint16_t src, uint16_t dst) {
243         if(!utcp->nconnections)
244                 return NULL;
245         struct utcp_connection key = {
246                 .src = src,
247                 .dst = dst,
248         }, *keyp = &key;
249         struct utcp_connection **match = bsearch(&keyp, utcp->connections, utcp->nconnections, sizeof *utcp->connections, compare);
250         return match ? *match : NULL;
251 }
252
253 static void free_connection(struct utcp_connection *c) {
254         struct utcp *utcp = c->utcp;
255         struct utcp_connection **cp = bsearch(&c, utcp->connections, utcp->nconnections, sizeof *utcp->connections, compare);
256
257         assert(cp);
258
259         int i = cp - utcp->connections;
260         memmove(cp, cp + 1, (utcp->nconnections - i - 1) * sizeof *cp);
261         utcp->nconnections--;
262
263         buffer_exit(&c->sndbuf);
264         free(c);
265 }
266
267 static struct utcp_connection *allocate_connection(struct utcp *utcp, uint16_t src, uint16_t dst) {
268         // Check whether this combination of src and dst is free
269
270         if(src) {
271                 if(find_connection(utcp, src, dst)) {
272                         errno = EADDRINUSE;
273                         return NULL;
274                 }
275         } else { // If src == 0, generate a random port number with the high bit set
276                 if(utcp->nconnections >= 32767) {
277                         errno = ENOMEM;
278                         return NULL;
279                 }
280                 src = rand() | 0x8000;
281                 while(find_connection(utcp, src, dst))
282                         src++;
283         }
284
285         // Allocate memory for the new connection
286
287         if(utcp->nconnections >= utcp->nallocated) {
288                 if(!utcp->nallocated)
289                         utcp->nallocated = 4;
290                 else
291                         utcp->nallocated *= 2;
292                 struct utcp_connection **new_array = realloc(utcp->connections, utcp->nallocated * sizeof *utcp->connections);
293                 if(!new_array)
294                         return NULL;
295                 utcp->connections = new_array;
296         }
297
298         struct utcp_connection *c = calloc(1, sizeof *c);
299         if(!c)
300                 return NULL;
301
302         if(!buffer_init(&c->sndbuf, DEFAULT_SNDBUFSIZE, DEFAULT_MAXSNDBUFSIZE)) {
303                 free(c);
304                 return NULL;
305         }
306
307         if(!buffer_init(&c->rcvbuf, DEFAULT_RCVBUFSIZE, DEFAULT_MAXRCVBUFSIZE)) {
308                 free(c);
309                 return NULL;
310         }
311
312         // Fill in the details
313
314         c->src = src;
315         c->dst = dst;
316 #ifdef UTCP_DEBUG
317         c->snd.iss = 0;
318 #else
319         c->snd.iss = rand();
320 #endif
321         c->snd.una = c->snd.iss;
322         c->snd.nxt = c->snd.iss + 1;
323         c->rcv.wnd = utcp->mtu;
324         c->snd.last = c->snd.nxt;
325         c->snd.cwnd = utcp->mtu;
326         c->utcp = utcp;
327
328         // Add it to the sorted list of connections
329
330         utcp->connections[utcp->nconnections++] = c;
331         qsort(utcp->connections, utcp->nconnections, sizeof *utcp->connections, compare);
332
333         return c;
334 }
335
336 struct utcp_connection *utcp_connect(struct utcp *utcp, uint16_t dst, utcp_recv_t recv, void *priv) {
337         struct utcp_connection *c = allocate_connection(utcp, 0, dst);
338         if(!c)
339                 return NULL;
340
341         c->recv = recv;
342         c->priv = priv;
343
344         struct hdr hdr;
345
346         hdr.src = c->src;
347         hdr.dst = c->dst;
348         hdr.seq = c->snd.iss;
349         hdr.ack = 0;
350         hdr.wnd = c->rcv.wnd;
351         hdr.ctl = SYN;
352         hdr.aux = 0;
353
354         set_state(c, SYN_SENT);
355
356         print_packet(utcp, "send", &hdr, sizeof hdr);
357         utcp->send(utcp, &hdr, sizeof hdr);
358
359         gettimeofday(&c->conn_timeout, NULL);
360         c->conn_timeout.tv_sec += utcp->timeout;
361
362         return c;
363 }
364
365 void utcp_accept(struct utcp_connection *c, utcp_recv_t recv, void *priv) {
366         if(c->reapable || c->state != SYN_RECEIVED) {
367                 debug("Error: accept() called on invalid connection %p in state %s\n", c, strstate[c->state]);
368                 return;
369         }
370
371         debug("%p accepted, %p %p\n", c, recv, priv);
372         c->recv = recv;
373         c->priv = priv;
374         set_state(c, ESTABLISHED);
375 }
376
377 static void ack(struct utcp_connection *c, bool sendatleastone) {
378         int32_t left = seqdiff(c->snd.last, c->snd.nxt);
379         int32_t cwndleft = c->snd.cwnd - seqdiff(c->snd.nxt, c->snd.una);
380         debug("cwndleft = %d\n", cwndleft);
381
382         assert(left >= 0);
383
384         if(cwndleft <= 0)
385                 cwndleft = 0;
386
387         if(cwndleft < left)
388                 left = cwndleft;
389
390         if(!left && !sendatleastone)
391                 return;
392
393         struct {
394                 struct hdr hdr;
395                 char data[];
396         } *pkt;
397
398         pkt = malloc(sizeof pkt->hdr + c->utcp->mtu);
399         if(!pkt)
400                 return;
401
402         pkt->hdr.src = c->src;
403         pkt->hdr.dst = c->dst;
404         pkt->hdr.ack = c->rcv.nxt;
405         pkt->hdr.wnd = c->snd.wnd;
406         pkt->hdr.ctl = ACK;
407         pkt->hdr.aux = 0;
408
409         do {
410                 uint32_t seglen = left > c->utcp->mtu ? c->utcp->mtu : left;
411                 pkt->hdr.seq = c->snd.nxt;
412
413                 buffer_copy(&c->sndbuf, pkt->data, seqdiff(c->snd.nxt, c->snd.una), seglen);
414
415                 c->snd.nxt += seglen;
416                 left -= seglen;
417
418                 if(seglen && fin_wanted(c, c->snd.nxt)) {
419                         seglen--;
420                         pkt->hdr.ctl |= FIN;
421                 }
422
423                 print_packet(c->utcp, "send", pkt, sizeof pkt->hdr + seglen);
424                 c->utcp->send(c->utcp, pkt, sizeof pkt->hdr + seglen);
425         } while(left);
426
427         free(pkt);
428 }
429
430 ssize_t utcp_send(struct utcp_connection *c, const void *data, size_t len) {
431         if(c->reapable) {
432                 debug("Error: send() called on closed connection %p\n", c);
433                 errno = EBADF;
434                 return -1;
435         }
436
437         switch(c->state) {
438         case CLOSED:
439         case LISTEN:
440         case SYN_SENT:
441         case SYN_RECEIVED:
442                 debug("Error: send() called on unconnected connection %p\n", c);
443                 errno = ENOTCONN;
444                 return -1;
445         case ESTABLISHED:
446         case CLOSE_WAIT:
447                 break;
448         case FIN_WAIT_1:
449         case FIN_WAIT_2:
450         case CLOSING:
451         case LAST_ACK:
452         case TIME_WAIT:
453                 debug("Error: send() called on closing connection %p\n", c);
454                 errno = EPIPE;
455                 return -1;
456         }
457
458         // Add data to send buffer
459
460         if(!len)
461                 return 0;
462
463         if(!data) {
464                 errno = EFAULT;
465                 return -1;
466         }
467
468         len = buffer_put(&c->sndbuf, data, len);
469         if(len <= 0) {
470                 errno = EWOULDBLOCK;
471                 return 0;
472         }
473
474         c->snd.last += len;
475         ack(c, false);
476         return len;
477 }
478
479 static void swap_ports(struct hdr *hdr) {
480         uint16_t tmp = hdr->src;
481         hdr->src = hdr->dst;
482         hdr->dst = tmp;
483 }
484
485 static void retransmit(struct utcp_connection *c) {
486         if(c->state == CLOSED || c->snd.nxt == c->snd.una)
487                 return;
488
489         struct utcp *utcp = c->utcp;
490
491         struct {
492                 struct hdr hdr;
493                 char data[];
494         } *pkt;
495
496         pkt = malloc(sizeof pkt->hdr + c->utcp->mtu);
497         if(!pkt)
498                 return;
499
500         pkt->hdr.src = c->src;
501         pkt->hdr.dst = c->dst;
502
503         switch(c->state) {
504                 case SYN_SENT:
505                         // Send our SYN again
506                         pkt->hdr.seq = c->snd.iss;
507                         pkt->hdr.ack = 0;
508                         pkt->hdr.wnd = c->rcv.wnd;
509                         pkt->hdr.ctl = SYN;
510                         print_packet(c->utcp, "rtrx", pkt, sizeof pkt->hdr);
511                         utcp->send(utcp, pkt, sizeof pkt->hdr);
512                         break;
513
514                 case SYN_RECEIVED:
515                         // Send SYNACK again
516                         pkt->hdr.seq = c->snd.nxt;
517                         pkt->hdr.ack = c->rcv.nxt;
518                         pkt->hdr.ctl = SYN | ACK;
519                         print_packet(c->utcp, "rtrx", pkt, sizeof pkt->hdr);
520                         utcp->send(utcp, pkt, sizeof pkt->hdr);
521                         break;
522
523                 case ESTABLISHED:
524                 case FIN_WAIT_1:
525                 case CLOSE_WAIT:
526                 case CLOSING:
527                 case LAST_ACK:
528                         // Send unacked data again.
529                         pkt->hdr.seq = c->snd.una;
530                         pkt->hdr.ack = c->rcv.nxt;
531                         pkt->hdr.ctl = ACK;
532                         uint32_t len = seqdiff(c->snd.last, c->snd.una);
533                         if(len > utcp->mtu)
534                                 len = utcp->mtu;
535                         if(fin_wanted(c, c->snd.una + len)) {
536                                 len--;
537                                 pkt->hdr.ctl |= FIN;
538                         }
539                         c->snd.nxt = c->snd.una + len;
540                         c->snd.cwnd = utcp->mtu; // reduce cwnd on retransmit
541                         buffer_copy(&c->sndbuf, pkt->data, 0, len);
542                         print_packet(c->utcp, "rtrx", pkt, sizeof pkt->hdr + len);
543                         utcp->send(utcp, pkt, sizeof pkt->hdr + len);
544                         break;
545
546                 case CLOSED:
547                 case LISTEN:
548                 case TIME_WAIT:
549                 case FIN_WAIT_2:
550                         // We shouldn't need to retransmit anything in this state.
551 #ifdef UTCP_DEBUG
552                         abort();
553 #endif
554                         timerclear(&c->rtrx_timeout);
555                         break;
556         }
557
558         free(pkt);
559 }
560
561 // Update receive buffer and SACK entries after consuming data.
562 static void sack_consume(struct utcp_connection *c, size_t len) {
563         debug("sack_consume %zu\n", len);
564         if(len > c->rcvbuf.used)
565                 abort();
566
567         buffer_get(&c->rcvbuf, NULL, len);
568
569         for(int i = 0; i < NSACKS && c->sacks[i].len; ) {
570                 if(len < c->sacks[i].offset) {
571                         c->sacks[i].offset -= len;
572                         i++;
573                 } else if(len < c->sacks[i].offset + c->sacks[i].len) {
574                         c->sacks[i].offset = 0;
575                         c->sacks[i].len -= len - c->sacks[i].offset;
576                         i++;
577                 } else {
578                         if(i < NSACKS - 1) {
579                                 memmove(&c->sacks[i], &c->sacks[i + 1], (NSACKS - 1 - i) * sizeof c->sacks[i]);
580                                 c->sacks[i + 1].len = 0;
581                         } else {
582                                 c->sacks[i].len = 0;
583                                 break;
584                         }
585                 }
586         }
587
588         for(int i = 0; i < NSACKS && c->sacks[i].len; i++)
589                 debug("SACK[%d] offset %u len %u\n", i, c->sacks[i].offset, c->sacks[i].len);
590 }
591
592 static void handle_out_of_order(struct utcp_connection *c, uint32_t offset, const void *data, size_t len) {
593         debug("out of order packet, offset %u\n", offset);
594         // Packet loss or reordering occured. Store the data in the buffer.
595         ssize_t rxd = buffer_put_at(&c->rcvbuf, offset, data, len);
596         if(rxd < len)
597                 abort();
598
599         // Make note of where we put it.
600         for(int i = 0; i < NSACKS; i++) {
601                 if(!c->sacks[i].len) { // nothing to merge, add new entry
602                         debug("New SACK entry %d\n", i);
603                         c->sacks[i].offset = offset;
604                         c->sacks[i].len = rxd;
605                         break;
606                 } else if(offset < c->sacks[i].offset) {
607                         if(offset + rxd < c->sacks[i].offset) { // insert before
608                                 if(!c->sacks[NSACKS - 1].len) { // only if room left
609                                         debug("Insert SACK entry at %d\n", i);
610                                         memmove(&c->sacks[i + 1], &c->sacks[i], (NSACKS - i - 1) * sizeof c->sacks[i]);
611                                         c->sacks[i].offset = offset;
612                                         c->sacks[i].len = rxd;
613                                 }
614                                 break;
615                         } else { // merge
616                                 debug("Merge with start of SACK entry at %d\n", i);
617                                 c->sacks[i].offset = offset;
618                                 break;
619                         }
620                 } else if(offset <= c->sacks[i].offset + c->sacks[i].len) {
621                         if(offset + rxd > c->sacks[i].offset + c->sacks[i].len) { // merge
622                                 debug("Merge with end of SACK entry at %d\n", i);
623                                 c->sacks[i].len = offset + rxd - c->sacks[i].offset;
624                                 // TODO: handle potential merge with next entry
625                         }
626                         break;
627                 }
628         }
629
630         for(int i = 0; i < NSACKS && c->sacks[i].len; i++)
631                 debug("SACK[%d] offset %u len %u\n", i, c->sacks[i].offset, c->sacks[i].len);
632 }
633
634 static void handle_in_order(struct utcp_connection *c, const void *data, size_t len) {
635         // Check if we can process out-of-order data now.
636         if(c->sacks[0].len && len >= c->sacks[0].offset) { // TODO: handle overlap with second SACK
637                 debug("incoming packet len %zu connected with SACK at %u\n", len, c->sacks[0].offset);
638                 buffer_put_at(&c->rcvbuf, 0, data, len); // TODO: handle return value
639                 len = max(len, c->sacks[0].offset + c->sacks[0].len);
640                 data = c->rcvbuf.data;
641         }
642
643         if(c->recv) {
644                 ssize_t rxd = c->recv(c, data, len);
645                 if(rxd != len) {
646                         // TODO: handle the application not accepting all data.
647                         abort();
648                 }
649         }
650
651         if(c->rcvbuf.used)
652                 sack_consume(c, len);
653
654         c->rcv.nxt += len;
655 }
656
657
658 static void handle_incoming_data(struct utcp_connection *c, uint32_t seq, const void *data, size_t len) {
659         uint32_t offset = seqdiff(seq, c->rcv.nxt);
660         if(offset + len > c->rcvbuf.maxsize)
661                 abort();
662
663         if(offset)
664                 handle_out_of_order(c, offset, data, len);
665         else
666                 handle_in_order(c, data, len);
667 }
668
669
670 ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) {
671         if(!utcp) {
672                 errno = EFAULT;
673                 return -1;
674         }
675
676         if(!len)
677                 return 0;
678
679         if(!data) {
680                 errno = EFAULT;
681                 return -1;
682         }
683
684         print_packet(utcp, "recv", data, len);
685
686         // Drop packets smaller than the header
687
688         struct hdr hdr;
689         if(len < sizeof hdr) {
690                 errno = EBADMSG;
691                 return -1;
692         }
693
694         // Make a copy from the potentially unaligned data to a struct hdr
695
696         memcpy(&hdr, data, sizeof hdr);
697         data += sizeof hdr;
698         len -= sizeof hdr;
699
700         // Drop packets with an unknown CTL flag
701
702         if(hdr.ctl & ~(SYN | ACK | RST | FIN)) {
703                 errno = EBADMSG;
704                 return -1;
705         }
706
707         // Try to match the packet to an existing connection
708
709         struct utcp_connection *c = find_connection(utcp, hdr.dst, hdr.src);
710
711         // Is it for a new connection?
712
713         if(!c) {
714                 // Ignore RST packets
715
716                 if(hdr.ctl & RST)
717                         return 0;
718
719                 // Is it a SYN packet and are we LISTENing?
720
721                 if(hdr.ctl & SYN && !(hdr.ctl & ACK) && utcp->accept) {
722                         // If we don't want to accept it, send a RST back
723                         if((utcp->pre_accept && !utcp->pre_accept(utcp, hdr.dst))) {
724                                 len = 1;
725                                 goto reset;
726                         }
727
728                         // Try to allocate memory, otherwise send a RST back
729                         c = allocate_connection(utcp, hdr.dst, hdr.src);
730                         if(!c) {
731                                 len = 1;
732                                 goto reset;
733                         }
734
735                         // Return SYN+ACK, go to SYN_RECEIVED state
736                         c->snd.wnd = hdr.wnd;
737                         c->rcv.irs = hdr.seq;
738                         c->rcv.nxt = c->rcv.irs + 1;
739                         set_state(c, SYN_RECEIVED);
740
741                         hdr.dst = c->dst;
742                         hdr.src = c->src;
743                         hdr.ack = c->rcv.irs + 1;
744                         hdr.seq = c->snd.iss;
745                         hdr.ctl = SYN | ACK;
746                         print_packet(c->utcp, "send", &hdr, sizeof hdr);
747                         utcp->send(utcp, &hdr, sizeof hdr);
748                 } else {
749                         // No, we don't want your packets, send a RST back
750                         len = 1;
751                         goto reset;
752                 }
753
754                 return 0;
755         }
756
757         debug("%p state %s\n", c->utcp, strstate[c->state]);
758
759         // In case this is for a CLOSED connection, ignore the packet.
760         // TODO: make it so incoming packets can never match a CLOSED connection.
761
762         if(c->state == CLOSED)
763                 return 0;
764
765         // It is for an existing connection.
766
767         uint32_t prevrcvnxt = c->rcv.nxt;
768
769         // 1. Drop invalid packets.
770
771         // 1a. Drop packets that should not happen in our current state.
772
773         switch(c->state) {
774         case SYN_SENT:
775         case SYN_RECEIVED:
776         case ESTABLISHED:
777         case FIN_WAIT_1:
778         case FIN_WAIT_2:
779         case CLOSE_WAIT:
780         case CLOSING:
781         case LAST_ACK:
782         case TIME_WAIT:
783                 break;
784         default:
785 #ifdef UTCP_DEBUG
786                 abort();
787 #endif
788                 break;
789         }
790
791         // 1b. Drop packets with a sequence number not in our receive window.
792
793         bool acceptable;
794
795         if(c->state == SYN_SENT)
796                 acceptable = true;
797
798         // TODO: handle packets overlapping c->rcv.nxt.
799 #if 1
800         // Only use this when accepting out-of-order packets.
801         else if(len == 0)
802                 acceptable = seqdiff(hdr.seq, c->rcv.nxt) >= 0;
803         else
804                 acceptable = seqdiff(hdr.seq, c->rcv.nxt) >= 0 && seqdiff(hdr.seq, c->rcv.nxt) + len <= c->rcvbuf.maxsize;
805 #else
806         if(c->state != SYN_SENT)
807                 acceptable = hdr.seq == c->rcv.nxt;
808 #endif
809
810         if(!acceptable) {
811                 debug("Packet not acceptable, %u <= %u + %zu < %u\n", c->rcv.nxt, hdr.seq, len, c->rcv.nxt + c->rcvbuf.maxsize);
812                 // Ignore unacceptable RST packets.
813                 if(hdr.ctl & RST)
814                         return 0;
815                 // Otherwise, send an ACK back in the hope things improve.
816                 ack(c, true);
817                 return 0;
818         }
819
820         c->snd.wnd = hdr.wnd; // TODO: move below
821
822         // 1c. Drop packets with an invalid ACK.
823         // ackno should not roll back, and it should also not be bigger than what we ever could have sent
824         // (= snd.una + c->sndbuf.used).
825
826         if(hdr.ctl & ACK &&
827                         ((seqdiff(hdr.ack, c->snd.una + c->sndbuf.used) > 0 &&
828                           seqdiff(hdr.ack, c->snd.nxt) > 0) // TODO: simplify this if
829                          || seqdiff(hdr.ack, c->snd.una) < 0)) {
830                 debug("Packet ack seqno out of range, %u <= %u < %u\n", c->snd.una, hdr.ack, c->snd.una + c->sndbuf.used);
831                 // Ignore unacceptable RST packets.
832                 if(hdr.ctl & RST)
833                         return 0;
834                 goto reset;
835         }
836
837         // 2. Handle RST packets
838
839         if(hdr.ctl & RST) {
840                 switch(c->state) {
841                 case SYN_SENT:
842                         if(!(hdr.ctl & ACK))
843                                 return 0;
844                         // The peer has refused our connection.
845                         set_state(c, CLOSED);
846                         errno = ECONNREFUSED;
847                         if(c->recv)
848                                 c->recv(c, NULL, 0);
849                         return 0;
850                 case SYN_RECEIVED:
851                         if(hdr.ctl & ACK)
852                                 return 0;
853                         // We haven't told the application about this connection yet. Silently delete.
854                         free_connection(c);
855                         return 0;
856                 case ESTABLISHED:
857                 case FIN_WAIT_1:
858                 case FIN_WAIT_2:
859                 case CLOSE_WAIT:
860                         if(hdr.ctl & ACK)
861                                 return 0;
862                         // The peer has aborted our connection.
863                         set_state(c, CLOSED);
864                         errno = ECONNRESET;
865                         if(c->recv)
866                                 c->recv(c, NULL, 0);
867                         return 0;
868                 case CLOSING:
869                 case LAST_ACK:
870                 case TIME_WAIT:
871                         if(hdr.ctl & ACK)
872                                 return 0;
873                         // As far as the application is concerned, the connection has already been closed.
874                         // If it has called utcp_close() already, we can immediately free this connection.
875                         if(c->reapable) {
876                                 free_connection(c);
877                                 return 0;
878                         }
879                         // Otherwise, immediately move to the CLOSED state.
880                         set_state(c, CLOSED);
881                         return 0;
882                 default:
883 #ifdef UTCP_DEBUG
884                         abort();
885 #endif
886                         break;
887                 }
888         }
889
890         // 3. Advance snd.una
891
892         uint32_t advanced = seqdiff(hdr.ack, c->snd.una);
893         prevrcvnxt = c->rcv.nxt;
894
895         if(advanced) {
896                 int32_t data_acked = advanced;
897
898                 switch(c->state) {
899                         case SYN_SENT:
900                         case SYN_RECEIVED:
901                                 data_acked--;
902                                 break;
903                         // TODO: handle FIN as well.
904                         default:
905                                 break;
906                 }
907
908                 assert(data_acked >= 0);
909
910                 int32_t bufused = seqdiff(c->snd.last, c->snd.una);
911                 assert(data_acked <= bufused);
912
913                 if(data_acked)
914                         buffer_get(&c->sndbuf, NULL, data_acked);
915
916                 // Also advance snd.nxt if possible
917                 if(seqdiff(c->snd.nxt, hdr.ack) < 0)
918                         c->snd.nxt = hdr.ack;
919
920                 c->snd.una = hdr.ack;
921
922                 c->dupack = 0;
923                 c->snd.cwnd += utcp->mtu;
924                 if(c->snd.cwnd > c->sndbuf.maxsize)
925                         c->snd.cwnd = c->sndbuf.maxsize;
926
927                 // Check if we have sent a FIN that is now ACKed.
928                 switch(c->state) {
929                 case FIN_WAIT_1:
930                         if(c->snd.una == c->snd.last)
931                                 set_state(c, FIN_WAIT_2);
932                         break;
933                 case CLOSING:
934                         if(c->snd.una == c->snd.last) {
935                                 gettimeofday(&c->conn_timeout, NULL);
936                                 c->conn_timeout.tv_sec += 60;
937                                 set_state(c, TIME_WAIT);
938                         }
939                         break;
940                 default:
941                         break;
942                 }
943         } else {
944                 if(!len) {
945                         c->dupack++;
946                         if(c->dupack == 3) {
947                                 debug("Triplicate ACK\n");
948                                 //TODO: Resend one packet and go to fast recovery mode. See RFC 6582.
949                                 //We do a very simple variant here; reset the nxt pointer to the last acknowledged packet from the peer.
950                                 //Reset the congestion window so we wait for ACKs.
951                                 c->snd.nxt = c->snd.una;
952                                 c->snd.cwnd = utcp->mtu;
953                         }
954                 }
955         }
956
957         // 4. Update timers
958
959         if(advanced) {
960                 timerclear(&c->conn_timeout); // It will be set anew in utcp_timeout() if c->snd.una != c->snd.nxt.
961                 if(c->snd.una == c->snd.nxt)
962                         timerclear(&c->rtrx_timeout);
963         }
964
965         // 5. Process SYN stuff
966
967         if(hdr.ctl & SYN) {
968                 switch(c->state) {
969                 case SYN_SENT:
970                         // This is a SYNACK. It should always have ACKed the SYN.
971                         if(!advanced)
972                                 goto reset;
973                         c->rcv.irs = hdr.seq;
974                         c->rcv.nxt = hdr.seq;
975                         set_state(c, ESTABLISHED);
976                         // TODO: notify application of this somehow.
977                         break;
978                 case SYN_RECEIVED:
979                 case ESTABLISHED:
980                 case FIN_WAIT_1:
981                 case FIN_WAIT_2:
982                 case CLOSE_WAIT:
983                 case CLOSING:
984                 case LAST_ACK:
985                 case TIME_WAIT:
986                         // Ehm, no. We should never receive a second SYN.
987                         goto reset;
988                 default:
989 #ifdef UTCP_DEBUG
990                         abort();
991 #endif
992                         return 0;
993                 }
994
995                 // SYN counts as one sequence number
996                 c->rcv.nxt++;
997         }
998
999         // 6. Process new data
1000
1001         if(c->state == SYN_RECEIVED) {
1002                 // This is the ACK after the SYNACK. It should always have ACKed the SYNACK.
1003                 if(!advanced)
1004                         goto reset;
1005
1006                 // Are we still LISTENing?
1007                 if(utcp->accept)
1008                         utcp->accept(c, c->src);
1009
1010                 if(c->state != ESTABLISHED) {
1011                         set_state(c, CLOSED);
1012                         c->reapable = true;
1013                         goto reset;
1014                 }
1015         }
1016
1017         if(len) {
1018                 switch(c->state) {
1019                 case SYN_SENT:
1020                 case SYN_RECEIVED:
1021                         // This should never happen.
1022 #ifdef UTCP_DEBUG
1023                         abort();
1024 #endif
1025                         return 0;
1026                 case ESTABLISHED:
1027                 case FIN_WAIT_1:
1028                 case FIN_WAIT_2:
1029                         break;
1030                 case CLOSE_WAIT:
1031                 case CLOSING:
1032                 case LAST_ACK:
1033                 case TIME_WAIT:
1034                         // Ehm no, We should never receive more data after a FIN.
1035                         goto reset;
1036                 default:
1037 #ifdef UTCP_DEBUG
1038                         abort();
1039 #endif
1040                         return 0;
1041                 }
1042
1043                 handle_incoming_data(c, hdr.seq, data, len);
1044         }
1045
1046         // 7. Process FIN stuff
1047
1048         if((hdr.ctl & FIN) && hdr.seq + len == c->rcv.nxt) {
1049                 switch(c->state) {
1050                 case SYN_SENT:
1051                 case SYN_RECEIVED:
1052                         // This should never happen.
1053 #ifdef UTCP_DEBUG
1054                         abort();
1055 #endif
1056                         break;
1057                 case ESTABLISHED:
1058                         set_state(c, CLOSE_WAIT);
1059                         break;
1060                 case FIN_WAIT_1:
1061                         set_state(c, CLOSING);
1062                         break;
1063                 case FIN_WAIT_2:
1064                         gettimeofday(&c->conn_timeout, NULL);
1065                         c->conn_timeout.tv_sec += 60;
1066                         set_state(c, TIME_WAIT);
1067                         break;
1068                 case CLOSE_WAIT:
1069                 case CLOSING:
1070                 case LAST_ACK:
1071                 case TIME_WAIT:
1072                         // Ehm, no. We should never receive a second FIN.
1073                         goto reset;
1074                 default:
1075 #ifdef UTCP_DEBUG
1076                         abort();
1077 #endif
1078                         break;
1079                 }
1080
1081                 // FIN counts as one sequence number
1082                 c->rcv.nxt++;
1083                 len++;
1084
1085                 // Inform the application that the peer closed the connection.
1086                 if(c->recv) {
1087                         errno = 0;
1088                         c->recv(c, NULL, 0);
1089                 }
1090         }
1091
1092         // Now we send something back if:
1093         // - we advanced rcv.nxt (ie, we got some data that needs to be ACKed)
1094         //   -> sendatleastone = true
1095         // - or we got an ack, so we should maybe send a bit more data
1096         //   -> sendatleastone = false
1097
1098 ack:
1099         ack(c, prevrcvnxt != c->rcv.nxt);
1100         return 0;
1101
1102 reset:
1103         swap_ports(&hdr);
1104         hdr.wnd = 0;
1105         if(hdr.ctl & ACK) {
1106                 hdr.seq = hdr.ack;
1107                 hdr.ctl = RST;
1108         } else {
1109                 hdr.ack = hdr.seq + len;
1110                 hdr.seq = 0;
1111                 hdr.ctl = RST | ACK;
1112         }
1113         print_packet(utcp, "send", &hdr, sizeof hdr);
1114         utcp->send(utcp, &hdr, sizeof hdr);
1115         return 0;
1116
1117 }
1118
1119 int utcp_shutdown(struct utcp_connection *c, int dir) {
1120         debug("%p shutdown %d at %u\n", c ? c->utcp : NULL, dir, c ? c->snd.last : 0);
1121         if(!c) {
1122                 errno = EFAULT;
1123                 return -1;
1124         }
1125
1126         if(c->reapable) {
1127                 debug("Error: shutdown() called on closed connection %p\n", c);
1128                 errno = EBADF;
1129                 return -1;
1130         }
1131
1132         if(!(dir == UTCP_SHUT_RD || dir == UTCP_SHUT_WR || dir == UTCP_SHUT_RDWR)) {
1133                 errno = EINVAL;
1134                 return -1;
1135         }
1136
1137         // TCP does not have a provision for stopping incoming packets.
1138         // The best we can do is to just ignore them.
1139         if(dir == UTCP_SHUT_RD || dir == UTCP_SHUT_RDWR)
1140                 c->recv = NULL;
1141
1142         // The rest of the code deals with shutting down writes.
1143         if(dir == UTCP_SHUT_RD)
1144                 return 0;
1145
1146         switch(c->state) {
1147         case CLOSED:
1148         case LISTEN:
1149                 errno = ENOTCONN;
1150                 return -1;
1151
1152         case SYN_SENT:
1153                 set_state(c, CLOSED);
1154                 return 0;
1155
1156         case SYN_RECEIVED:
1157         case ESTABLISHED:
1158                 set_state(c, FIN_WAIT_1);
1159                 break;
1160         case FIN_WAIT_1:
1161         case FIN_WAIT_2:
1162                 return 0;
1163         case CLOSE_WAIT:
1164                 set_state(c, CLOSING);
1165                 break;
1166
1167         case CLOSING:
1168         case LAST_ACK:
1169         case TIME_WAIT:
1170                 return 0;
1171         }
1172
1173         c->snd.last++;
1174
1175         ack(c, false);
1176         return 0;
1177 }
1178
1179 int utcp_close(struct utcp_connection *c) {
1180         if(utcp_shutdown(c, SHUT_RDWR))
1181                 return -1;
1182         c->recv = NULL;
1183         c->poll = NULL;
1184         c->reapable = true;
1185         return 0;
1186 }
1187
1188 int utcp_abort(struct utcp_connection *c) {
1189         if(!c) {
1190                 errno = EFAULT;
1191                 return -1;
1192         }
1193
1194         if(c->reapable) {
1195                 debug("Error: abort() called on closed connection %p\n", c);
1196                 errno = EBADF;
1197                 return -1;
1198         }
1199
1200         c->recv = NULL;
1201         c->poll = NULL;
1202         c->reapable = true;
1203
1204         switch(c->state) {
1205         case CLOSED:
1206                 return 0;
1207         case LISTEN:
1208         case SYN_SENT:
1209         case CLOSING:
1210         case LAST_ACK:
1211         case TIME_WAIT:
1212                 set_state(c, CLOSED);
1213                 return 0;
1214
1215         case SYN_RECEIVED:
1216         case ESTABLISHED:
1217         case FIN_WAIT_1:
1218         case FIN_WAIT_2:
1219         case CLOSE_WAIT:
1220                 set_state(c, CLOSED);
1221                 break;
1222         }
1223
1224         // Send RST
1225
1226         struct hdr hdr;
1227
1228         hdr.src = c->src;
1229         hdr.dst = c->dst;
1230         hdr.seq = c->snd.nxt;
1231         hdr.ack = 0;
1232         hdr.wnd = 0;
1233         hdr.ctl = RST;
1234
1235         print_packet(c->utcp, "send", &hdr, sizeof hdr);
1236         c->utcp->send(c->utcp, &hdr, sizeof hdr);
1237         return 0;
1238 }
1239
1240 /* Handle timeouts.
1241  * One call to this function will loop through all connections,
1242  * checking if something needs to be resent or not.
1243  * The return value is the time to the next timeout in milliseconds,
1244  * or maybe a negative value if the timeout is infinite.
1245  */
1246 struct timeval utcp_timeout(struct utcp *utcp) {
1247         struct timeval now;
1248         gettimeofday(&now, NULL);
1249         struct timeval next = {now.tv_sec + 3600, now.tv_usec};
1250
1251         for(int i = 0; i < utcp->nconnections; i++) {
1252                 struct utcp_connection *c = utcp->connections[i];
1253                 if(!c)
1254                         continue;
1255
1256                 if(c->state == CLOSED) {
1257                         if(c->reapable) {
1258                                 debug("Reaping %p\n", c);
1259                                 free_connection(c);
1260                                 i--;
1261                         }
1262                         continue;
1263                 }
1264
1265                 if(timerisset(&c->conn_timeout) && timercmp(&c->conn_timeout, &now, <)) {
1266                         errno = ETIMEDOUT;
1267                         c->state = CLOSED;
1268                         if(c->recv)
1269                                 c->recv(c, NULL, 0);
1270                         continue;
1271                 }
1272
1273                 if(timerisset(&c->rtrx_timeout) && timercmp(&c->rtrx_timeout, &now, <)) {
1274                         retransmit(c);
1275                 }
1276
1277                 if(c->poll && buffer_free(&c->sndbuf) && (c->state == ESTABLISHED || c->state == CLOSE_WAIT))
1278                         c->poll(c, buffer_free(&c->sndbuf));
1279
1280                 if(timerisset(&c->conn_timeout) && timercmp(&c->conn_timeout, &next, <))
1281                         next = c->conn_timeout;
1282
1283                 if(c->snd.nxt != c->snd.una) {
1284                         c->rtrx_timeout = now;
1285                         c->rtrx_timeout.tv_sec++;
1286                 } else {
1287                         timerclear(&c->rtrx_timeout);
1288                 }
1289
1290                 if(timerisset(&c->rtrx_timeout) && timercmp(&c->rtrx_timeout, &next, <))
1291                         next = c->rtrx_timeout;
1292         }
1293
1294         struct timeval diff;
1295         timersub(&next, &now, &diff);
1296         return diff;
1297 }
1298
1299 bool utcp_is_active(struct utcp *utcp) {
1300         if(!utcp)
1301                 return false;
1302
1303         for(int i = 0; i < utcp->nconnections; i++)
1304                 if(utcp->connections[i]->state != CLOSED && utcp->connections[i]->state != TIME_WAIT)
1305                         return true;
1306
1307         return false;
1308 }
1309
1310 struct utcp *utcp_init(utcp_accept_t accept, utcp_pre_accept_t pre_accept, utcp_send_t send, void *priv) {
1311         if(!send) {
1312                 errno = EFAULT;
1313                 return NULL;
1314         }
1315
1316         struct utcp *utcp = calloc(1, sizeof *utcp);
1317         if(!utcp)
1318                 return NULL;
1319
1320         utcp->accept = accept;
1321         utcp->pre_accept = pre_accept;
1322         utcp->send = send;
1323         utcp->priv = priv;
1324         utcp->mtu = DEFAULT_MTU;
1325         utcp->timeout = DEFAULT_USER_TIMEOUT; // s
1326
1327         return utcp;
1328 }
1329
1330 void utcp_exit(struct utcp *utcp) {
1331         if(!utcp)
1332                 return;
1333         for(int i = 0; i < utcp->nconnections; i++) {
1334                 if(!utcp->connections[i]->reapable)
1335                         debug("Warning, freeing unclosed connection %p\n", utcp->connections[i]);
1336                 buffer_exit(&utcp->connections[i]->sndbuf);
1337                 free(utcp->connections[i]);
1338         }
1339         free(utcp->connections);
1340         free(utcp);
1341 }
1342
1343 uint16_t utcp_get_mtu(struct utcp *utcp) {
1344         return utcp ? utcp->mtu : 0;
1345 }
1346
1347 void utcp_set_mtu(struct utcp *utcp, uint16_t mtu) {
1348         // TODO: handle overhead of the header
1349         if(utcp)
1350                 utcp->mtu = mtu;
1351 }
1352
1353 int utcp_get_user_timeout(struct utcp *u) {
1354         return u ? u->timeout : 0;
1355 }
1356
1357 void utcp_set_user_timeout(struct utcp *u, int timeout) {
1358         if(u)
1359                 u->timeout = timeout;
1360 }
1361
1362 size_t utcp_get_sndbuf(struct utcp_connection *c) {
1363         return c ? c->sndbuf.maxsize : 0;
1364 }
1365
1366 size_t utcp_get_sndbuf_free(struct utcp_connection *c) {
1367         if(c && (c->state == ESTABLISHED || c->state == CLOSE_WAIT))
1368                 return buffer_free(&c->sndbuf);
1369         else
1370                 return 0;
1371 }
1372
1373 void utcp_set_sndbuf(struct utcp_connection *c, size_t size) {
1374         if(!c)
1375                 return;
1376         c->sndbuf.maxsize = size;
1377         if(c->sndbuf.maxsize != size)
1378                 c->sndbuf.maxsize = -1;
1379 }
1380
1381 size_t utcp_get_rcvbuf(struct utcp_connection *c) {
1382         return c ? c->rcvbuf.maxsize : 0;
1383 }
1384
1385 size_t utcp_get_rcvbuf_free(struct utcp_connection *c) {
1386         if(c && (c->state == ESTABLISHED || c->state == CLOSE_WAIT))
1387                 return buffer_free(&c->rcvbuf);
1388         else
1389                 return 0;
1390 }
1391
1392 void utcp_set_rcvbuf(struct utcp_connection *c, size_t size) {
1393         if(!c)
1394                 return;
1395         c->rcvbuf.maxsize = size;
1396         if(c->rcvbuf.maxsize != size)
1397                 c->rcvbuf.maxsize = -1;
1398 }
1399
1400 bool utcp_get_nodelay(struct utcp_connection *c) {
1401         return c ? c->nodelay : false;
1402 }
1403
1404 void utcp_set_nodelay(struct utcp_connection *c, bool nodelay) {
1405         if(c)
1406                 c->nodelay = nodelay;
1407 }
1408
1409 bool utcp_get_keepalive(struct utcp_connection *c) {
1410         return c ? c->keepalive : false;
1411 }
1412
1413 void utcp_set_keepalive(struct utcp_connection *c, bool keepalive) {
1414         if(c)
1415                 c->keepalive = keepalive;
1416 }
1417
1418 size_t utcp_get_outq(struct utcp_connection *c) {
1419         return c ? seqdiff(c->snd.nxt, c->snd.una) : 0;
1420 }
1421
1422 void utcp_set_recv_cb(struct utcp_connection *c, utcp_recv_t recv) {
1423         if(c)
1424                 c->recv = recv;
1425 }
1426
1427 void utcp_set_poll_cb(struct utcp_connection *c, utcp_poll_t poll) {
1428         if(c)
1429                 c->poll = poll;
1430 }
1431
1432 void utcp_set_accept_cb(struct utcp *utcp, utcp_accept_t accept, utcp_pre_accept_t pre_accept) {
1433         if(utcp) {
1434                 utcp->accept = accept;
1435                 utcp->pre_accept = pre_accept;
1436         }
1437 }