]> git.meshlink.io Git - utcp/blob - utcp.c
Add a receive buffer.
[utcp] / utcp.c
1 /*
2     utcp.c -- Userspace TCP
3     Copyright (C) 2014 Guus Sliepen <guus@tinc-vpn.org>
4
5     This program is free software; you can redistribute it and/or modify
6     it under the terms of the GNU General Public License as published by
7     the Free Software Foundation; either version 2 of the License, or
8     (at your option) any later version.
9
10     This program is distributed in the hope that it will be useful,
11     but WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13     GNU General Public License for more details.
14
15     You should have received a copy of the GNU General Public License along
16     with this program; if not, write to the Free Software Foundation, Inc.,
17     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20 #define _GNU_SOURCE
21
22 #include <assert.h>
23 #include <errno.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <stdint.h>
27 #include <stdbool.h>
28 #include <string.h>
29 #include <unistd.h>
30 #include <sys/time.h>
31 #include <sys/socket.h>
32
33 #include "utcp_priv.h"
34
35 #ifndef EBADMSG
36 #define EBADMSG         104
37 #endif
38
39 #ifndef SHUT_RDWR
40 #define SHUT_RDWR 2
41 #endif
42
43 #ifdef poll
44 #undef poll
45 #endif
46
#ifndef timersub
// Fallback for platforms that do not provide timersub(): store a - b in r,
// borrowing one second when the microsecond difference goes negative.
#define timersub(a, b, r) do {\
        (r)->tv_sec = (a)->tv_sec - (b)->tv_sec;\
        (r)->tv_usec = (a)->tv_usec - (b)->tv_usec;\
        if((r)->tv_usec < 0) {\
                (r)->tv_sec -= 1;\
                (r)->tv_usec += 1000000;\
        }\
} while (0)
#endif
55
#ifndef max
// Larger of the two arguments. Each argument may be evaluated twice.
#define max(a, b) ((b) < (a) ? (a) : (b))
#endif
59
60 #ifdef UTCP_DEBUG
61 #include <stdarg.h>
62
// Write a printf-style diagnostic message to stderr.
static void debug(const char *format, ...) {
        va_list args;

        va_start(args, format);
        vfprintf(stderr, format, args);
        va_end(args);
}
69
70 static void print_packet(struct utcp *utcp, const char *dir, const void *pkt, size_t len) {
71         struct hdr hdr;
72         if(len < sizeof hdr) {
73                 debug("%p %s: short packet (%zu bytes)\n", utcp, dir, len);
74                 return;
75         }
76
77         memcpy(&hdr, pkt, sizeof hdr);
78         fprintf (stderr, "%p %s: len=%zu, src=%u dst=%u seq=%u ack=%u wnd=%u ctl=", utcp, dir, len, hdr.src, hdr.dst, hdr.seq, hdr.ack, hdr.wnd);
79         if(hdr.ctl & SYN)
80                 debug("SYN");
81         if(hdr.ctl & RST)
82                 debug("RST");
83         if(hdr.ctl & FIN)
84                 debug("FIN");
85         if(hdr.ctl & ACK)
86                 debug("ACK");
87
88         if(len > sizeof hdr) {
89                 debug(" data=");
90                 for(int i = sizeof hdr; i < len; i++) {
91                         const char *data = pkt;
92                         debug("%c", data[i] >= 32 ? data[i] : '.');
93                 }
94         }
95
96         debug("\n");
97 }
98 #else
99 #define debug(...)
100 #define print_packet(...)
101 #endif
102
103 static void set_state(struct utcp_connection *c, enum state state) {
104         c->state = state;
105         if(state == ESTABLISHED)
106                 timerclear(&c->conn_timeout);
107         debug("%p new state: %s\n", c->utcp, strstate[state]);
108 }
109
110 static bool fin_wanted(struct utcp_connection *c, uint32_t seq) {
111         if(seq != c->snd.last)
112                 return false;
113         switch(c->state) {
114         case FIN_WAIT_1:
115         case CLOSING:
116         case LAST_ACK:
117                 return true;
118         default:
119                 return false;
120         }
121 }
122
123 static inline void list_connections(struct utcp *utcp) {
124         debug("%p has %d connections:\n", utcp, utcp->nconnections);
125         for(int i = 0; i < utcp->nconnections; i++)
126                 debug("  %u -> %u state %s\n", utcp->connections[i]->src, utcp->connections[i]->dst, strstate[utcp->connections[i]->state]);
127 }
128
// Signed difference a - b between two 32-bit sequence numbers, computed with
// wrap-around so that a value just past the wrap still compares as "after".
static int32_t seqdiff(uint32_t a, uint32_t b) {
        uint32_t delta = a - b;
        return delta;
}
132
133 // Buffer functions
134 // TODO: convert to ringbuffers to avoid memmove() operations.
135
136 // Store data into the buffer
137 static ssize_t buffer_put_at(struct buffer *buf, size_t offset, const void *data, size_t len) {
138         if(buf->maxsize <= buf->used)
139                 return 0;
140
141         debug("buffer_put_at %zu %zu %zu\n", buf->used, offset, len);
142
143         size_t required = offset + len;
144         if(required > buf->maxsize) {
145                 if(offset >= buf->maxsize)
146                         return 0;
147                 abort();
148                 len = buf->maxsize - offset;
149                 required = buf->maxsize;
150         }
151
152         if(required > buf->size) {
153                 size_t newsize = buf->size;
154                 if(!newsize) {
155                         newsize = required;
156                 } else {
157                         do {
158                                 newsize *= 2;
159                         } while(newsize < buf->used + len);
160                 }
161                 if(newsize > buf->maxsize)
162                         newsize = buf->maxsize;
163                 char *newdata = realloc(buf->data, newsize);
164                 if(!newdata)
165                         return -1;
166                 buf->data = newdata;
167                 buf->size = newsize;
168         }
169
170         memcpy(buf->data + offset, data, len);
171         if(required > buf->used)
172                 buf->used = required;
173         return len;
174 }
175
176 static ssize_t buffer_put(struct buffer *buf, const void *data, size_t len) {
177         return buffer_put_at(buf, buf->used, data, len);
178 }
179
180 // Get data from the buffer. data can be NULL.
181 static ssize_t buffer_get(struct buffer *buf, void *data, size_t len) {
182         if(len > buf->used)
183                 len = buf->used;
184         if(data)
185                 memcpy(data, buf->data, len);
186         if(len < buf->used)
187                 memmove(buf->data, buf->data + len, buf->used - len);
188         buf->used -= len;
189         return len;
190 }
191
192 // Copy data from the buffer without removing it.
193 static ssize_t buffer_copy(struct buffer *buf, void *data, size_t offset, size_t len) {
194         if(offset >= buf->used)
195                 return 0;
196         if(offset + len > buf->used)
197                 len = buf->used - offset;
198         memcpy(data, buf->data + offset, len);
199         return len;
200 }
201
202 static bool buffer_init(struct buffer *buf, uint32_t len, uint32_t maxlen) {
203         memset(buf, 0, sizeof *buf);
204         if(len) {
205                 buf->data = malloc(len);
206                 if(!buf->data)
207                         return false;
208         }
209         buf->size = len;
210         buf->maxsize = maxlen;
211         return true;
212 }
213
214 static void buffer_exit(struct buffer *buf) {
215         free(buf->data);
216         memset(buf, 0, sizeof *buf);
217 }
218
219 static uint32_t buffer_free(const struct buffer *buf) {
220         return buf->maxsize - buf->used;
221 }
222
223 // Connections are stored in a sorted list.
224 // This gives O(log(N)) lookup time, O(N log(N)) insertion time and O(N) deletion time.
225
226 static int compare(const void *va, const void *vb) {
227         assert(va && vb);
228
229         const struct utcp_connection *a = *(struct utcp_connection **)va;
230         const struct utcp_connection *b = *(struct utcp_connection **)vb;
231
232         assert(a && b);
233         assert(a->src && b->src);
234
235         int c = (int)a->src - (int)b->src;
236         if(c)
237                 return c;
238         c = (int)a->dst - (int)b->dst;
239         return c;
240 }
241
242 static struct utcp_connection *find_connection(const struct utcp *utcp, uint16_t src, uint16_t dst) {
243         if(!utcp->nconnections)
244                 return NULL;
245         struct utcp_connection key = {
246                 .src = src,
247                 .dst = dst,
248         }, *keyp = &key;
249         struct utcp_connection **match = bsearch(&keyp, utcp->connections, utcp->nconnections, sizeof *utcp->connections, compare);
250         return match ? *match : NULL;
251 }
252
253 static void free_connection(struct utcp_connection *c) {
254         struct utcp *utcp = c->utcp;
255         struct utcp_connection **cp = bsearch(&c, utcp->connections, utcp->nconnections, sizeof *utcp->connections, compare);
256
257         assert(cp);
258
259         int i = cp - utcp->connections;
260         memmove(cp, cp + 1, (utcp->nconnections - i - 1) * sizeof *cp);
261         utcp->nconnections--;
262
263         buffer_exit(&c->sndbuf);
264         free(c);
265 }
266
267 static struct utcp_connection *allocate_connection(struct utcp *utcp, uint16_t src, uint16_t dst) {
268         // Check whether this combination of src and dst is free
269
270         if(src) {
271                 if(find_connection(utcp, src, dst)) {
272                         errno = EADDRINUSE;
273                         return NULL;
274                 }
275         } else { // If src == 0, generate a random port number with the high bit set
276                 if(utcp->nconnections >= 32767) {
277                         errno = ENOMEM;
278                         return NULL;
279                 }
280                 src = rand() | 0x8000;
281                 while(find_connection(utcp, src, dst))
282                         src++;
283         }
284
285         // Allocate memory for the new connection
286
287         if(utcp->nconnections >= utcp->nallocated) {
288                 if(!utcp->nallocated)
289                         utcp->nallocated = 4;
290                 else
291                         utcp->nallocated *= 2;
292                 struct utcp_connection **new_array = realloc(utcp->connections, utcp->nallocated * sizeof *utcp->connections);
293                 if(!new_array)
294                         return NULL;
295                 utcp->connections = new_array;
296         }
297
298         struct utcp_connection *c = calloc(1, sizeof *c);
299         if(!c)
300                 return NULL;
301
302         if(!buffer_init(&c->sndbuf, DEFAULT_SNDBUFSIZE, DEFAULT_MAXSNDBUFSIZE)) {
303                 free(c);
304                 return NULL;
305         }
306
307         if(!buffer_init(&c->rcvbuf, DEFAULT_RCVBUFSIZE, DEFAULT_MAXRCVBUFSIZE)) {
308                 free(c);
309                 return NULL;
310         }
311
312         // Fill in the details
313
314         c->src = src;
315         c->dst = dst;
316 #ifdef UTCP_DEBUG
317 #warning debugging
318         c->snd.iss = 0;
319 #else
320         c->snd.iss = rand();
321 #endif
322         c->snd.una = c->snd.iss;
323         c->snd.nxt = c->snd.iss + 1;
324         c->rcv.wnd = utcp->mtu;
325         c->snd.last = c->snd.nxt;
326         c->snd.cwnd = utcp->mtu;
327         c->utcp = utcp;
328
329         // Add it to the sorted list of connections
330
331         utcp->connections[utcp->nconnections++] = c;
332         qsort(utcp->connections, utcp->nconnections, sizeof *utcp->connections, compare);
333
334         return c;
335 }
336
337 struct utcp_connection *utcp_connect(struct utcp *utcp, uint16_t dst, utcp_recv_t recv, void *priv) {
338         struct utcp_connection *c = allocate_connection(utcp, 0, dst);
339         if(!c)
340                 return NULL;
341
342         c->recv = recv;
343         c->priv = priv;
344
345         struct hdr hdr;
346
347         hdr.src = c->src;
348         hdr.dst = c->dst;
349         hdr.seq = c->snd.iss;
350         hdr.ack = 0;
351         hdr.wnd = c->rcv.wnd;
352         hdr.ctl = SYN;
353         hdr.aux = 0;
354
355         set_state(c, SYN_SENT);
356
357         print_packet(utcp, "send", &hdr, sizeof hdr);
358         utcp->send(utcp, &hdr, sizeof hdr);
359
360         gettimeofday(&c->conn_timeout, NULL);
361         c->conn_timeout.tv_sec += utcp->timeout;
362
363         return c;
364 }
365
366 void utcp_accept(struct utcp_connection *c, utcp_recv_t recv, void *priv) {
367         if(c->reapable || c->state != SYN_RECEIVED) {
368                 debug("Error: accept() called on invalid connection %p in state %s\n", c, strstate[c->state]);
369                 return;
370         }
371
372         debug("%p accepted, %p %p\n", c, recv, priv);
373         c->recv = recv;
374         c->priv = priv;
375         set_state(c, ESTABLISHED);
376 }
377
378 static void ack(struct utcp_connection *c, bool sendatleastone) {
379         int32_t left = seqdiff(c->snd.last, c->snd.nxt);
380         int32_t cwndleft = c->snd.cwnd - seqdiff(c->snd.nxt, c->snd.una);
381         debug("cwndleft = %d\n", cwndleft);
382
383         assert(left >= 0);
384
385         if(cwndleft <= 0)
386                 cwndleft = 0;
387
388         if(cwndleft < left)
389                 left = cwndleft;
390
391         if(!left && !sendatleastone)
392                 return;
393
394         struct {
395                 struct hdr hdr;
396                 char data[];
397         } *pkt;
398
399         pkt = malloc(sizeof pkt->hdr + c->utcp->mtu);
400         if(!pkt)
401                 return;
402
403         pkt->hdr.src = c->src;
404         pkt->hdr.dst = c->dst;
405         pkt->hdr.ack = c->rcv.nxt;
406         pkt->hdr.wnd = c->snd.wnd;
407         pkt->hdr.ctl = ACK;
408         pkt->hdr.aux = 0;
409
410         do {
411                 uint32_t seglen = left > c->utcp->mtu ? c->utcp->mtu : left;
412                 pkt->hdr.seq = c->snd.nxt;
413
414                 buffer_copy(&c->sndbuf, pkt->data, seqdiff(c->snd.nxt, c->snd.una), seglen);
415
416                 c->snd.nxt += seglen;
417                 left -= seglen;
418
419                 if(seglen && fin_wanted(c, c->snd.nxt)) {
420                         seglen--;
421                         pkt->hdr.ctl |= FIN;
422                 }
423
424                 print_packet(c->utcp, "send", pkt, sizeof pkt->hdr + seglen);
425                 c->utcp->send(c->utcp, pkt, sizeof pkt->hdr + seglen);
426         } while(left);
427
428         free(pkt);
429 }
430
431 ssize_t utcp_send(struct utcp_connection *c, const void *data, size_t len) {
432         if(c->reapable) {
433                 debug("Error: send() called on closed connection %p\n", c);
434                 errno = EBADF;
435                 return -1;
436         }
437
438         switch(c->state) {
439         case CLOSED:
440         case LISTEN:
441         case SYN_SENT:
442         case SYN_RECEIVED:
443                 debug("Error: send() called on unconnected connection %p\n", c);
444                 errno = ENOTCONN;
445                 return -1;
446         case ESTABLISHED:
447         case CLOSE_WAIT:
448                 break;
449         case FIN_WAIT_1:
450         case FIN_WAIT_2:
451         case CLOSING:
452         case LAST_ACK:
453         case TIME_WAIT:
454                 debug("Error: send() called on closing connection %p\n", c);
455                 errno = EPIPE;
456                 return -1;
457         }
458
459         // Add data to send buffer
460
461         if(!len)
462                 return 0;
463
464         if(!data) {
465                 errno = EFAULT;
466                 return -1;
467         }
468
469         len = buffer_put(&c->sndbuf, data, len);
470         if(len <= 0) {
471                 errno = EWOULDBLOCK;
472                 return 0;
473         }
474
475         c->snd.last += len;
476         ack(c, false);
477         return len;
478 }
479
480 static void swap_ports(struct hdr *hdr) {
481         uint16_t tmp = hdr->src;
482         hdr->src = hdr->dst;
483         hdr->dst = tmp;
484 }
485
486 static void retransmit(struct utcp_connection *c) {
487         if(c->state == CLOSED || c->snd.nxt == c->snd.una)
488                 return;
489
490         struct utcp *utcp = c->utcp;
491
492         struct {
493                 struct hdr hdr;
494                 char data[];
495         } *pkt;
496
497         pkt = malloc(sizeof pkt->hdr + c->utcp->mtu);
498         if(!pkt)
499                 return;
500
501         pkt->hdr.src = c->src;
502         pkt->hdr.dst = c->dst;
503
504         switch(c->state) {
505                 case SYN_SENT:
506                         // Send our SYN again
507                         pkt->hdr.seq = c->snd.iss;
508                         pkt->hdr.ack = 0;
509                         pkt->hdr.wnd = c->rcv.wnd;
510                         pkt->hdr.ctl = SYN;
511                         print_packet(c->utcp, "rtrx", pkt, sizeof pkt->hdr);
512                         utcp->send(utcp, pkt, sizeof pkt->hdr);
513                         break;
514
515                 case SYN_RECEIVED:
516                         // Send SYNACK again
517                         pkt->hdr.seq = c->snd.nxt;
518                         pkt->hdr.ack = c->rcv.nxt;
519                         pkt->hdr.ctl = SYN | ACK;
520                         print_packet(c->utcp, "rtrx", pkt, sizeof pkt->hdr);
521                         utcp->send(utcp, pkt, sizeof pkt->hdr);
522                         break;
523
524                 case ESTABLISHED:
525                 case FIN_WAIT_1:
526                 case CLOSE_WAIT:
527                 case CLOSING:
528                 case LAST_ACK:
529                         // Send unacked data again.
530                         pkt->hdr.seq = c->snd.una;
531                         pkt->hdr.ack = c->rcv.nxt;
532                         pkt->hdr.ctl = ACK;
533                         uint32_t len = seqdiff(c->snd.last, c->snd.una);
534                         if(len > utcp->mtu)
535                                 len = utcp->mtu;
536                         if(fin_wanted(c, c->snd.una + len)) {
537                                 len--;
538                                 pkt->hdr.ctl |= FIN;
539                         }
540                         c->snd.nxt = c->snd.una + len;
541                         c->snd.cwnd = utcp->mtu; // reduce cwnd on retransmit
542                         buffer_copy(&c->sndbuf, pkt->data, 0, len);
543                         print_packet(c->utcp, "rtrx", pkt, sizeof pkt->hdr + len);
544                         utcp->send(utcp, pkt, sizeof pkt->hdr + len);
545                         break;
546
547                 case CLOSED:
548                 case LISTEN:
549                 case TIME_WAIT:
550                 case FIN_WAIT_2:
551                         // We shouldn't need to retransmit anything in this state.
552 #ifdef UTCP_DEBUG
553                         abort();
554 #endif
555                         timerclear(&c->rtrx_timeout);
556                         break;
557         }
558
559         free(pkt);
560 }
561
562 // Update receive buffer and SACK entries after consuming data.
563 static void sack_consume(struct utcp_connection *c, size_t len) {
564         debug("sack_consume %zu\n", len);
565         if(len > c->rcvbuf.used)
566                 abort();
567
568         buffer_get(&c->rcvbuf, NULL, len);
569
570         for(int i = 0; i < NSACKS && c->sacks[i].len; ) {
571                 if(len < c->sacks[i].offset) {
572                         c->sacks[i].offset -= len;
573                         i++;
574                 } else if(len < c->sacks[i].offset + c->sacks[i].len) {
575                         c->sacks[i].offset = 0;
576                         c->sacks[i].len -= len - c->sacks[i].offset;
577                         i++;
578                 } else {
579                         if(i < NSACKS - 1) {
580                                 memmove(&c->sacks[i], &c->sacks[i + 1], (NSACKS - 1 - i) * sizeof c->sacks[i]);
581                                 c->sacks[i + 1].len = 0;
582                         } else {
583                                 c->sacks[i].len = 0;
584                                 break;
585                         }
586                 }
587         }
588
589         for(int i = 0; i < NSACKS && c->sacks[i].len; i++)
590                 debug("SACK[%d] offset %u len %u\n", i, c->sacks[i].offset, c->sacks[i].len);
591 }
592
593 static void handle_out_of_order(struct utcp_connection *c, uint32_t offset, const void *data, size_t len) {
594         debug("out of order packet, offset %u\n", offset);
595         // Packet loss or reordering occured. Store the data in the buffer.
596         ssize_t rxd = buffer_put_at(&c->rcvbuf, offset, data, len);
597         if(rxd < len)
598                 abort();
599
600         // Make note of where we put it.
601         for(int i = 0; i < NSACKS; i++) {
602                 if(!c->sacks[i].len) { // nothing to merge, add new entry
603                         debug("New SACK entry %d\n", i);
604                         c->sacks[i].offset = offset;
605                         c->sacks[i].len = rxd;
606                         break;
607                 } else if(offset < c->sacks[i].offset) {
608                         if(offset + rxd < c->sacks[i].offset) { // insert before
609                                 if(!c->sacks[NSACKS - 1].len) { // only if room left
610                                         debug("Insert SACK entry at %d\n", i);
611                                         memmove(&c->sacks[i + 1], &c->sacks[i], (NSACKS - i - 1) * sizeof c->sacks[i]);
612                                         c->sacks[i].offset = offset;
613                                         c->sacks[i].len = rxd;
614                                 }
615                                 break;
616                         } else { // merge
617                                 debug("Merge with start of SACK entry at %d\n", i);
618                                 c->sacks[i].offset = offset;
619                                 break;
620                         }
621                 } else if(offset <= c->sacks[i].offset + c->sacks[i].len) {
622                         if(offset + rxd > c->sacks[i].offset + c->sacks[i].len) { // merge
623                                 debug("Merge with end of SACK entry at %d\n", i);
624                                 c->sacks[i].len = offset + rxd - c->sacks[i].offset;
625                                 // TODO: handle potential merge with next entry
626                         }
627                         break;
628                 }
629         }
630
631         for(int i = 0; i < NSACKS && c->sacks[i].len; i++)
632                 debug("SACK[%d] offset %u len %u\n", i, c->sacks[i].offset, c->sacks[i].len);
633 }
634
635 static void handle_in_order(struct utcp_connection *c, const void *data, size_t len) {
636         // Check if we can process out-of-order data now.
637         if(c->sacks[0].len && len >= c->sacks[0].offset) { // TODO: handle overlap with second SACK
638                 debug("incoming packet len %zu connected with SACK at %u\n", len, c->sacks[0].offset);
639                 buffer_put_at(&c->rcvbuf, 0, data, len); // TODO: handle return value
640                 len = max(len, c->sacks[0].offset + c->sacks[0].len);
641                 data = c->rcvbuf.data;
642         }
643
644         if(c->recv) {
645                 ssize_t rxd = c->recv(c, data, len);
646                 if(rxd != len) {
647                         // TODO: handle the application not accepting all data.
648                         abort();
649                 }
650         }
651
652         if(c->rcvbuf.used)
653                 sack_consume(c, len);
654
655         c->rcv.nxt += len;
656 }
657
658
659 static void handle_incoming_data(struct utcp_connection *c, uint32_t seq, const void *data, size_t len) {
660         uint32_t offset = seqdiff(seq, c->rcv.nxt);
661         if(offset + len > c->rcvbuf.maxsize)
662                 abort();
663
664         if(offset)
665                 handle_out_of_order(c, offset, data, len);
666         else
667                 handle_in_order(c, data, len);
668 }
669
670
671 ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) {
672         if(!utcp) {
673                 errno = EFAULT;
674                 return -1;
675         }
676
677         if(!len)
678                 return 0;
679
680         if(!data) {
681                 errno = EFAULT;
682                 return -1;
683         }
684
685         print_packet(utcp, "recv", data, len);
686
687         // Drop packets smaller than the header
688
689         struct hdr hdr;
690         if(len < sizeof hdr) {
691                 errno = EBADMSG;
692                 return -1;
693         }
694
695         // Make a copy from the potentially unaligned data to a struct hdr
696
697         memcpy(&hdr, data, sizeof hdr);
698         data += sizeof hdr;
699         len -= sizeof hdr;
700
701         // Drop packets with an unknown CTL flag
702
703         if(hdr.ctl & ~(SYN | ACK | RST | FIN)) {
704                 errno = EBADMSG;
705                 return -1;
706         }
707
708         // Try to match the packet to an existing connection
709
710         struct utcp_connection *c = find_connection(utcp, hdr.dst, hdr.src);
711
712         // Is it for a new connection?
713
714         if(!c) {
715                 // Ignore RST packets
716
717                 if(hdr.ctl & RST)
718                         return 0;
719
720                 // Is it a SYN packet and are we LISTENing?
721
722                 if(hdr.ctl & SYN && !(hdr.ctl & ACK) && utcp->accept) {
723                         // If we don't want to accept it, send a RST back
724                         if((utcp->pre_accept && !utcp->pre_accept(utcp, hdr.dst))) {
725                                 len = 1;
726                                 goto reset;
727                         }
728
729                         // Try to allocate memory, otherwise send a RST back
730                         c = allocate_connection(utcp, hdr.dst, hdr.src);
731                         if(!c) {
732                                 len = 1;
733                                 goto reset;
734                         }
735
736                         // Return SYN+ACK, go to SYN_RECEIVED state
737                         c->snd.wnd = hdr.wnd;
738                         c->rcv.irs = hdr.seq;
739                         c->rcv.nxt = c->rcv.irs + 1;
740                         set_state(c, SYN_RECEIVED);
741
742                         hdr.dst = c->dst;
743                         hdr.src = c->src;
744                         hdr.ack = c->rcv.irs + 1;
745                         hdr.seq = c->snd.iss;
746                         hdr.ctl = SYN | ACK;
747                         print_packet(c->utcp, "send", &hdr, sizeof hdr);
748                         utcp->send(utcp, &hdr, sizeof hdr);
749                 } else {
750                         // No, we don't want your packets, send a RST back
751                         len = 1;
752                         goto reset;
753                 }
754
755                 return 0;
756         }
757
758         debug("%p state %s\n", c->utcp, strstate[c->state]);
759
760         // In case this is for a CLOSED connection, ignore the packet.
761         // TODO: make it so incoming packets can never match a CLOSED connection.
762
763         if(c->state == CLOSED)
764                 return 0;
765
766         // It is for an existing connection.
767
768         uint32_t prevrcvnxt = c->rcv.nxt;
769
770         // 1. Drop invalid packets.
771
772         // 1a. Drop packets that should not happen in our current state.
773
774         switch(c->state) {
775         case SYN_SENT:
776         case SYN_RECEIVED:
777         case ESTABLISHED:
778         case FIN_WAIT_1:
779         case FIN_WAIT_2:
780         case CLOSE_WAIT:
781         case CLOSING:
782         case LAST_ACK:
783         case TIME_WAIT:
784                 break;
785         default:
786 #ifdef UTCP_DEBUG
787                 abort();
788 #endif
789                 break;
790         }
791
792         // 1b. Drop packets with a sequence number not in our receive window.
793
794         bool acceptable;
795
796         if(c->state == SYN_SENT)
797                 acceptable = true;
798
799         // TODO: handle packets overlapping c->rcv.nxt.
800 #if 1
801         // Only use this when accepting out-of-order packets.
802         else if(len == 0)
803                 acceptable = seqdiff(hdr.seq, c->rcv.nxt) >= 0;
804         else
805                 acceptable = seqdiff(hdr.seq, c->rcv.nxt) >= 0 && seqdiff(hdr.seq, c->rcv.nxt) + len <= c->rcvbuf.maxsize;
806 #else
807         if(c->state != SYN_SENT)
808                 acceptable = hdr.seq == c->rcv.nxt;
809 #endif
810
811         if(!acceptable) {
812                 debug("Packet not acceptable, %u <= %u + %zu < %u\n", c->rcv.nxt, hdr.seq, len, c->rcv.nxt + c->rcvbuf.maxsize);
813                 // Ignore unacceptable RST packets.
814                 if(hdr.ctl & RST)
815                         return 0;
816                 // Otherwise, send an ACK back in the hope things improve.
817                 ack(c, true);
818                 return 0;
819         }
820
821         c->snd.wnd = hdr.wnd; // TODO: move below
822
823         // 1c. Drop packets with an invalid ACK.
824         // ackno should not roll back, and it should also not be bigger than what we ever could have sent
825         // (= snd.una + c->sndbuf.used).
826
827         if(hdr.ctl & ACK &&
828                         ((seqdiff(hdr.ack, c->snd.una + c->sndbuf.used) > 0 &&
829                           seqdiff(hdr.ack, c->snd.nxt) > 0) // TODO: simplify this if
830                          || seqdiff(hdr.ack, c->snd.una) < 0)) {
831                 debug("Packet ack seqno out of range, %u <= %u < %u\n", c->snd.una, hdr.ack, c->snd.una + c->sndbuf.used);
832                 // Ignore unacceptable RST packets.
833                 if(hdr.ctl & RST)
834                         return 0;
835                 goto reset;
836         }
837
838         // 2. Handle RST packets
839
840         if(hdr.ctl & RST) {
841                 switch(c->state) {
842                 case SYN_SENT:
843                         if(!(hdr.ctl & ACK))
844                                 return 0;
845                         // The peer has refused our connection.
846                         set_state(c, CLOSED);
847                         errno = ECONNREFUSED;
848                         if(c->recv)
849                                 c->recv(c, NULL, 0);
850                         return 0;
851                 case SYN_RECEIVED:
852                         if(hdr.ctl & ACK)
853                                 return 0;
854                         // We haven't told the application about this connection yet. Silently delete.
855                         free_connection(c);
856                         return 0;
857                 case ESTABLISHED:
858                 case FIN_WAIT_1:
859                 case FIN_WAIT_2:
860                 case CLOSE_WAIT:
861                         if(hdr.ctl & ACK)
862                                 return 0;
863                         // The peer has aborted our connection.
864                         set_state(c, CLOSED);
865                         errno = ECONNRESET;
866                         if(c->recv)
867                                 c->recv(c, NULL, 0);
868                         return 0;
869                 case CLOSING:
870                 case LAST_ACK:
871                 case TIME_WAIT:
872                         if(hdr.ctl & ACK)
873                                 return 0;
874                         // As far as the application is concerned, the connection has already been closed.
875                         // If it has called utcp_close() already, we can immediately free this connection.
876                         if(c->reapable) {
877                                 free_connection(c);
878                                 return 0;
879                         }
880                         // Otherwise, immediately move to the CLOSED state.
881                         set_state(c, CLOSED);
882                         return 0;
883                 default:
884 #ifdef UTCP_DEBUG
885                         abort();
886 #endif
887                         break;
888                 }
889         }
890
891         // 3. Advance snd.una
892
893         uint32_t advanced = seqdiff(hdr.ack, c->snd.una);
894         prevrcvnxt = c->rcv.nxt;
895
896         if(advanced) {
897                 int32_t data_acked = advanced;
898
899                 switch(c->state) {
900                         case SYN_SENT:
901                         case SYN_RECEIVED:
902                                 data_acked--;
903                                 break;
904                         // TODO: handle FIN as well.
905                         default:
906                                 break;
907                 }
908
909                 assert(data_acked >= 0);
910
911                 int32_t bufused = seqdiff(c->snd.last, c->snd.una);
912                 assert(data_acked <= bufused);
913
914                 if(data_acked)
915                         buffer_get(&c->sndbuf, NULL, data_acked);
916
917                 // Also advance snd.nxt if possible
918                 if(seqdiff(c->snd.nxt, hdr.ack) < 0)
919                         c->snd.nxt = hdr.ack;
920
921                 c->snd.una = hdr.ack;
922
923                 c->dupack = 0;
924                 c->snd.cwnd += utcp->mtu;
925                 if(c->snd.cwnd > c->sndbuf.maxsize)
926                         c->snd.cwnd = c->sndbuf.maxsize;
927
928                 // Check if we have sent a FIN that is now ACKed.
929                 switch(c->state) {
930                 case FIN_WAIT_1:
931                         if(c->snd.una == c->snd.last)
932                                 set_state(c, FIN_WAIT_2);
933                         break;
934                 case CLOSING:
935                         if(c->snd.una == c->snd.last) {
936                                 gettimeofday(&c->conn_timeout, NULL);
937                                 c->conn_timeout.tv_sec += 60;
938                                 set_state(c, TIME_WAIT);
939                         }
940                         break;
941                 default:
942                         break;
943                 }
944         } else {
945                 if(!len) {
946                         c->dupack++;
947                         if(c->dupack == 3) {
948                                 debug("Triplicate ACK\n");
949                                 //TODO: Resend one packet and go to fast recovery mode. See RFC 6582.
950                                 //We do a very simple variant here; reset the nxt pointer to the last acknowledged packet from the peer.
951                                 //Reset the congestion window so we wait for ACKs.
952                                 c->snd.nxt = c->snd.una;
953                                 c->snd.cwnd = utcp->mtu;
954                         }
955                 }
956         }
957
958         // 4. Update timers
959
960         if(advanced) {
961                 timerclear(&c->conn_timeout); // It will be set anew in utcp_timeout() if c->snd.una != c->snd.nxt.
962                 if(c->snd.una == c->snd.nxt)
963                         timerclear(&c->rtrx_timeout);
964         }
965
966         // 5. Process SYN stuff
967
968         if(hdr.ctl & SYN) {
969                 switch(c->state) {
970                 case SYN_SENT:
971                         // This is a SYNACK. It should always have ACKed the SYN.
972                         if(!advanced)
973                                 goto reset;
974                         c->rcv.irs = hdr.seq;
975                         c->rcv.nxt = hdr.seq;
976                         set_state(c, ESTABLISHED);
977                         // TODO: notify application of this somehow.
978                         break;
979                 case SYN_RECEIVED:
980                 case ESTABLISHED:
981                 case FIN_WAIT_1:
982                 case FIN_WAIT_2:
983                 case CLOSE_WAIT:
984                 case CLOSING:
985                 case LAST_ACK:
986                 case TIME_WAIT:
987                         // Ehm, no. We should never receive a second SYN.
988                         goto reset;
989                 default:
990 #ifdef UTCP_DEBUG
991                         abort();
992 #endif
993                         return 0;
994                 }
995
996                 // SYN counts as one sequence number
997                 c->rcv.nxt++;
998         }
999
1000         // 6. Process new data
1001
1002         if(c->state == SYN_RECEIVED) {
1003                 // This is the ACK after the SYNACK. It should always have ACKed the SYNACK.
1004                 if(!advanced)
1005                         goto reset;
1006
1007                 // Are we still LISTENing?
1008                 if(utcp->accept)
1009                         utcp->accept(c, c->src);
1010
1011                 if(c->state != ESTABLISHED) {
1012                         set_state(c, CLOSED);
1013                         c->reapable = true;
1014                         goto reset;
1015                 }
1016         }
1017
1018         if(len) {
1019                 switch(c->state) {
1020                 case SYN_SENT:
1021                 case SYN_RECEIVED:
1022                         // This should never happen.
1023 #ifdef UTCP_DEBUG
1024                         abort();
1025 #endif
1026                         return 0;
1027                 case ESTABLISHED:
1028                 case FIN_WAIT_1:
1029                 case FIN_WAIT_2:
1030                         break;
1031                 case CLOSE_WAIT:
1032                 case CLOSING:
1033                 case LAST_ACK:
1034                 case TIME_WAIT:
1035                         // Ehm no, We should never receive more data after a FIN.
1036                         goto reset;
1037                 default:
1038 #ifdef UTCP_DEBUG
1039                         abort();
1040 #endif
1041                         return 0;
1042                 }
1043
1044                 handle_incoming_data(c, hdr.seq, data, len);
1045         }
1046
1047         // 7. Process FIN stuff
1048
1049         if((hdr.ctl & FIN) && hdr.seq + len == c->rcv.nxt) {
1050                 switch(c->state) {
1051                 case SYN_SENT:
1052                 case SYN_RECEIVED:
1053                         // This should never happen.
1054 #ifdef UTCP_DEBUG
1055                         abort();
1056 #endif
1057                         break;
1058                 case ESTABLISHED:
1059                         set_state(c, CLOSE_WAIT);
1060                         break;
1061                 case FIN_WAIT_1:
1062                         set_state(c, CLOSING);
1063                         break;
1064                 case FIN_WAIT_2:
1065                         gettimeofday(&c->conn_timeout, NULL);
1066                         c->conn_timeout.tv_sec += 60;
1067                         set_state(c, TIME_WAIT);
1068                         break;
1069                 case CLOSE_WAIT:
1070                 case CLOSING:
1071                 case LAST_ACK:
1072                 case TIME_WAIT:
1073                         // Ehm, no. We should never receive a second FIN.
1074                         goto reset;
1075                 default:
1076 #ifdef UTCP_DEBUG
1077                         abort();
1078 #endif
1079                         break;
1080                 }
1081
1082                 // FIN counts as one sequence number
1083                 c->rcv.nxt++;
1084                 len++;
1085
1086                 // Inform the application that the peer closed the connection.
1087                 if(c->recv) {
1088                         errno = 0;
1089                         c->recv(c, NULL, 0);
1090                 }
1091         }
1092
1093         // Now we send something back if:
1094         // - we advanced rcv.nxt (ie, we got some data that needs to be ACKed)
1095         //   -> sendatleastone = true
1096         // - or we got an ack, so we should maybe send a bit more data
1097         //   -> sendatleastone = false
1098
1099 ack:
1100         ack(c, prevrcvnxt != c->rcv.nxt);
1101         return 0;
1102
1103 reset:
1104         swap_ports(&hdr);
1105         hdr.wnd = 0;
1106         if(hdr.ctl & ACK) {
1107                 hdr.seq = hdr.ack;
1108                 hdr.ctl = RST;
1109         } else {
1110                 hdr.ack = hdr.seq + len;
1111                 hdr.seq = 0;
1112                 hdr.ctl = RST | ACK;
1113         }
1114         print_packet(utcp, "send", &hdr, sizeof hdr);
1115         utcp->send(utcp, &hdr, sizeof hdr);
1116         return 0;
1117
1118 }
1119
1120 int utcp_shutdown(struct utcp_connection *c, int dir) {
1121         debug("%p shutdown %d at %u\n", c ? c->utcp : NULL, dir, c ? c->snd.last : 0);
1122         if(!c) {
1123                 errno = EFAULT;
1124                 return -1;
1125         }
1126
1127         if(c->reapable) {
1128                 debug("Error: shutdown() called on closed connection %p\n", c);
1129                 errno = EBADF;
1130                 return -1;
1131         }
1132
1133         if(!(dir == UTCP_SHUT_RD || dir == UTCP_SHUT_WR || dir == UTCP_SHUT_RDWR)) {
1134                 errno = EINVAL;
1135                 return -1;
1136         }
1137
1138         // TCP does not have a provision for stopping incoming packets.
1139         // The best we can do is to just ignore them.
1140         if(dir == UTCP_SHUT_RD || dir == UTCP_SHUT_RDWR)
1141                 c->recv = NULL;
1142
1143         // The rest of the code deals with shutting down writes.
1144         if(dir == UTCP_SHUT_RD)
1145                 return 0;
1146
1147         switch(c->state) {
1148         case CLOSED:
1149         case LISTEN:
1150                 errno = ENOTCONN;
1151                 return -1;
1152
1153         case SYN_SENT:
1154                 set_state(c, CLOSED);
1155                 return 0;
1156
1157         case SYN_RECEIVED:
1158         case ESTABLISHED:
1159                 set_state(c, FIN_WAIT_1);
1160                 break;
1161         case FIN_WAIT_1:
1162         case FIN_WAIT_2:
1163                 return 0;
1164         case CLOSE_WAIT:
1165                 set_state(c, CLOSING);
1166                 break;
1167
1168         case CLOSING:
1169         case LAST_ACK:
1170         case TIME_WAIT:
1171                 return 0;
1172         }
1173
1174         c->snd.last++;
1175
1176         ack(c, false);
1177         return 0;
1178 }
1179
1180 int utcp_close(struct utcp_connection *c) {
1181         if(utcp_shutdown(c, SHUT_RDWR))
1182                 return -1;
1183         c->recv = NULL;
1184         c->poll = NULL;
1185         c->reapable = true;
1186         return 0;
1187 }
1188
1189 int utcp_abort(struct utcp_connection *c) {
1190         if(!c) {
1191                 errno = EFAULT;
1192                 return -1;
1193         }
1194
1195         if(c->reapable) {
1196                 debug("Error: abort() called on closed connection %p\n", c);
1197                 errno = EBADF;
1198                 return -1;
1199         }
1200
1201         c->recv = NULL;
1202         c->poll = NULL;
1203         c->reapable = true;
1204
1205         switch(c->state) {
1206         case CLOSED:
1207                 return 0;
1208         case LISTEN:
1209         case SYN_SENT:
1210         case CLOSING:
1211         case LAST_ACK:
1212         case TIME_WAIT:
1213                 set_state(c, CLOSED);
1214                 return 0;
1215
1216         case SYN_RECEIVED:
1217         case ESTABLISHED:
1218         case FIN_WAIT_1:
1219         case FIN_WAIT_2:
1220         case CLOSE_WAIT:
1221                 set_state(c, CLOSED);
1222                 break;
1223         }
1224
1225         // Send RST
1226
1227         struct hdr hdr;
1228
1229         hdr.src = c->src;
1230         hdr.dst = c->dst;
1231         hdr.seq = c->snd.nxt;
1232         hdr.ack = 0;
1233         hdr.wnd = 0;
1234         hdr.ctl = RST;
1235
1236         print_packet(c->utcp, "send", &hdr, sizeof hdr);
1237         c->utcp->send(c->utcp, &hdr, sizeof hdr);
1238         return 0;
1239 }
1240
1241 /* Handle timeouts.
1242  * One call to this function will loop through all connections,
1243  * checking if something needs to be resent or not.
1244  * The return value is the time to the next timeout in milliseconds,
1245  * or maybe a negative value if the timeout is infinite.
1246  */
1247 struct timeval utcp_timeout(struct utcp *utcp) {
1248         struct timeval now;
1249         gettimeofday(&now, NULL);
1250         struct timeval next = {now.tv_sec + 3600, now.tv_usec};
1251
1252         for(int i = 0; i < utcp->nconnections; i++) {
1253                 struct utcp_connection *c = utcp->connections[i];
1254                 if(!c)
1255                         continue;
1256
1257                 if(c->state == CLOSED) {
1258                         if(c->reapable) {
1259                                 debug("Reaping %p\n", c);
1260                                 free_connection(c);
1261                                 i--;
1262                         }
1263                         continue;
1264                 }
1265
1266                 if(timerisset(&c->conn_timeout) && timercmp(&c->conn_timeout, &now, <)) {
1267                         errno = ETIMEDOUT;
1268                         c->state = CLOSED;
1269                         if(c->recv)
1270                                 c->recv(c, NULL, 0);
1271                         continue;
1272                 }
1273
1274                 if(timerisset(&c->rtrx_timeout) && timercmp(&c->rtrx_timeout, &now, <)) {
1275                         retransmit(c);
1276                 }
1277
1278                 if(c->poll && buffer_free(&c->sndbuf) && (c->state == ESTABLISHED || c->state == CLOSE_WAIT))
1279                         c->poll(c, buffer_free(&c->sndbuf));
1280
1281                 if(timerisset(&c->conn_timeout) && timercmp(&c->conn_timeout, &next, <))
1282                         next = c->conn_timeout;
1283
1284                 if(c->snd.nxt != c->snd.una) {
1285                         c->rtrx_timeout = now;
1286                         c->rtrx_timeout.tv_sec++;
1287                 } else {
1288                         timerclear(&c->rtrx_timeout);
1289                 }
1290
1291                 if(timerisset(&c->rtrx_timeout) && timercmp(&c->rtrx_timeout, &next, <))
1292                         next = c->rtrx_timeout;
1293         }
1294
1295         struct timeval diff;
1296         timersub(&next, &now, &diff);
1297         return diff;
1298 }
1299
1300 bool utcp_is_active(struct utcp *utcp) {
1301         if(!utcp)
1302                 return false;
1303
1304         for(int i = 0; i < utcp->nconnections; i++)
1305                 if(utcp->connections[i]->state != CLOSED && utcp->connections[i]->state != TIME_WAIT)
1306                         return true;
1307
1308         return false;
1309 }
1310
1311 struct utcp *utcp_init(utcp_accept_t accept, utcp_pre_accept_t pre_accept, utcp_send_t send, void *priv) {
1312         struct utcp *utcp = calloc(1, sizeof *utcp);
1313         if(!utcp)
1314                 return NULL;
1315
1316         if(!send) {
1317                 errno = EFAULT;
1318                 return NULL;
1319         }
1320
1321         utcp->accept = accept;
1322         utcp->pre_accept = pre_accept;
1323         utcp->send = send;
1324         utcp->priv = priv;
1325         utcp->mtu = 1000;
1326         utcp->timeout = 60;
1327
1328         return utcp;
1329 }
1330
1331 void utcp_exit(struct utcp *utcp) {
1332         if(!utcp)
1333                 return;
1334         for(int i = 0; i < utcp->nconnections; i++) {
1335                 if(!utcp->connections[i]->reapable)
1336                         debug("Warning, freeing unclosed connection %p\n", utcp->connections[i]);
1337                 buffer_exit(&utcp->connections[i]->sndbuf);
1338                 free(utcp->connections[i]);
1339         }
1340         free(utcp->connections);
1341         free(utcp);
1342 }
1343
1344 uint16_t utcp_get_mtu(struct utcp *utcp) {
1345         return utcp ? utcp->mtu : 0;
1346 }
1347
1348 void utcp_set_mtu(struct utcp *utcp, uint16_t mtu) {
1349         // TODO: handle overhead of the header
1350         if(utcp)
1351                 utcp->mtu = mtu;
1352 }
1353
1354 int utcp_get_user_timeout(struct utcp *u) {
1355         return u ? u->timeout : 0;
1356 }
1357
1358 void utcp_set_user_timeout(struct utcp *u, int timeout) {
1359         if(u)
1360                 u->timeout = timeout;
1361 }
1362
1363 size_t utcp_get_sndbuf(struct utcp_connection *c) {
1364         return c ? c->sndbuf.maxsize : 0;
1365 }
1366
1367 size_t utcp_get_sndbuf_free(struct utcp_connection *c) {
1368         if(c && (c->state == ESTABLISHED || c->state == CLOSE_WAIT))
1369                 return buffer_free(&c->sndbuf);
1370         else
1371                 return 0;
1372 }
1373
1374 void utcp_set_sndbuf(struct utcp_connection *c, size_t size) {
1375         if(!c)
1376                 return;
1377         c->sndbuf.maxsize = size;
1378         if(c->sndbuf.maxsize != size)
1379                 c->sndbuf.maxsize = -1;
1380 }
1381
1382 size_t utcp_get_rcvbuf(struct utcp_connection *c) {
1383         return c ? c->rcvbuf.maxsize : 0;
1384 }
1385
1386 size_t utcp_get_rcvbuf_free(struct utcp_connection *c) {
1387         if(c && (c->state == ESTABLISHED || c->state == CLOSE_WAIT))
1388                 return buffer_free(&c->rcvbuf);
1389         else
1390                 return 0;
1391 }
1392
1393 void utcp_set_rcvbuf(struct utcp_connection *c, size_t size) {
1394         if(!c)
1395                 return;
1396         c->rcvbuf.maxsize = size;
1397         if(c->rcvbuf.maxsize != size)
1398                 c->rcvbuf.maxsize = -1;
1399 }
1400
1401 bool utcp_get_nodelay(struct utcp_connection *c) {
1402         return c ? c->nodelay : false;
1403 }
1404
1405 void utcp_set_nodelay(struct utcp_connection *c, bool nodelay) {
1406         if(c)
1407                 c->nodelay = nodelay;
1408 }
1409
1410 bool utcp_get_keepalive(struct utcp_connection *c) {
1411         return c ? c->keepalive : false;
1412 }
1413
1414 void utcp_set_keepalive(struct utcp_connection *c, bool keepalive) {
1415         if(c)
1416                 c->keepalive = keepalive;
1417 }
1418
1419 size_t utcp_get_outq(struct utcp_connection *c) {
1420         return c ? seqdiff(c->snd.nxt, c->snd.una) : 0;
1421 }
1422
1423 void utcp_set_recv_cb(struct utcp_connection *c, utcp_recv_t recv) {
1424         if(c)
1425                 c->recv = recv;
1426 }
1427
1428 void utcp_set_poll_cb(struct utcp_connection *c, utcp_poll_t poll) {
1429         if(c)
1430                 c->poll = poll;
1431 }
1432
1433 void utcp_set_accept_cb(struct utcp *utcp, utcp_accept_t accept, utcp_pre_accept_t pre_accept) {
1434         if(utcp) {
1435                 utcp->accept = accept;
1436                 utcp->pre_accept = pre_accept;
1437         }
1438 }