]> git.meshlink.io Git - utcp/blob - utcp.c
ca3c132f60c9132da69b019e3583d83db11974df
[utcp] / utcp.c
1 /*
2     utcp.c -- Userspace TCP
3     Copyright (C) 2014 Guus Sliepen <guus@tinc-vpn.org>
4
5     This program is free software; you can redistribute it and/or modify
6     it under the terms of the GNU General Public License as published by
7     the Free Software Foundation; either version 2 of the License, or
8     (at your option) any later version.
9
10     This program is distributed in the hope that it will be useful,
11     but WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13     GNU General Public License for more details.
14
15     You should have received a copy of the GNU General Public License along
16     with this program; if not, write to the Free Software Foundation, Inc.,
17     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20 #define _GNU_SOURCE
21
22 #include <assert.h>
23 #include <errno.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <stdint.h>
27 #include <stdbool.h>
28 #include <string.h>
29 #include <unistd.h>
30 #include <sys/time.h>
31 #include <sys/socket.h>
32
33 #include "utcp_priv.h"
34
35 #ifdef UTCP_DEBUG
36 #include <stdarg.h>
37
/* Write a printf-style formatted diagnostic message to stderr. */
static void debug(const char *format, ...) {
        va_list args;

        va_start(args, format);
        vfprintf(stderr, format, args);
        va_end(args);
}
44
45 static void print_packet(struct utcp *utcp, const char *dir, const void *pkt, size_t len) {
46         struct hdr hdr;
47         if(len < sizeof hdr) {
48                 debug("%p %s: short packet (%zu bytes)\n", utcp, dir, len);
49                 return;
50         }
51
52         memcpy(&hdr, pkt, sizeof hdr);
53         fprintf (stderr, "%p %s: len=%zu, src=%u dst=%u seq=%u ack=%u wnd=%u ctl=", utcp, dir, len, hdr.src, hdr.dst, hdr.seq, hdr.ack, hdr.wnd);
54         if(hdr.ctl & SYN)
55                 debug("SYN");
56         if(hdr.ctl & RST)
57                 debug("RST");
58         if(hdr.ctl & FIN)
59                 debug("FIN");
60         if(hdr.ctl & ACK)
61                 debug("ACK");
62
63         if(len > sizeof hdr) {
64                 debug(" data=");
65                 for(int i = sizeof hdr; i < len; i++) {
66                         const char *data = pkt;
67                         debug("%c", data[i] >= 32 ? data[i] : '.');
68                 }
69         }
70
71         debug("\n");
72 }
73 #else
74 #define debug(...)
75 #define print_packet(...)
76 #endif
77
78 static void set_state(struct utcp_connection *c, enum state state) {
79         c->state = state;
80         if(state == ESTABLISHED)
81                 timerclear(&c->conn_timeout);
82         debug("%p new state: %s\n", c->utcp, strstate[state]);
83 }
84
85 static inline void list_connections(struct utcp *utcp) {
86         debug("%p has %d connections:\n", utcp, utcp->nconnections);
87         for(int i = 0; i < utcp->nconnections; i++)
88                 debug("  %u -> %u state %s\n", utcp->connections[i]->src, utcp->connections[i]->dst, strstate[utcp->connections[i]->state]);
89 }
90
/*
 * Signed distance from sequence number b to sequence number a.
 * The subtraction is done modulo 2^32 and the result reinterpreted as
 * signed, so the answer stays meaningful across sequence number wrap-around.
 */
static int32_t seqdiff(uint32_t a, uint32_t b) {
        uint32_t delta = a - b;
        return delta;
}
94
95 // Connections are stored in a sorted list.
96 // This gives O(log(N)) lookup time, O(N log(N)) insertion time and O(N) deletion time.
97
98 static int compare(const void *va, const void *vb) {
99         const struct utcp_connection *a = *(struct utcp_connection **)va;
100         const struct utcp_connection *b = *(struct utcp_connection **)vb;
101
102         assert(a->src && b->src);
103
104         int c = (int)a->src - (int)b->src;
105         if(c)
106                 return c;
107         c = (int)a->dst - (int)b->dst;
108         return c;
109 }
110
111 static struct utcp_connection *find_connection(const struct utcp *utcp, uint16_t src, uint16_t dst) {
112         if(!utcp->nconnections)
113                 return NULL;
114         struct utcp_connection key = {
115                 .src = src,
116                 .dst = dst,
117         }, *keyp = &key;
118         struct utcp_connection **match = bsearch(&keyp, utcp->connections, utcp->nconnections, sizeof *utcp->connections, compare);
119         return match ? *match : NULL;
120 }
121
122 static void free_connection(struct utcp_connection *c) {
123         struct utcp *utcp = c->utcp;
124         struct utcp_connection **cp = bsearch(&c, utcp->connections, utcp->nconnections, sizeof *utcp->connections, compare);
125
126         assert(cp);
127
128         int i = cp - utcp->connections;
129         memmove(cp + i, cp + i + 1, (utcp->nconnections - i - 1) * sizeof *cp);
130         utcp->nconnections--;
131
132         free(c->sndbuf);
133         free(c);
134 }
135
136 static struct utcp_connection *allocate_connection(struct utcp *utcp, uint16_t src, uint16_t dst) {
137         // Check whether this combination of src and dst is free
138
139         if(src) {
140                 if(find_connection(utcp, src, dst)) {
141                         errno = EADDRINUSE;
142                         return NULL;
143                 }
144         } else { // If src == 0, generate a random port number with the high bit set
145                 if(utcp->nconnections >= 32767) {
146                         errno = ENOMEM;
147                         return NULL;
148                 }
149                 src = rand() | 0x8000;
150                 while(find_connection(utcp, src, dst))
151                         src++;
152         }
153
154         // Allocate memory for the new connection
155
156         if(utcp->nconnections >= utcp->nallocated) {
157                 if(!utcp->nallocated)
158                         utcp->nallocated = 4;
159                 else
160                         utcp->nallocated *= 2;
161                 struct utcp_connection **new_array = realloc(utcp->connections, utcp->nallocated * sizeof *utcp->connections);
162                 if(!new_array)
163                         return NULL;
164                 utcp->connections = new_array;
165         }
166
167         struct utcp_connection *c = calloc(1, sizeof *c);
168         if(!c)
169                 return NULL;
170
171         c->sndbufsize = DEFAULT_SNDBUFSIZE;
172         c->maxsndbufsize = DEFAULT_MAXSNDBUFSIZE;
173         c->sndbuf = malloc(c->sndbufsize);
174         if(!c->sndbuf) {
175                 free(c);
176                 return NULL;
177         }
178
179         // Fill in the details
180
181         c->src = src;
182         c->dst = dst;
183         c->snd.iss = rand();
184         c->snd.una = c->snd.iss;
185         c->snd.nxt = c->snd.iss + 1;
186         c->rcv.wnd = utcp->mtu;
187         c->snd.last = c->snd.nxt;
188         c->snd.cwnd = utcp->mtu;
189         c->utcp = utcp;
190
191         // Add it to the sorted list of connections
192
193         utcp->connections[utcp->nconnections++] = c;
194         qsort(utcp->connections, utcp->nconnections, sizeof *utcp->connections, compare);
195
196         return c;
197 }
198
199 struct utcp_connection *utcp_connect(struct utcp *utcp, uint16_t dst, utcp_recv_t recv, void *priv) {
200         struct utcp_connection *c = allocate_connection(utcp, 0, dst);
201         if(!c)
202                 return NULL;
203
204         c->recv = recv;
205         c->priv = priv;
206
207         struct hdr hdr;
208
209         hdr.src = c->src;
210         hdr.dst = c->dst;
211         hdr.seq = c->snd.iss;
212         hdr.ack = 0;
213         hdr.wnd = c->rcv.wnd;
214         hdr.ctl = SYN;
215         hdr.aux = 0;
216
217         set_state(c, SYN_SENT);
218
219         print_packet(utcp, "send", &hdr, sizeof hdr);
220         utcp->send(utcp, &hdr, sizeof hdr);
221
222         gettimeofday(&c->conn_timeout, NULL);
223         c->conn_timeout.tv_sec += utcp->timeout;
224
225         return c;
226 }
227
228 void utcp_accept(struct utcp_connection *c, utcp_recv_t recv, void *priv) {
229         if(c->reapable || c->state != SYN_RECEIVED) {
230                 debug("Error: accept() called on invalid connection %p in state %s\n", c, strstate[c->state]);
231                 return;
232         }
233
234         debug("%p accepted, %p %p\n", c, recv, priv);
235         c->recv = recv;
236         c->priv = priv;
237         set_state(c, ESTABLISHED);
238 }
239
240 static void ack(struct utcp_connection *c, bool sendatleastone) {
241         int32_t left = seqdiff(c->snd.last, c->snd.nxt);
242         int32_t cwndleft = c->snd.cwnd - seqdiff(c->snd.nxt, c->snd.una);
243         char *data = c->sndbuf + seqdiff(c->snd.nxt, c->snd.una);
244
245         assert(left >= 0);
246
247         if(cwndleft <= 0)
248                 cwndleft = 0;
249
250         if(cwndleft < left)
251                 left = cwndleft;
252
253         if(!left && !sendatleastone)
254                 return;
255
256         struct {
257                 struct hdr hdr;
258                 char data[c->utcp->mtu];
259         } pkt;
260
261         pkt.hdr.src = c->src;
262         pkt.hdr.dst = c->dst;
263         pkt.hdr.ack = c->rcv.nxt;
264         pkt.hdr.wnd = c->snd.wnd;
265         pkt.hdr.ctl = ACK;
266         pkt.hdr.aux = 0;
267
268         do {
269                 uint32_t seglen = left > c->utcp->mtu ? c->utcp->mtu : left;
270                 pkt.hdr.seq = c->snd.nxt;
271
272                 memcpy(pkt.data, data, seglen);
273
274                 c->snd.nxt += seglen;
275                 data += seglen;
276                 left -= seglen;
277
278                 if(c->state != ESTABLISHED && !left && seglen) {
279                         switch(c->state) {
280                         case FIN_WAIT_1:
281                         case CLOSING:
282                                 seglen--;
283                                 pkt.hdr.ctl |= FIN;
284                                 break;
285                         default:
286                                 break;
287                         }
288                 }
289
290                 print_packet(c->utcp, "send", &pkt, sizeof pkt.hdr + seglen);
291                 c->utcp->send(c->utcp, &pkt, sizeof pkt.hdr + seglen);
292         } while(left);
293 }
294
295 ssize_t utcp_send(struct utcp_connection *c, const void *data, size_t len) {
296         if(c->reapable) {
297                 debug("Error: send() called on closed connection %p\n", c);
298                 errno = EBADF;
299                 return -1;
300         }
301
302         switch(c->state) {
303         case CLOSED:
304         case LISTEN:
305         case SYN_SENT:
306         case SYN_RECEIVED:
307                 debug("Error: send() called on unconnected connection %p\n", c);
308                 errno = ENOTCONN;
309                 return -1;
310         case ESTABLISHED:
311         case CLOSE_WAIT:
312                 break;
313         case FIN_WAIT_1:
314         case FIN_WAIT_2:
315         case CLOSING:
316         case LAST_ACK:
317         case TIME_WAIT:
318                 debug("Error: send() called on closing connection %p\n", c);
319                 errno = EPIPE;
320                 return -1;
321         }
322
323         // Add data to send buffer
324
325         if(!len)
326                 return 0;
327
328         if(!data) {
329                 errno = EFAULT;
330                 return -1;
331         }
332
333         uint32_t bufused = seqdiff(c->snd.nxt, c->snd.una);
334
335         /* Check our send buffer.
336          * - If it's big enough, just put the data in there.
337          * - If not, decide whether to enlarge if possible.
338          * - Cap len so it doesn't overflow our buffer.
339          */
340
341         if(len > c->sndbufsize - bufused && c->sndbufsize < c->maxsndbufsize) {
342                 uint32_t newbufsize;
343                 if(c->sndbufsize > c->maxsndbufsize / 2)
344                         newbufsize = c->maxsndbufsize;
345                 else
346                         newbufsize = c->sndbufsize * 2;
347                 if(bufused + len > newbufsize) {
348                         if(bufused + len > c->maxsndbufsize)
349                                 newbufsize = c->maxsndbufsize;
350                         else
351                                 newbufsize = bufused + len;
352                 }
353                 char *newbuf = realloc(c->sndbuf, newbufsize);
354                 if(newbuf) {
355                         c->sndbuf = newbuf;
356                         c->sndbufsize = newbufsize;
357                 }
358         }
359
360         if(len > c->sndbufsize - bufused)
361                 len = c->sndbufsize - bufused;
362
363         if(!len) {
364                 errno == EWOULDBLOCK;
365                 return 0;
366         }
367
368         memcpy(c->sndbuf + bufused, data, len);
369         c->snd.last += len;
370
371         ack(c, false);
372         return len;
373 }
374
375 static void swap_ports(struct hdr *hdr) {
376         uint16_t tmp = hdr->src;
377         hdr->src = hdr->dst;
378         hdr->dst = tmp;
379 }
380
381 ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) {
382         if(!utcp) {
383                 errno = EFAULT;
384                 return -1;
385         }
386
387         if(!len)
388                 return 0;
389
390         if(!data) {
391                 errno = EFAULT;
392                 return -1;
393         }
394
395         print_packet(utcp, "recv", data, len);
396
397         // Drop packets smaller than the header
398
399         struct hdr hdr;
400         if(len < sizeof hdr) {
401                 errno = EBADMSG;
402                 return -1;
403         }
404
405         // Make a copy from the potentially unaligned data to a struct hdr
406
407         memcpy(&hdr, data, sizeof hdr);
408         data += sizeof hdr;
409         len -= sizeof hdr;
410
411         // Drop packets with an unknown CTL flag
412
413         if(hdr.ctl & ~(SYN | ACK | RST | FIN)) {
414                 errno = EBADMSG;
415                 return -1;
416         }
417
418         // Try to match the packet to an existing connection
419
420         struct utcp_connection *c = find_connection(utcp, hdr.dst, hdr.src);
421
422         // Is it for a new connection?
423
424         if(!c) {
425                 // Ignore RST packets
426
427                 if(hdr.ctl & RST)
428                         return 0;
429
430                 // Is it a SYN packet and are we LISTENing?
431
432                 if(hdr.ctl & SYN && !(hdr.ctl & ACK) && utcp->accept) {
433                         // If we don't want to accept it, send a RST back
434                         if((utcp->pre_accept && !utcp->pre_accept(utcp, hdr.dst))) {
435                                 len = 1;
436                                 goto reset;
437                         }
438
439                         // Try to allocate memory, otherwise send a RST back
440                         c = allocate_connection(utcp, hdr.dst, hdr.src);
441                         if(!c) {
442                                 len = 1;
443                                 goto reset;
444                         }
445
446                         // Return SYN+ACK, go to SYN_RECEIVED state
447                         c->snd.wnd = hdr.wnd;
448                         c->rcv.irs = hdr.seq;
449                         c->rcv.nxt = c->rcv.irs + 1;
450                         set_state(c, SYN_RECEIVED);
451
452                         hdr.dst = c->dst;
453                         hdr.src = c->src;
454                         hdr.ack = c->rcv.irs + 1;
455                         hdr.seq = c->snd.iss;
456                         hdr.ctl = SYN | ACK;
457                         print_packet(c->utcp, "send", &hdr, sizeof hdr);
458                         utcp->send(utcp, &hdr, sizeof hdr);
459                 } else {
460                         // No, we don't want your packets, send a RST back
461                         len = 1;
462                         goto reset;
463                 }
464
465                 return 0;
466         }
467
468         debug("%p state %s\n", c->utcp, strstate[c->state]);
469
470         // In case this is for a CLOSED connection, ignore the packet.
471         // TODO: make it so incoming packets can never match a CLOSED connection.
472
473         if(c->state == CLOSED)
474                 return 0;
475
476         // It is for an existing connection.
477
478         // 1. Drop invalid packets.
479
480         // 1a. Drop packets that should not happen in our current state.
481
482         switch(c->state) {
483         case SYN_SENT:
484         case SYN_RECEIVED:
485         case ESTABLISHED:
486         case FIN_WAIT_1:
487         case FIN_WAIT_2:
488         case CLOSE_WAIT:
489         case CLOSING:
490         case LAST_ACK:
491         case TIME_WAIT:
492                 break;
493         default:
494                 abort();
495         }
496
497         // 1b. Drop packets with a sequence number not in our receive window.
498
499         bool acceptable;
500
501         if(c->state == SYN_SENT)
502                 acceptable = true;
503
504         // TODO: handle packets overlapping c->rcv.nxt.
505 #if 0
506         // Only use this when accepting out-of-order packets.
507         else if(len == 0)
508                 if(c->rcv.wnd == 0)
509                         acceptable = hdr.seq == c->rcv.nxt;
510                 else
511                         acceptable = (seqdiff(hdr.seq, c->rcv.nxt) >= 0 && seqdiff(hdr.seq, c->rcv.nxt + c->rcv.wnd) < 0);
512         else
513                 if(c->rcv.wnd == 0)
514                         // We don't accept data when the receive window is zero.
515                         acceptable = false;
516                 else
517                         // Both start and end of packet must be within the receive window
518                         acceptable = (seqdiff(hdr.seq, c->rcv.nxt) >= 0 && seqdiff(hdr.seq, c->rcv.nxt + c->rcv.wnd) < 0)
519                                 || (seqdiff(hdr.seq + len + 1, c->rcv.nxt) >= 0 && seqdiff(hdr.seq + len - 1, c->rcv.nxt + c->rcv.wnd) < 0);
520 #else
521         if(c->state != SYN_SENT)
522                 acceptable = hdr.seq == c->rcv.nxt;
523 #endif
524
525         if(!acceptable) {
526                 debug("Packet not acceptable, %u  <= %u + %zu < %u\n", c->rcv.nxt, hdr.seq, len, c->rcv.nxt + c->rcv.wnd);
527                 // Ignore unacceptable RST packets.
528                 if(hdr.ctl & RST)
529                         return 0;
530                 // Otherwise, send an ACK back in the hope things improve.
531                 goto ack;
532         }
533
534         c->snd.wnd = hdr.wnd; // TODO: move below
535
536         // 1c. Drop packets with an invalid ACK.
537         // ackno should not roll back, and it should also not be bigger than snd.nxt.
538
539         if(hdr.ctl & ACK && (seqdiff(hdr.ack, c->snd.nxt) > 0 || seqdiff(hdr.ack, c->snd.una) < 0)) {
540                 debug("Packet ack seqno out of range, %u %u %u\n", hdr.ack, c->snd.una, c->snd.nxt);
541                 // Ignore unacceptable RST packets.
542                 if(hdr.ctl & RST)
543                         return 0;
544                 goto reset;
545         }
546
547         // 2. Handle RST packets
548
549         if(hdr.ctl & RST) {
550                 switch(c->state) {
551                 case SYN_SENT:
552                         if(!(hdr.ctl & ACK))
553                                 return 0;
554                         // The peer has refused our connection.
555                         set_state(c, CLOSED);
556                         errno = ECONNREFUSED;
557                         if(c->recv)
558                                 c->recv(c, NULL, 0);
559                         return 0;
560                 case SYN_RECEIVED:
561                         if(hdr.ctl & ACK)
562                                 return 0;
563                         // We haven't told the application about this connection yet. Silently delete.
564                         free_connection(c);
565                         return 0;
566                 case ESTABLISHED:
567                 case FIN_WAIT_1:
568                 case FIN_WAIT_2:
569                 case CLOSE_WAIT:
570                         if(hdr.ctl & ACK)
571                                 return 0;
572                         // The peer has aborted our connection.
573                         set_state(c, CLOSED);
574                         errno = ECONNRESET;
575                         if(c->recv)
576                                 c->recv(c, NULL, 0);
577                         return 0;
578                 case CLOSING:
579                 case LAST_ACK:
580                 case TIME_WAIT:
581                         if(hdr.ctl & ACK)
582                                 return 0;
583                         // As far as the application is concerned, the connection has already been closed.
584                         // If it has called utcp_close() already, we can immediately free this connection.
585                         if(c->reapable) {
586                                 free_connection(c);
587                                 return 0;
588                         }
589                         // Otherwise, immediately move to the CLOSED state.
590                         set_state(c, CLOSED);
591                         return 0;
592                 default:
593                         abort();
594                 }
595         }
596
597         // 3. Advance snd.una
598
599         uint32_t advanced = seqdiff(hdr.ack, c->snd.una);
600         uint32_t prevrcvnxt = c->rcv.nxt;
601
602         if(advanced) {
603                 int32_t data_acked = advanced;
604
605                 switch(c->state) {
606                         case SYN_SENT:
607                         case SYN_RECEIVED:
608                                 data_acked--;
609                                 break;
610                         // TODO: handle FIN as well.
611                         default:
612                                 break;
613                 }
614
615                 assert(data_acked >= 0);
616
617                 int32_t bufused = seqdiff(c->snd.last, c->snd.una);
618                 assert(data_acked <= bufused);
619
620                 // Make room in the send buffer.
621                 // TODO: try to avoid memmoving too much. Circular buffer?
622                 uint32_t left = bufused - data_acked;
623                 if(data_acked && left)
624                         memmove(c->sndbuf, c->sndbuf + data_acked, left);
625
626                 c->snd.una = hdr.ack;
627
628                 c->dupack = 0;
629                 c->snd.cwnd += utcp->mtu;
630                 if(c->snd.cwnd > c->maxsndbufsize)
631                         c->snd.cwnd = c->maxsndbufsize;
632
633                 // Check if we have sent a FIN that is now ACKed.
634                 switch(c->state) {
635                 case FIN_WAIT_1:
636                         if(c->snd.una == c->snd.last)
637                                 set_state(c, FIN_WAIT_2);
638                         break;
639                 case CLOSING:
640                         if(c->snd.una == c->snd.last) {
641                                 gettimeofday(&c->conn_timeout, NULL);
642                                 c->conn_timeout.tv_sec += 60;
643                                 set_state(c, TIME_WAIT);
644                         }
645                         break;
646                 default:
647                         break;
648                 }
649         } else {
650                 if(!len) {
651                         c->dupack++;
652                         if(c->dupack >= 3) {
653                                 debug("Triplicate ACK\n");
654                                 //TODO: Resend one packet and go to fast recovery mode. See RFC 6582.
655                                 //abort();
656                         }
657                 }
658         }
659
660         // 4. Update timers
661
662         if(advanced) {
663                 timerclear(&c->conn_timeout); // It will be set anew in utcp_timeout() if c->snd.una != c->snd.nxt.
664                 if(c->snd.una == c->snd.nxt)
665                         timerclear(&c->rtrx_timeout);
666         }
667
668         // 5. Process SYN stuff
669
670         if(hdr.ctl & SYN) {
671                 switch(c->state) {
672                 case SYN_SENT:
673                         // This is a SYNACK. It should always have ACKed the SYN.
674                         if(!advanced)
675                                 goto reset;
676                         c->rcv.irs = hdr.seq;
677                         c->rcv.nxt = hdr.seq;
678                         set_state(c, ESTABLISHED);
679                         // TODO: notify application of this somehow.
680                         break;
681                 case SYN_RECEIVED:
682                 case ESTABLISHED:
683                 case FIN_WAIT_1:
684                 case FIN_WAIT_2:
685                 case CLOSE_WAIT:
686                 case CLOSING:
687                 case LAST_ACK:
688                 case TIME_WAIT:
689                         // Ehm, no. We should never receive a second SYN.
690                         goto reset;
691                 default:
692                         abort();
693                 }
694
695                 // SYN counts as one sequence number
696                 c->rcv.nxt++;
697         }
698
699         // 6. Process new data
700
701         if(c->state == SYN_RECEIVED) {
702                 // This is the ACK after the SYNACK. It should always have ACKed the SYNACK.
703                 if(!advanced)
704                         goto reset;
705
706                 // Are we still LISTENing?
707                 if(utcp->accept)
708                         utcp->accept(c, c->src);
709
710                 if(c->state != ESTABLISHED) {
711                         set_state(c, CLOSED);
712                         c->reapable = true;
713                         goto reset;
714                 }
715         }
716
717         if(len) {
718                 switch(c->state) {
719                 case SYN_SENT:
720                 case SYN_RECEIVED:
721                         // This should never happen.
722                         abort();
723                 case ESTABLISHED:
724                 case FIN_WAIT_1:
725                 case FIN_WAIT_2:
726                         break;
727                 case CLOSE_WAIT:
728                 case CLOSING:
729                 case LAST_ACK:
730                 case TIME_WAIT:
731                         // Ehm no, We should never receive more data after a FIN.
732                         goto reset;
733                 default:
734                         abort();
735                 }
736
737                 ssize_t rxd;
738
739                 if(c->recv) {
740                         rxd = c->recv(c, data, len);
741                         if(rxd != len) {
742                                 // TODO: once we have a receive buffer, handle the application not accepting all data.
743                                 fprintf(stderr, "c->recv(%p, %p, %zu) returned %zd\n", c, data, len, rxd);
744                                 abort();
745                         }
746                         if(rxd < 0)
747                                 rxd = 0;
748                         else if(rxd > len)
749                                 rxd = len; // Bad application, bad!
750                 } else {
751                         rxd = len;
752                 }
753
754                 c->rcv.nxt += len;
755         }
756
757         // 7. Process FIN stuff
758
759         if(hdr.ctl & FIN) {
760                 switch(c->state) {
761                 case SYN_SENT:
762                 case SYN_RECEIVED:
763                         // This should never happen.
764                         abort();
765                 case ESTABLISHED:
766                         set_state(c, CLOSE_WAIT);
767                         break;
768                 case FIN_WAIT_1:
769                         set_state(c, CLOSING);
770                         break;
771                 case FIN_WAIT_2:
772                         gettimeofday(&c->conn_timeout, NULL);
773                         c->conn_timeout.tv_sec += 60;
774                         set_state(c, TIME_WAIT);
775                         break;
776                 case CLOSE_WAIT:
777                 case CLOSING:
778                 case LAST_ACK:
779                 case TIME_WAIT:
780                         // Ehm, no. We should never receive a second FIN.
781                         goto reset;
782                 default:
783                         abort();
784                 }
785
786                 // FIN counts as one sequence number
787                 c->rcv.nxt++;
788                 len++;
789
790                 // Inform the application that the peer closed the connection.
791                 if(c->recv) {
792                         errno = 0;
793                         c->recv(c, NULL, 0);
794                 }
795         }
796
797         // Now we send something back if:
798         // - we advanced rcv.nxt (ie, we got some data that needs to be ACKed)
799         //   -> sendatleastone = true
800         // - or we got an ack, so we should maybe send a bit more data
801         //   -> sendatleastone = false
802
803 ack:
804         ack(c, prevrcvnxt != c->rcv.nxt);
805         return 0;
806
807 reset:
808         swap_ports(&hdr);
809         hdr.wnd = 0;
810         if(hdr.ctl & ACK) {
811                 hdr.seq = hdr.ack;
812                 hdr.ctl = RST;
813         } else {
814                 hdr.ack = hdr.seq + len;
815                 hdr.seq = 0;
816                 hdr.ctl = RST | ACK;
817         }
818         print_packet(utcp, "send", &hdr, sizeof hdr);
819         utcp->send(utcp, &hdr, sizeof hdr);
820         return 0;
821
822 }
823
824 int utcp_shutdown(struct utcp_connection *c, int dir) {
825         debug("%p shutdown %d\n", c ? c->utcp : NULL, dir);
826         if(!c) {
827                 errno = EFAULT;
828                 return -1;
829         }
830
831         if(c->reapable) {
832                 debug("Error: shutdown() called on closed connection %p\n", c);
833                 errno = EBADF;
834                 return -1;
835         }
836
837         // TODO: handle dir
838
839         switch(c->state) {
840         case CLOSED:
841                 return 0;
842         case LISTEN:
843         case SYN_SENT:
844                 set_state(c, CLOSED);
845                 return 0;
846
847         case SYN_RECEIVED:
848         case ESTABLISHED:
849                 set_state(c, FIN_WAIT_1);
850                 break;
851         case FIN_WAIT_1:
852         case FIN_WAIT_2:
853                 return 0;
854         case CLOSE_WAIT:
855                 set_state(c, CLOSING);
856                 break;
857
858         case CLOSING:
859         case LAST_ACK:
860         case TIME_WAIT:
861                 return 0;
862         }
863
864         c->snd.last++;
865
866         ack(c, false);
867         return 0;
868 }
869
870 int utcp_close(struct utcp_connection *c) {
871         if(utcp_shutdown(c, SHUT_RDWR))
872                 return -1;
873         c->reapable = true;
874         return 0;
875 }
876
877 int utcp_abort(struct utcp_connection *c) {
878         if(!c) {
879                 errno = EFAULT;
880                 return -1;
881         }
882
883         if(c->reapable) {
884                 debug("Error: abort() called on closed connection %p\n", c);
885                 errno = EBADF;
886                 return -1;
887         }
888
889         c->reapable = true;
890
891         switch(c->state) {
892         case CLOSED:
893                 return 0;
894         case LISTEN:
895         case SYN_SENT:
896         case CLOSING:
897         case LAST_ACK:
898         case TIME_WAIT:
899                 set_state(c, CLOSED);
900                 return 0;
901
902         case SYN_RECEIVED:
903         case ESTABLISHED:
904         case FIN_WAIT_1:
905         case FIN_WAIT_2:
906         case CLOSE_WAIT:
907                 set_state(c, CLOSED);
908                 break;
909         }
910
911         // Send RST
912
913         struct hdr hdr;
914
915         hdr.src = c->src;
916         hdr.dst = c->dst;
917         hdr.seq = c->snd.nxt;
918         hdr.ack = 0;
919         hdr.wnd = 0;
920         hdr.ctl = RST;
921
922         print_packet(c->utcp, "send", &hdr, sizeof hdr);
923         c->utcp->send(c->utcp, &hdr, sizeof hdr);
924         return 0;
925 }
926
927 static void retransmit(struct utcp_connection *c) {
928         if(c->state == CLOSED || c->snd.nxt == c->snd.una)
929                 return;
930
931         struct utcp *utcp = c->utcp;
932
933         struct {
934                 struct hdr hdr;
935                 char data[c->utcp->mtu];
936         } pkt;
937
938         pkt.hdr.src = c->src;
939         pkt.hdr.dst = c->dst;
940
941         switch(c->state) {
942                 case LISTEN:
943                         // TODO: this should not happen
944                         break;
945
946                 case SYN_SENT:
947                         pkt.hdr.seq = c->snd.iss;
948                         pkt.hdr.ack = 0;
949                         pkt.hdr.wnd = c->rcv.wnd;
950                         pkt.hdr.ctl = SYN;
951                         print_packet(c->utcp, "rtrx", &pkt, sizeof pkt.hdr);
952                         utcp->send(utcp, &pkt, sizeof pkt.hdr);
953                         break;
954
955                 case SYN_RECEIVED:
956                         pkt.hdr.seq = c->snd.nxt;
957                         pkt.hdr.ack = c->rcv.nxt;
958                         pkt.hdr.ctl = SYN | ACK;
959                         print_packet(c->utcp, "rtrx", &pkt, sizeof pkt.hdr);
960                         utcp->send(utcp, &pkt, sizeof pkt.hdr);
961                         break;
962
963                 case ESTABLISHED:
964                 case FIN_WAIT_1:
965                         pkt.hdr.seq = c->snd.una;
966                         pkt.hdr.ack = c->rcv.nxt;
967                         pkt.hdr.ctl = ACK;
968                         uint32_t len = seqdiff(c->snd.nxt, c->snd.una);
969                         if(c->state == FIN_WAIT_1)
970                                 len--;
971                         if(len > utcp->mtu)
972                                 len = utcp->mtu;
973                         else {
974                                 if(c->state == FIN_WAIT_1)
975                                         pkt.hdr.ctl |= FIN;
976                         }
977                         memcpy(pkt.data, c->sndbuf, len);
978                         print_packet(c->utcp, "rtrx", &pkt, sizeof pkt.hdr + len);
979                         utcp->send(utcp, &pkt, sizeof pkt.hdr + len);
980                         break;
981
982                 default:
983                         // TODO: implement
984                         abort();
985         }
986 }
987
988 /* Handle timeouts.
989  * One call to this function will loop through all connections,
990  * checking if something needs to be resent or not.
991  * The return value is the time to the next timeout in milliseconds,
992  * or maybe a negative value if the timeout is infinite.
993  */
994 int utcp_timeout(struct utcp *utcp) {
995         struct timeval now;
996         gettimeofday(&now, NULL);
997         struct timeval next = {now.tv_sec + 3600, now.tv_usec};
998
999         for(int i = 0; i < utcp->nconnections; i++) {
1000                 struct utcp_connection *c = utcp->connections[i];
1001                 if(!c)
1002                         continue;
1003
1004                 if(c->state == CLOSED) {
1005                         if(c->reapable) {
1006                                 debug("Reaping %p\n", c);
1007                                 free_connection(c);
1008                                 i--;
1009                         }
1010                         continue;
1011                 }
1012
1013                 if(timerisset(&c->conn_timeout) && timercmp(&c->conn_timeout, &now, <)) {
1014                         errno = ETIMEDOUT;
1015                         c->state = CLOSED;
1016                         if(c->recv)
1017                                 c->recv(c, NULL, 0);
1018                         continue;
1019                 }
1020
1021                 if(timerisset(&c->rtrx_timeout) && timercmp(&c->rtrx_timeout, &now, <)) {
1022                         retransmit(c);
1023                 }
1024
1025                 if(c->poll && c->sndbufsize < c->maxsndbufsize / 2)
1026                         c->poll(c, c->maxsndbufsize - c->sndbufsize);
1027
1028                 if(timerisset(&c->conn_timeout) && timercmp(&c->conn_timeout, &next, <))
1029                         next = c->conn_timeout;
1030
1031                 if(c->snd.nxt != c->snd.una) {
1032                         c->rtrx_timeout = now;
1033                         c->rtrx_timeout.tv_sec++;
1034                 } else {
1035                         timerclear(&c->rtrx_timeout);
1036                 }
1037
1038                 if(timerisset(&c->rtrx_timeout) && timercmp(&c->rtrx_timeout, &next, <))
1039                         next = c->rtrx_timeout;
1040         }
1041
1042         struct timeval diff;
1043         timersub(&next, &now, &diff);
1044         if(diff.tv_sec < 0)
1045                 return 0;
1046         return diff.tv_sec * 1000 + diff.tv_usec / 1000;
1047 }
1048
1049 struct utcp *utcp_init(utcp_accept_t accept, utcp_pre_accept_t pre_accept, utcp_send_t send, void *priv) {
1050         struct utcp *utcp = calloc(1, sizeof *utcp);
1051         if(!utcp)
1052                 return NULL;
1053
1054         if(!send) {
1055                 errno = EFAULT;
1056                 return NULL;
1057         }
1058
1059         utcp->accept = accept;
1060         utcp->pre_accept = pre_accept;
1061         utcp->send = send;
1062         utcp->priv = priv;
1063         utcp->mtu = 1000;
1064         utcp->timeout = 60;
1065
1066         return utcp;
1067 }
1068
1069 void utcp_exit(struct utcp *utcp) {
1070         if(!utcp)
1071                 return;
1072         for(int i = 0; i < utcp->nconnections; i++) {
1073                 if(!utcp->connections[i]->reapable)
1074                         debug("Warning, freeing unclosed connection %p\n", utcp->connections[i]);
1075                 free(utcp->connections[i]->sndbuf);
1076                 free(utcp->connections[i]);
1077         }
1078         free(utcp->connections);
1079         free(utcp);
1080 }
1081
1082 uint16_t utcp_get_mtu(struct utcp *utcp) {
1083         return utcp->mtu;
1084 }
1085
1086 void utcp_set_mtu(struct utcp *utcp, uint16_t mtu) {
1087         // TODO: handle overhead of the header
1088         utcp->mtu = mtu;
1089 }
1090
1091 int utcp_get_user_timeout(struct utcp *u) {
1092         return u->timeout;
1093 }
1094
1095 void utcp_set_user_timeout(struct utcp *u, int timeout) {
1096         u->timeout = timeout;
1097 }
1098
1099 size_t utcp_get_sndbuf(struct utcp_connection *c) {
1100         return c->maxsndbufsize;
1101 }
1102
1103 size_t utcp_get_sndbuf_free(struct utcp_connection *c) {
1104         return c->maxsndbufsize - c->sndbufsize;
1105 }
1106
1107 void utcp_set_sndbuf(struct utcp_connection *c, size_t size) {
1108         c->maxsndbufsize = size;
1109         if(c->maxsndbufsize != size)
1110                 c->maxsndbufsize = -1;
1111 }
1112
1113 bool utcp_get_nodelay(struct utcp_connection *c) {
1114         return c->nodelay;
1115 }
1116
1117 void utcp_set_nodelay(struct utcp_connection *c, bool nodelay) {
1118         c->nodelay = nodelay;
1119 }
1120
1121 bool utcp_get_keepalive(struct utcp_connection *c) {
1122         return c->keepalive;
1123 }
1124
1125 void utcp_set_keepalive(struct utcp_connection *c, bool keepalive) {
1126         c->keepalive = keepalive;
1127 }
1128
1129 size_t utcp_get_outq(struct utcp_connection *c) {
1130         return seqdiff(c->snd.nxt, c->snd.una);
1131 }
1132
1133 void utcp_set_recv_cb(struct utcp_connection *c, utcp_recv_t recv) {
1134         c->recv = recv;
1135 }
1136
1137 void utcp_set_poll_cb(struct utcp_connection *c, utcp_poll_t poll) {
1138         c->poll = poll;
1139 }