git.meshlink.io Git - utcp/blob - utcp.c
Implement slow start.
[utcp] / utcp.c
1 /*
2     utcp.c -- Userspace TCP
3     Copyright (C) 2014 Guus Sliepen <guus@tinc-vpn.org>
4
5     This program is free software; you can redistribute it and/or modify
6     it under the terms of the GNU General Public License as published by
7     the Free Software Foundation; either version 2 of the License, or
8     (at your option) any later version.
9
10     This program is distributed in the hope that it will be useful,
11     but WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13     GNU General Public License for more details.
14
15     You should have received a copy of the GNU General Public License along
16     with this program; if not, write to the Free Software Foundation, Inc.,
17     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20 #define _GNU_SOURCE
21
22 #include <assert.h>
23 #include <errno.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <stdint.h>
27 #include <stdbool.h>
28 #include <string.h>
29 #include <unistd.h>
30 #include <sys/time.h>
31 #include <sys/socket.h>
32
33 #include "utcp_priv.h"
34
35 #ifdef UTCP_DEBUG
36 #include <stdarg.h>
37
/*
 * Write a printf-style formatted debug message to stderr.
 *
 * format: printf-style format string.
 * ...:    arguments consumed by the format string.
 */
static void debug(const char *format, ...) {
	va_list args;

	va_start(args, format);
	vfprintf(stderr, format, args);
	va_end(args);
}
44
45 static void print_packet(struct utcp *utcp, const char *dir, const void *pkt, size_t len) {
46         struct hdr hdr;
47         if(len < sizeof hdr) {
48                 debug("%p %s: short packet (%zu bytes)\n", utcp, dir, len);
49                 return;
50         }
51
52         memcpy(&hdr, pkt, sizeof hdr);
53         fprintf (stderr, "%p %s: len=%zu, src=%u dst=%u seq=%u ack=%u wnd=%u ctl=", utcp, dir, len, hdr.src, hdr.dst, hdr.seq, hdr.ack, hdr.wnd);
54         if(hdr.ctl & SYN)
55                 debug("SYN");
56         if(hdr.ctl & RST)
57                 debug("RST");
58         if(hdr.ctl & FIN)
59                 debug("FIN");
60         if(hdr.ctl & ACK)
61                 debug("ACK");
62
63         if(len > sizeof hdr) {
64                 debug(" data=");
65                 for(int i = sizeof hdr; i < len; i++) {
66                         const char *data = pkt;
67                         debug("%c", data[i] >= 32 ? data[i] : '.');
68                 }
69         }
70
71         debug("\n");
72 }
73 #else
74 #define debug(...)
75 #define print_packet(...)
76 #endif
77
78 static void set_state(struct utcp_connection *c, enum state state) {
79         c->state = state;
80         if(state == ESTABLISHED)
81                 timerclear(&c->conn_timeout);
82         debug("%p new state: %s\n", c->utcp, strstate[state]);
83 }
84
85 static inline void list_connections(struct utcp *utcp) {
86         debug("%p has %d connections:\n", utcp, utcp->nconnections);
87         for(int i = 0; i < utcp->nconnections; i++)
88                 debug("  %u -> %u state %s\n", utcp->connections[i]->src, utcp->connections[i]->dst, strstate[utcp->connections[i]->state]);
89 }
90
/*
 * Difference a - b between two 32-bit sequence numbers, computed modulo 2^32
 * and returned as a signed value, so that wrapped-around sequence numbers
 * still compare as "shortly before" or "shortly after" each other.
 */
static int32_t seqdiff(uint32_t a, uint32_t b) {
	uint32_t delta = a - b;

	return delta;
}
94
95 // Connections are stored in a sorted list.
96 // This gives O(log(N)) lookup time, O(N log(N)) insertion time and O(N) deletion time.
97
98 static int compare(const void *va, const void *vb) {
99         const struct utcp_connection *a = *(struct utcp_connection **)va;
100         const struct utcp_connection *b = *(struct utcp_connection **)vb;
101
102         assert(a->src && b->src);
103
104         int c = (int)a->src - (int)b->src;
105         if(c)
106                 return c;
107         c = (int)a->dst - (int)b->dst;
108         return c;
109 }
110
111 static struct utcp_connection *find_connection(const struct utcp *utcp, uint16_t src, uint16_t dst) {
112         if(!utcp->nconnections)
113                 return NULL;
114         struct utcp_connection key = {
115                 .src = src,
116                 .dst = dst,
117         }, *keyp = &key;
118         struct utcp_connection **match = bsearch(&keyp, utcp->connections, utcp->nconnections, sizeof *utcp->connections, compare);
119         return match ? *match : NULL;
120 }
121
122 static void free_connection(struct utcp_connection *c) {
123         struct utcp *utcp = c->utcp;
124         struct utcp_connection **cp = bsearch(&c, utcp->connections, utcp->nconnections, sizeof *utcp->connections, compare);
125
126         assert(cp);
127
128         int i = cp - utcp->connections;
129         memmove(cp + i, cp + i + 1, (utcp->nconnections - i - 1) * sizeof *cp);
130         utcp->nconnections--;
131
132         free(c->sndbuf);
133         free(c);
134 }
135
136 static struct utcp_connection *allocate_connection(struct utcp *utcp, uint16_t src, uint16_t dst) {
137         // Check whether this combination of src and dst is free
138
139         if(src) {
140                 if(find_connection(utcp, src, dst)) {
141                         errno = EADDRINUSE;
142                         return NULL;
143                 }
144         } else { // If src == 0, generate a random port number with the high bit set
145                 if(utcp->nconnections >= 32767) {
146                         errno = ENOMEM;
147                         return NULL;
148                 }
149                 src = rand() | 0x8000;
150                 while(find_connection(utcp, src, dst))
151                         src++;
152         }
153
154         // Allocate memory for the new connection
155
156         if(utcp->nconnections >= utcp->nallocated) {
157                 if(!utcp->nallocated)
158                         utcp->nallocated = 4;
159                 else
160                         utcp->nallocated *= 2;
161                 struct utcp_connection **new_array = realloc(utcp->connections, utcp->nallocated * sizeof *utcp->connections);
162                 if(!new_array)
163                         return NULL;
164                 utcp->connections = new_array;
165         }
166
167         struct utcp_connection *c = calloc(1, sizeof *c);
168         if(!c)
169                 return NULL;
170
171         c->sndbufsize = DEFAULT_SNDBUFSIZE;
172         c->maxsndbufsize = DEFAULT_MAXSNDBUFSIZE;
173         c->sndbuf = malloc(c->sndbufsize);
174         if(!c->sndbuf) {
175                 free(c);
176                 return NULL;
177         }
178
179         // Fill in the details
180
181         c->src = src;
182         c->dst = dst;
183         c->snd.iss = rand();
184         c->snd.una = c->snd.iss;
185         c->snd.nxt = c->snd.iss + 1;
186         c->rcv.wnd = utcp->mtu;
187         c->snd.last = c->snd.nxt;
188         c->snd.cwnd = utcp->mtu;
189         c->utcp = utcp;
190
191         // Add it to the sorted list of connections
192
193         utcp->connections[utcp->nconnections++] = c;
194         qsort(utcp->connections, utcp->nconnections, sizeof *utcp->connections, compare);
195
196         return c;
197 }
198
199 struct utcp_connection *utcp_connect(struct utcp *utcp, uint16_t dst, utcp_recv_t recv, void *priv) {
200         struct utcp_connection *c = allocate_connection(utcp, 0, dst);
201         if(!c)
202                 return NULL;
203
204         c->recv = recv;
205
206         struct hdr hdr;
207
208         hdr.src = c->src;
209         hdr.dst = c->dst;
210         hdr.seq = c->snd.iss;
211         hdr.ack = 0;
212         hdr.wnd = c->rcv.wnd;
213         hdr.ctl = SYN;
214         hdr.aux = 0;
215
216         set_state(c, SYN_SENT);
217
218         print_packet(utcp, "send", &hdr, sizeof hdr);
219         utcp->send(utcp, &hdr, sizeof hdr);
220
221         gettimeofday(&c->conn_timeout, NULL);
222         c->conn_timeout.tv_sec += utcp->timeout;
223
224         return c;
225 }
226
227 void utcp_accept(struct utcp_connection *c, utcp_recv_t recv, void *priv) {
228         if(c->reapable || c->state != SYN_RECEIVED) {
229                 debug("Error: accept() called on invalid connection %p in state %s\n", c, strstate[c->state]);
230                 return;
231         }
232
233         debug("%p accepted, %p %p\n", c, recv, priv);
234         c->recv = recv;
235         c->priv = priv;
236         set_state(c, ESTABLISHED);
237 }
238
239 static void ack(struct utcp_connection *c, bool sendatleastone) {
240         int32_t left = seqdiff(c->snd.last, c->snd.nxt);
241         int32_t cwndleft = c->snd.cwnd - seqdiff(c->snd.nxt, c->snd.una);
242         char *data = c->sndbuf + seqdiff(c->snd.nxt, c->snd.una);
243
244         assert(left >= 0);
245
246         if(cwndleft <= 0)
247                 cwndleft = 0;
248
249         if(cwndleft < left)
250                 left = cwndleft;
251
252         if(!left && !sendatleastone)
253                 return;
254
255         struct {
256                 struct hdr hdr;
257                 char data[c->utcp->mtu];
258         } pkt;
259
260         pkt.hdr.src = c->src;
261         pkt.hdr.dst = c->dst;
262         pkt.hdr.ack = c->rcv.nxt;
263         pkt.hdr.wnd = c->snd.wnd;
264         pkt.hdr.ctl = ACK;
265         pkt.hdr.aux = 0;
266
267         do {
268                 uint32_t seglen = left > c->utcp->mtu ? c->utcp->mtu : left;
269                 pkt.hdr.seq = c->snd.nxt;
270
271                 memcpy(pkt.data, data, seglen);
272
273                 c->snd.nxt += seglen;
274                 data += seglen;
275                 left -= seglen;
276
277                 if(c->state != ESTABLISHED && !left && seglen) {
278                         switch(c->state) {
279                         case FIN_WAIT_1:
280                         case CLOSING:
281                                 seglen--;
282                                 pkt.hdr.ctl |= FIN;
283                                 break;
284                         default:
285                                 break;
286                         }
287                 }
288
289                 print_packet(c->utcp, "send", &pkt, sizeof pkt.hdr + seglen);
290                 c->utcp->send(c->utcp, &pkt, sizeof pkt.hdr + seglen);
291         } while(left);
292 }
293
294 ssize_t utcp_send(struct utcp_connection *c, const void *data, size_t len) {
295         if(c->reapable) {
296                 debug("Error: send() called on closed connection %p\n", c);
297                 errno = EBADF;
298                 return -1;
299         }
300
301         switch(c->state) {
302         case CLOSED:
303         case LISTEN:
304         case SYN_SENT:
305         case SYN_RECEIVED:
306                 debug("Error: send() called on unconnected connection %p\n", c);
307                 errno = ENOTCONN;
308                 return -1;
309         case ESTABLISHED:
310         case CLOSE_WAIT:
311                 break;
312         case FIN_WAIT_1:
313         case FIN_WAIT_2:
314         case CLOSING:
315         case LAST_ACK:
316         case TIME_WAIT:
317                 debug("Error: send() called on closing connection %p\n", c);
318                 errno = EPIPE;
319                 return -1;
320         }
321
322         // Add data to send buffer
323
324         if(!len)
325                 return 0;
326
327         if(!data) {
328                 errno = EFAULT;
329                 return -1;
330         }
331
332         uint32_t bufused = seqdiff(c->snd.nxt, c->snd.una);
333
334         /* Check our send buffer.
335          * - If it's big enough, just put the data in there.
336          * - If not, decide whether to enlarge if possible.
337          * - Cap len so it doesn't overflow our buffer.
338          */
339
340         if(len > c->sndbufsize - bufused && c->sndbufsize < c->maxsndbufsize) {
341                 uint32_t newbufsize;
342                 if(c->sndbufsize > c->maxsndbufsize / 2)
343                         newbufsize = c->maxsndbufsize;
344                 else
345                         newbufsize = c->sndbufsize * 2;
346                 if(bufused + len > newbufsize) {
347                         if(bufused + len > c->maxsndbufsize)
348                                 newbufsize = c->maxsndbufsize;
349                         else
350                                 newbufsize = bufused + len;
351                 }
352                 char *newbuf = realloc(c->sndbuf, newbufsize);
353                 if(newbuf) {
354                         c->sndbuf = newbuf;
355                         c->sndbufsize = newbufsize;
356                 }
357         }
358
359         if(len > c->sndbufsize - bufused)
360                 len = c->sndbufsize - bufused;
361
362         if(!len) {
363                 errno == EWOULDBLOCK;
364                 return 0;
365         }
366
367         memcpy(c->sndbuf + bufused, data, len);
368         c->snd.last += len;
369
370         ack(c, false);
371         return len;
372 }
373
374 static void swap_ports(struct hdr *hdr) {
375         uint16_t tmp = hdr->src;
376         hdr->src = hdr->dst;
377         hdr->dst = tmp;
378 }
379
380 ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) {
381         if(!utcp) {
382                 errno = EFAULT;
383                 return -1;
384         }
385
386         if(!len)
387                 return 0;
388
389         if(!data) {
390                 errno = EFAULT;
391                 return -1;
392         }
393
394         print_packet(utcp, "recv", data, len);
395
396         // Drop packets smaller than the header
397
398         struct hdr hdr;
399         if(len < sizeof hdr) {
400                 errno = EBADMSG;
401                 return -1;
402         }
403
404         // Make a copy from the potentially unaligned data to a struct hdr
405
406         memcpy(&hdr, data, sizeof hdr);
407         data += sizeof hdr;
408         len -= sizeof hdr;
409
410         // Drop packets with an unknown CTL flag
411
412         if(hdr.ctl & ~(SYN | ACK | RST | FIN)) {
413                 errno = EBADMSG;
414                 return -1;
415         }
416
417         // Try to match the packet to an existing connection
418
419         struct utcp_connection *c = find_connection(utcp, hdr.dst, hdr.src);
420
421         // Is it for a new connection?
422
423         if(!c) {
424                 // Ignore RST packets
425
426                 if(hdr.ctl & RST)
427                         return 0;
428
429                 // Is it a SYN packet and are we LISTENing?
430
431                 if(hdr.ctl & SYN && !(hdr.ctl & ACK) && utcp->accept) {
432                         // If we don't want to accept it, send a RST back
433                         if((utcp->pre_accept && !utcp->pre_accept(utcp, hdr.dst))) {
434                                 len = 1;
435                                 goto reset;
436                         }
437
438                         // Try to allocate memory, otherwise send a RST back
439                         c = allocate_connection(utcp, hdr.dst, hdr.src);
440                         if(!c) {
441                                 len = 1;
442                                 goto reset;
443                         }
444
445                         // Return SYN+ACK, go to SYN_RECEIVED state
446                         c->snd.wnd = hdr.wnd;
447                         c->rcv.irs = hdr.seq;
448                         c->rcv.nxt = c->rcv.irs + 1;
449                         set_state(c, SYN_RECEIVED);
450
451                         hdr.dst = c->dst;
452                         hdr.src = c->src;
453                         hdr.ack = c->rcv.irs + 1;
454                         hdr.seq = c->snd.iss;
455                         hdr.ctl = SYN | ACK;
456                         print_packet(c->utcp, "send", &hdr, sizeof hdr);
457                         utcp->send(utcp, &hdr, sizeof hdr);
458                 } else {
459                         // No, we don't want your packets, send a RST back
460                         len = 1;
461                         goto reset;
462                 }
463
464                 return 0;
465         }
466
467         debug("%p state %s\n", c->utcp, strstate[c->state]);
468
469         // In case this is for a CLOSED connection, ignore the packet.
470         // TODO: make it so incoming packets can never match a CLOSED connection.
471
472         if(c->state == CLOSED)
473                 return 0;
474
475         // It is for an existing connection.
476
477         // 1. Drop invalid packets.
478
479         // 1a. Drop packets that should not happen in our current state.
480
481         switch(c->state) {
482         case SYN_SENT:
483         case SYN_RECEIVED:
484         case ESTABLISHED:
485         case FIN_WAIT_1:
486         case FIN_WAIT_2:
487         case CLOSE_WAIT:
488         case CLOSING:
489         case LAST_ACK:
490         case TIME_WAIT:
491                 break;
492         default:
493                 abort();
494         }
495
496         // 1b. Drop packets with a sequence number not in our receive window.
497
498         bool acceptable;
499
500         if(c->state == SYN_SENT)
501                 acceptable = true;
502
503         // TODO: handle packets overlapping c->rcv.nxt.
504 #if 0
505         // Only use this when accepting out-of-order packets.
506         else if(len == 0)
507                 if(c->rcv.wnd == 0)
508                         acceptable = hdr.seq == c->rcv.nxt;
509                 else
510                         acceptable = (seqdiff(hdr.seq, c->rcv.nxt) >= 0 && seqdiff(hdr.seq, c->rcv.nxt + c->rcv.wnd) < 0);
511         else
512                 if(c->rcv.wnd == 0)
513                         // We don't accept data when the receive window is zero.
514                         acceptable = false;
515                 else
516                         // Both start and end of packet must be within the receive window
517                         acceptable = (seqdiff(hdr.seq, c->rcv.nxt) >= 0 && seqdiff(hdr.seq, c->rcv.nxt + c->rcv.wnd) < 0)
518                                 || (seqdiff(hdr.seq + len + 1, c->rcv.nxt) >= 0 && seqdiff(hdr.seq + len - 1, c->rcv.nxt + c->rcv.wnd) < 0);
519 #else
520         if(c->state != SYN_SENT)
521                 acceptable = hdr.seq == c->rcv.nxt;
522 #endif
523
524         if(!acceptable) {
525                 debug("Packet not acceptable, %u  <= %u + %zu < %u\n", c->rcv.nxt, hdr.seq, len, c->rcv.nxt + c->rcv.wnd);
526                 // Ignore unacceptable RST packets.
527                 if(hdr.ctl & RST)
528                         return 0;
529                 // Otherwise, send an ACK back in the hope things improve.
530                 goto ack;
531         }
532
533         c->snd.wnd = hdr.wnd; // TODO: move below
534
535         // 1c. Drop packets with an invalid ACK.
536         // ackno should not roll back, and it should also not be bigger than snd.nxt.
537
538         if(hdr.ctl & ACK && (seqdiff(hdr.ack, c->snd.nxt) > 0 || seqdiff(hdr.ack, c->snd.una) < 0)) {
539                 debug("Packet ack seqno out of range, %u %u %u\n", hdr.ack, c->snd.una, c->snd.nxt);
540                 // Ignore unacceptable RST packets.
541                 if(hdr.ctl & RST)
542                         return 0;
543                 goto reset;
544         }
545
546         // 2. Handle RST packets
547
548         if(hdr.ctl & RST) {
549                 switch(c->state) {
550                 case SYN_SENT:
551                         if(!(hdr.ctl & ACK))
552                                 return 0;
553                         // The peer has refused our connection.
554                         set_state(c, CLOSED);
555                         errno = ECONNREFUSED;
556                         if(c->recv)
557                                 c->recv(c, NULL, 0);
558                         return 0;
559                 case SYN_RECEIVED:
560                         if(hdr.ctl & ACK)
561                                 return 0;
562                         // We haven't told the application about this connection yet. Silently delete.
563                         free_connection(c);
564                         return 0;
565                 case ESTABLISHED:
566                 case FIN_WAIT_1:
567                 case FIN_WAIT_2:
568                 case CLOSE_WAIT:
569                         if(hdr.ctl & ACK)
570                                 return 0;
571                         // The peer has aborted our connection.
572                         set_state(c, CLOSED);
573                         errno = ECONNRESET;
574                         if(c->recv)
575                                 c->recv(c, NULL, 0);
576                         return 0;
577                 case CLOSING:
578                 case LAST_ACK:
579                 case TIME_WAIT:
580                         if(hdr.ctl & ACK)
581                                 return 0;
582                         // As far as the application is concerned, the connection has already been closed.
583                         // If it has called utcp_close() already, we can immediately free this connection.
584                         if(c->reapable) {
585                                 free_connection(c);
586                                 return 0;
587                         }
588                         // Otherwise, immediately move to the CLOSED state.
589                         set_state(c, CLOSED);
590                         return 0;
591                 default:
592                         abort();
593                 }
594         }
595
596         // 3. Advance snd.una
597
598         uint32_t advanced = seqdiff(hdr.ack, c->snd.una);
599         uint32_t prevrcvnxt = c->rcv.nxt;
600
601         if(advanced) {
602                 int32_t data_acked = advanced;
603
604                 switch(c->state) {
605                         case SYN_SENT:
606                         case SYN_RECEIVED:
607                                 data_acked--;
608                                 break;
609                         // TODO: handle FIN as well.
610                         default:
611                                 break;
612                 }
613
614                 assert(data_acked >= 0);
615
616                 int32_t bufused = seqdiff(c->snd.last, c->snd.una);
617                 assert(data_acked <= bufused);
618
619                 // Make room in the send buffer.
620                 // TODO: try to avoid memmoving too much. Circular buffer?
621                 uint32_t left = bufused - data_acked;
622                 if(data_acked && left)
623                         memmove(c->sndbuf, c->sndbuf + data_acked, left);
624
625                 c->snd.una = hdr.ack;
626
627                 c->dupack = 0;
628                 c->snd.cwnd += utcp->mtu;
629                 if(c->snd.cwnd > c->maxsndbufsize)
630                         c->snd.cwnd = c->maxsndbufsize;
631
632                 // Check if we have sent a FIN that is now ACKed.
633                 switch(c->state) {
634                 case FIN_WAIT_1:
635                         if(c->snd.una == c->snd.last)
636                                 set_state(c, FIN_WAIT_2);
637                         break;
638                 case CLOSING:
639                         if(c->snd.una == c->snd.last) {
640                                 gettimeofday(&c->conn_timeout, NULL);
641                                 c->conn_timeout.tv_sec += 60;
642                                 set_state(c, TIME_WAIT);
643                         }
644                         break;
645                 default:
646                         break;
647                 }
648         } else {
649                 if(!len) {
650                         c->dupack++;
651                         if(c->dupack >= 3) {
652                                 debug("Triplicate ACK\n");
653                                 //TODO: Resend one packet and go to fast recovery mode. See RFC 6582.
654                                 //abort();
655                         }
656                 }
657         }
658
659         // 4. Update timers
660
661         if(advanced) {
662                 timerclear(&c->conn_timeout); // It will be set anew in utcp_timeout() if c->snd.una != c->snd.nxt.
663                 if(c->snd.una == c->snd.nxt)
664                         timerclear(&c->rtrx_timeout);
665         }
666
667         // 5. Process SYN stuff
668
669         if(hdr.ctl & SYN) {
670                 switch(c->state) {
671                 case SYN_SENT:
672                         // This is a SYNACK. It should always have ACKed the SYN.
673                         if(!advanced)
674                                 goto reset;
675                         c->rcv.irs = hdr.seq;
676                         c->rcv.nxt = hdr.seq;
677                         set_state(c, ESTABLISHED);
678                         // TODO: notify application of this somehow.
679                         break;
680                 case SYN_RECEIVED:
681                 case ESTABLISHED:
682                 case FIN_WAIT_1:
683                 case FIN_WAIT_2:
684                 case CLOSE_WAIT:
685                 case CLOSING:
686                 case LAST_ACK:
687                 case TIME_WAIT:
688                         // Ehm, no. We should never receive a second SYN.
689                         goto reset;
690                 default:
691                         abort();
692                 }
693
694                 // SYN counts as one sequence number
695                 c->rcv.nxt++;
696         }
697
698         // 6. Process new data
699
700         if(c->state == SYN_RECEIVED) {
701                 // This is the ACK after the SYNACK. It should always have ACKed the SYNACK.
702                 if(!advanced)
703                         goto reset;
704
705                 // Are we still LISTENing?
706                 if(utcp->accept)
707                         utcp->accept(c, c->src);
708
709                 if(c->state != ESTABLISHED) {
710                         set_state(c, CLOSED);
711                         c->reapable = true;
712                         goto reset;
713                 }
714         }
715
716         if(len) {
717                 switch(c->state) {
718                 case SYN_SENT:
719                 case SYN_RECEIVED:
720                         // This should never happen.
721                         abort();
722                 case ESTABLISHED:
723                 case FIN_WAIT_1:
724                 case FIN_WAIT_2:
725                         break;
726                 case CLOSE_WAIT:
727                 case CLOSING:
728                 case LAST_ACK:
729                 case TIME_WAIT:
730                         // Ehm no, We should never receive more data after a FIN.
731                         goto reset;
732                 default:
733                         abort();
734                 }
735
736                 ssize_t rxd;
737
738                 if(c->recv) {
739                         rxd = c->recv(c, data, len);
740                         if(rxd != len) {
741                                 // TODO: once we have a receive buffer, handle the application not accepting all data.
742                                 fprintf(stderr, "c->recv(%p, %p, %zu) returned %zd\n", c, data, len, rxd);
743                                 abort();
744                         }
745                         if(rxd < 0)
746                                 rxd = 0;
747                         else if(rxd > len)
748                                 rxd = len; // Bad application, bad!
749                 } else {
750                         rxd = len;
751                 }
752
753                 c->rcv.nxt += len;
754         }
755
756         // 7. Process FIN stuff
757
758         if(hdr.ctl & FIN) {
759                 switch(c->state) {
760                 case SYN_SENT:
761                 case SYN_RECEIVED:
762                         // This should never happen.
763                         abort();
764                 case ESTABLISHED:
765                         set_state(c, CLOSE_WAIT);
766                         break;
767                 case FIN_WAIT_1:
768                         set_state(c, CLOSING);
769                         break;
770                 case FIN_WAIT_2:
771                         gettimeofday(&c->conn_timeout, NULL);
772                         c->conn_timeout.tv_sec += 60;
773                         set_state(c, TIME_WAIT);
774                         break;
775                 case CLOSE_WAIT:
776                 case CLOSING:
777                 case LAST_ACK:
778                 case TIME_WAIT:
779                         // Ehm, no. We should never receive a second FIN.
780                         goto reset;
781                 default:
782                         abort();
783                 }
784
785                 // FIN counts as one sequence number
786                 c->rcv.nxt++;
787                 len++;
788
789                 // Inform the application that the peer closed the connection.
790                 if(c->recv) {
791                         errno = 0;
792                         c->recv(c, NULL, 0);
793                 }
794         }
795
796         // Now we send something back if:
797         // - we advanced rcv.nxt (ie, we got some data that needs to be ACKed)
798         //   -> sendatleastone = true
799         // - or we got an ack, so we should maybe send a bit more data
800         //   -> sendatleastone = false
801
802 ack:
803         ack(c, prevrcvnxt != c->rcv.nxt);
804         return 0;
805
806 reset:
807         swap_ports(&hdr);
808         hdr.wnd = 0;
809         if(hdr.ctl & ACK) {
810                 hdr.seq = hdr.ack;
811                 hdr.ctl = RST;
812         } else {
813                 hdr.ack = hdr.seq + len;
814                 hdr.seq = 0;
815                 hdr.ctl = RST | ACK;
816         }
817         print_packet(utcp, "send", &hdr, sizeof hdr);
818         utcp->send(utcp, &hdr, sizeof hdr);
819         return 0;
820
821 }
822
823 int utcp_shutdown(struct utcp_connection *c, int dir) {
824         debug("%p shutdown %d\n", c ? c->utcp : NULL, dir);
825         if(!c) {
826                 errno = EFAULT;
827                 return -1;
828         }
829
830         if(c->reapable) {
831                 debug("Error: shutdown() called on closed connection %p\n", c);
832                 errno = EBADF;
833                 return -1;
834         }
835
836         // TODO: handle dir
837
838         switch(c->state) {
839         case CLOSED:
840                 return 0;
841         case LISTEN:
842         case SYN_SENT:
843                 set_state(c, CLOSED);
844                 return 0;
845
846         case SYN_RECEIVED:
847         case ESTABLISHED:
848                 set_state(c, FIN_WAIT_1);
849                 break;
850         case FIN_WAIT_1:
851         case FIN_WAIT_2:
852                 return 0;
853         case CLOSE_WAIT:
854                 set_state(c, CLOSING);
855                 break;
856
857         case CLOSING:
858         case LAST_ACK:
859         case TIME_WAIT:
860                 return 0;
861         }
862
863         c->snd.last++;
864
865         ack(c, false);
866         return 0;
867 }
868
869 int utcp_close(struct utcp_connection *c) {
870         if(utcp_shutdown(c, SHUT_RDWR))
871                 return -1;
872         c->reapable = true;
873         return 0;
874 }
875
876 int utcp_abort(struct utcp_connection *c) {
877         if(!c) {
878                 errno = EFAULT;
879                 return -1;
880         }
881
882         if(c->reapable) {
883                 debug("Error: abort() called on closed connection %p\n", c);
884                 errno = EBADF;
885                 return -1;
886         }
887
888         c->reapable = true;
889
890         switch(c->state) {
891         case CLOSED:
892                 return 0;
893         case LISTEN:
894         case SYN_SENT:
895         case CLOSING:
896         case LAST_ACK:
897         case TIME_WAIT:
898                 set_state(c, CLOSED);
899                 return 0;
900
901         case SYN_RECEIVED:
902         case ESTABLISHED:
903         case FIN_WAIT_1:
904         case FIN_WAIT_2:
905         case CLOSE_WAIT:
906                 set_state(c, CLOSED);
907                 break;
908         }
909
910         // Send RST
911
912         struct hdr hdr;
913
914         hdr.src = c->src;
915         hdr.dst = c->dst;
916         hdr.seq = c->snd.nxt;
917         hdr.ack = 0;
918         hdr.wnd = 0;
919         hdr.ctl = RST;
920
921         print_packet(c->utcp, "send", &hdr, sizeof hdr);
922         c->utcp->send(c->utcp, &hdr, sizeof hdr);
923         return 0;
924 }
925
926 static void retransmit(struct utcp_connection *c) {
927         if(c->state == CLOSED || c->snd.nxt == c->snd.una)
928                 return;
929
930         struct utcp *utcp = c->utcp;
931
932         struct {
933                 struct hdr hdr;
934                 char data[c->utcp->mtu];
935         } pkt;
936
937         pkt.hdr.src = c->src;
938         pkt.hdr.dst = c->dst;
939
940         switch(c->state) {
941                 case LISTEN:
942                         // TODO: this should not happen
943                         break;
944
945                 case SYN_SENT:
946                         pkt.hdr.seq = c->snd.iss;
947                         pkt.hdr.ack = 0;
948                         pkt.hdr.wnd = c->rcv.wnd;
949                         pkt.hdr.ctl = SYN;
950                         print_packet(c->utcp, "rtrx", &pkt, sizeof pkt.hdr);
951                         utcp->send(utcp, &pkt, sizeof pkt.hdr);
952                         break;
953
954                 case SYN_RECEIVED:
955                         pkt.hdr.seq = c->snd.nxt;
956                         pkt.hdr.ack = c->rcv.nxt;
957                         pkt.hdr.ctl = SYN | ACK;
958                         print_packet(c->utcp, "rtrx", &pkt, sizeof pkt.hdr);
959                         utcp->send(utcp, &pkt, sizeof pkt.hdr);
960                         break;
961
962                 case ESTABLISHED:
963                 case FIN_WAIT_1:
964                         pkt.hdr.seq = c->snd.una;
965                         pkt.hdr.ack = c->rcv.nxt;
966                         pkt.hdr.ctl = ACK;
967                         uint32_t len = seqdiff(c->snd.nxt, c->snd.una);
968                         if(c->state == FIN_WAIT_1)
969                                 len--;
970                         if(len > utcp->mtu)
971                                 len = utcp->mtu;
972                         else {
973                                 if(c->state == FIN_WAIT_1)
974                                         pkt.hdr.ctl |= FIN;
975                         }
976                         memcpy(pkt.data, c->sndbuf, len);
977                         print_packet(c->utcp, "rtrx", &pkt, sizeof pkt.hdr + len);
978                         utcp->send(utcp, &pkt, sizeof pkt.hdr + len);
979                         break;
980
981                 default:
982                         // TODO: implement
983                         abort();
984         }
985 }
986
987 /* Handle timeouts.
988  * One call to this function will loop through all connections,
989  * checking if something needs to be resent or not.
990  * The return value is the time to the next timeout in milliseconds,
991  * or maybe a negative value if the timeout is infinite.
992  */
993 int utcp_timeout(struct utcp *utcp) {
994         struct timeval now;
995         gettimeofday(&now, NULL);
996         struct timeval next = {now.tv_sec + 3600, now.tv_usec};
997
998         for(int i = 0; i < utcp->nconnections; i++) {
999                 struct utcp_connection *c = utcp->connections[i];
1000                 if(!c)
1001                         continue;
1002
1003                 if(c->state == CLOSED) {
1004                         if(c->reapable) {
1005                                 debug("Reaping %p\n", c);
1006                                 free_connection(c);
1007                                 i--;
1008                         }
1009                         continue;
1010                 }
1011
1012                 if(timerisset(&c->conn_timeout) && timercmp(&c->conn_timeout, &now, <)) {
1013                         errno = ETIMEDOUT;
1014                         c->state = CLOSED;
1015                         if(c->recv)
1016                                 c->recv(c, NULL, 0);
1017                         continue;
1018                 }
1019
1020                 if(timerisset(&c->rtrx_timeout) && timercmp(&c->rtrx_timeout, &now, <)) {
1021                         retransmit(c);
1022                 }
1023
1024                 if(timerisset(&c->conn_timeout) && timercmp(&c->conn_timeout, &next, <))
1025                         next = c->conn_timeout;
1026
1027                 if(c->snd.nxt != c->snd.una) {
1028                         c->rtrx_timeout = now;
1029                         c->rtrx_timeout.tv_sec++;
1030                 } else {
1031                         timerclear(&c->rtrx_timeout);
1032                 }
1033
1034                 if(timerisset(&c->rtrx_timeout) && timercmp(&c->rtrx_timeout, &next, <))
1035                         next = c->rtrx_timeout;
1036         }
1037
1038         struct timeval diff;
1039         timersub(&next, &now, &diff);
1040         if(diff.tv_sec < 0)
1041                 return 0;
1042         return diff.tv_sec * 1000 + diff.tv_usec / 1000;
1043 }
1044
1045 struct utcp *utcp_init(utcp_accept_t accept, utcp_pre_accept_t pre_accept, utcp_send_t send, void *priv) {
1046         struct utcp *utcp = calloc(1, sizeof *utcp);
1047         if(!utcp)
1048                 return NULL;
1049
1050         if(!send) {
1051                 errno = EFAULT;
1052                 return NULL;
1053         }
1054
1055         utcp->accept = accept;
1056         utcp->pre_accept = pre_accept;
1057         utcp->send = send;
1058         utcp->priv = priv;
1059         utcp->mtu = 1000;
1060         utcp->timeout = 60;
1061
1062         return utcp;
1063 }
1064
1065 void utcp_exit(struct utcp *utcp) {
1066         if(!utcp)
1067                 return;
1068         for(int i = 0; i < utcp->nconnections; i++) {
1069                 if(!utcp->connections[i]->reapable)
1070                         debug("Warning, freeing unclosed connection %p\n", utcp->connections[i]);
1071                 free(utcp->connections[i]->sndbuf);
1072                 free(utcp->connections[i]);
1073         }
1074         free(utcp->connections);
1075         free(utcp);
1076 }
1077
1078 uint16_t utcp_get_mtu(struct utcp *utcp) {
1079         return utcp->mtu;
1080 }
1081
1082 void utcp_set_mtu(struct utcp *utcp, uint16_t mtu) {
1083         // TODO: handle overhead of the header
1084         utcp->mtu = mtu;
1085 }
1086
1087 int utcp_get_user_timeout(struct utcp *u) {
1088         return u->timeout;
1089 }
1090
1091 void utcp_set_user_timeout(struct utcp *u, int timeout) {
1092         u->timeout = timeout;
1093 }
1094
1095 size_t utcp_get_sndbuf(struct utcp_connection *c) {
1096         return c->maxsndbufsize;
1097 }
1098
1099 void utcp_set_sndbuf(struct utcp_connection *c, size_t size) {
1100         c->maxsndbufsize = size;
1101         if(c->maxsndbufsize != size)
1102                 c->maxsndbufsize = -1;
1103 }
1104
1105 bool utcp_get_nodelay(struct utcp_connection *c) {
1106         return c->nodelay;
1107 }
1108
1109 void utcp_set_nodelay(struct utcp_connection *c, bool nodelay) {
1110         c->nodelay = nodelay;
1111 }
1112
1113 bool utcp_get_keepalive(struct utcp_connection *c) {
1114         return c->keepalive;
1115 }
1116
1117 void utcp_set_keepalive(struct utcp_connection *c, bool keepalive) {
1118         c->keepalive = keepalive;
1119 }
1120
1121 size_t utcp_get_outq(struct utcp_connection *c) {
1122         return seqdiff(c->snd.nxt, c->snd.una);
1123 }