]> git.meshlink.io Git - utcp/blob - utcp.c
Allow putting data in a buffer at an arbitrary offset.
[utcp] / utcp.c
1 /*
2     utcp.c -- Userspace TCP
3     Copyright (C) 2014 Guus Sliepen <guus@tinc-vpn.org>
4
5     This program is free software; you can redistribute it and/or modify
6     it under the terms of the GNU General Public License as published by
7     the Free Software Foundation; either version 2 of the License, or
8     (at your option) any later version.
9
10     This program is distributed in the hope that it will be useful,
11     but WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13     GNU General Public License for more details.
14
15     You should have received a copy of the GNU General Public License along
16     with this program; if not, write to the Free Software Foundation, Inc.,
17     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20 #define _GNU_SOURCE
21
22 #include <assert.h>
23 #include <errno.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <stdint.h>
27 #include <stdbool.h>
28 #include <string.h>
29 #include <unistd.h>
30 #include <sys/time.h>
31 #include <sys/socket.h>
32
33 #include "utcp_priv.h"
34
35 #ifndef EBADMSG
36 #define EBADMSG         104
37 #endif
38
39 #ifndef SHUT_RDWR
40 #define SHUT_RDWR 2
41 #endif
42
43 #ifdef poll
44 #undef poll
45 #endif
46
#ifndef timersub
// Fallback for platforms that do not provide timersub(): store a - b in r,
// borrowing one second when the microsecond difference comes out negative.
#define timersub(a, b, r) do {\
        (r)->tv_sec = (a)->tv_sec - (b)->tv_sec;\
        (r)->tv_usec = (a)->tv_usec - (b)->tv_usec;\
        if((r)->tv_usec < 0) {\
                (r)->tv_sec -= 1;\
                (r)->tv_usec += 1000000;\
        }\
} while (0)
#endif
55
56 #ifdef UTCP_DEBUG
57 #include <stdarg.h>
58
// Write a printf-style formatted diagnostic message to stderr.
static void debug(const char *format, ...) {
        va_list args;

        va_start(args, format);
        vfprintf(stderr, format, args);
        va_end(args);
}
65
66 static void print_packet(struct utcp *utcp, const char *dir, const void *pkt, size_t len) {
67         struct hdr hdr;
68         if(len < sizeof hdr) {
69                 debug("%p %s: short packet (%zu bytes)\n", utcp, dir, len);
70                 return;
71         }
72
73         memcpy(&hdr, pkt, sizeof hdr);
74         fprintf (stderr, "%p %s: len=%zu, src=%u dst=%u seq=%u ack=%u wnd=%u ctl=", utcp, dir, len, hdr.src, hdr.dst, hdr.seq, hdr.ack, hdr.wnd);
75         if(hdr.ctl & SYN)
76                 debug("SYN");
77         if(hdr.ctl & RST)
78                 debug("RST");
79         if(hdr.ctl & FIN)
80                 debug("FIN");
81         if(hdr.ctl & ACK)
82                 debug("ACK");
83
84         if(len > sizeof hdr) {
85                 debug(" data=");
86                 for(int i = sizeof hdr; i < len; i++) {
87                         const char *data = pkt;
88                         debug("%c", data[i] >= 32 ? data[i] : '.');
89                 }
90         }
91
92         debug("\n");
93 }
94 #else
95 #define debug(...)
96 #define print_packet(...)
97 #endif
98
99 static void set_state(struct utcp_connection *c, enum state state) {
100         c->state = state;
101         if(state == ESTABLISHED)
102                 timerclear(&c->conn_timeout);
103         debug("%p new state: %s\n", c->utcp, strstate[state]);
104 }
105
106 static bool fin_wanted(struct utcp_connection *c, uint32_t seq) {
107         if(seq != c->snd.last)
108                 return false;
109         switch(c->state) {
110         case FIN_WAIT_1:
111         case CLOSING:
112         case LAST_ACK:
113                 return true;
114         default:
115                 return false;
116         }
117 }
118
119 static inline void list_connections(struct utcp *utcp) {
120         debug("%p has %d connections:\n", utcp, utcp->nconnections);
121         for(int i = 0; i < utcp->nconnections; i++)
122                 debug("  %u -> %u state %s\n", utcp->connections[i]->src, utcp->connections[i]->dst, strstate[utcp->connections[i]->state]);
123 }
124
// Signed difference a - b between two 32-bit sequence numbers.
// The subtraction is done modulo 2^32 and the result reinterpreted as signed,
// so the comparison keeps working when the sequence numbers wrap around.
static int32_t seqdiff(uint32_t a, uint32_t b) {
        uint32_t delta = a - b;
        return (int32_t)delta;
}
128
129 // Buffer functions
130 // TODO: convert to ringbuffers to avoid memmove() operations.
131
132 // Store data into the buffer
133 static ssize_t buffer_put_at(struct buffer *buf, size_t offset, const void *data, size_t len) {
134         if(buf->maxsize <= buf->used)
135                 return 0;
136
137         debug("buffer_put_at %zu %zu %zu\n", buf->used, offset, len);
138
139         size_t required = offset + len;
140         if(required > buf->maxsize) {
141                 if(offset >= buf->maxsize)
142                         return 0;
143                 abort();
144                 len = buf->maxsize - offset;
145                 required = buf->maxsize;
146         }
147
148         if(required > buf->size) {
149                 size_t newsize = buf->size;
150                 if(!newsize) {
151                         newsize = required;
152                 } else {
153                         do {
154                                 newsize *= 2;
155                         } while(newsize < buf->used + len);
156                 }
157                 if(newsize > buf->maxsize)
158                         newsize = buf->maxsize;
159                 char *newdata = realloc(buf->data, newsize);
160                 if(!newdata)
161                         return -1;
162                 buf->data = newdata;
163                 buf->size = newsize;
164         }
165
166         memcpy(buf->data + offset, data, len);
167         if(required > buf->used)
168                 buf->used = required;
169         return len;
170 }
171
172 static ssize_t buffer_put(struct buffer *buf, const void *data, size_t len) {
173         return buffer_put_at(buf, buf->used, data, len);
174 }
175
176 // Get data from the buffer. data can be NULL.
177 static ssize_t buffer_get(struct buffer *buf, void *data, size_t len) {
178         if(len > buf->used)
179                 len = buf->used;
180         if(data)
181                 memcpy(data, buf->data, len);
182         if(len < buf->used)
183                 memmove(buf->data, buf->data + len, buf->used - len);
184         buf->used -= len;
185         return len;
186 }
187
188 // Copy data from the buffer without removing it.
189 static ssize_t buffer_copy(struct buffer *buf, void *data, size_t offset, size_t len) {
190         if(offset >= buf->used)
191                 return 0;
192         if(offset + len > buf->used)
193                 len = buf->used - offset;
194         memcpy(data, buf->data + offset, len);
195         return len;
196 }
197
198 static bool buffer_init(struct buffer *buf, uint32_t len, uint32_t maxlen) {
199         memset(buf, 0, sizeof *buf);
200         buf->data = malloc(len);
201         if(!len)
202                 return false;
203         buf->size = len;
204         buf->maxsize = maxlen;
205         return true;
206 }
207
208 static void buffer_exit(struct buffer *buf) {
209         free(buf->data);
210         memset(buf, 0, sizeof *buf);
211 }
212
213 static uint32_t buffer_free(const struct buffer *buf) {
214         return buf->maxsize - buf->used;
215 }
216
217 // Connections are stored in a sorted list.
218 // This gives O(log(N)) lookup time, O(N log(N)) insertion time and O(N) deletion time.
219
220 static int compare(const void *va, const void *vb) {
221         assert(va && vb);
222
223         const struct utcp_connection *a = *(struct utcp_connection **)va;
224         const struct utcp_connection *b = *(struct utcp_connection **)vb;
225
226         assert(a && b);
227         assert(a->src && b->src);
228
229         int c = (int)a->src - (int)b->src;
230         if(c)
231                 return c;
232         c = (int)a->dst - (int)b->dst;
233         return c;
234 }
235
236 static struct utcp_connection *find_connection(const struct utcp *utcp, uint16_t src, uint16_t dst) {
237         if(!utcp->nconnections)
238                 return NULL;
239         struct utcp_connection key = {
240                 .src = src,
241                 .dst = dst,
242         }, *keyp = &key;
243         struct utcp_connection **match = bsearch(&keyp, utcp->connections, utcp->nconnections, sizeof *utcp->connections, compare);
244         return match ? *match : NULL;
245 }
246
247 static void free_connection(struct utcp_connection *c) {
248         struct utcp *utcp = c->utcp;
249         struct utcp_connection **cp = bsearch(&c, utcp->connections, utcp->nconnections, sizeof *utcp->connections, compare);
250
251         assert(cp);
252
253         int i = cp - utcp->connections;
254         memmove(cp, cp + 1, (utcp->nconnections - i - 1) * sizeof *cp);
255         utcp->nconnections--;
256
257         buffer_exit(&c->sndbuf);
258         free(c);
259 }
260
261 static struct utcp_connection *allocate_connection(struct utcp *utcp, uint16_t src, uint16_t dst) {
262         // Check whether this combination of src and dst is free
263
264         if(src) {
265                 if(find_connection(utcp, src, dst)) {
266                         errno = EADDRINUSE;
267                         return NULL;
268                 }
269         } else { // If src == 0, generate a random port number with the high bit set
270                 if(utcp->nconnections >= 32767) {
271                         errno = ENOMEM;
272                         return NULL;
273                 }
274                 src = rand() | 0x8000;
275                 while(find_connection(utcp, src, dst))
276                         src++;
277         }
278
279         // Allocate memory for the new connection
280
281         if(utcp->nconnections >= utcp->nallocated) {
282                 if(!utcp->nallocated)
283                         utcp->nallocated = 4;
284                 else
285                         utcp->nallocated *= 2;
286                 struct utcp_connection **new_array = realloc(utcp->connections, utcp->nallocated * sizeof *utcp->connections);
287                 if(!new_array)
288                         return NULL;
289                 utcp->connections = new_array;
290         }
291
292         struct utcp_connection *c = calloc(1, sizeof *c);
293         if(!c)
294                 return NULL;
295
296         if(!buffer_init(&c->sndbuf, DEFAULT_SNDBUFSIZE, DEFAULT_MAXSNDBUFSIZE)) {
297                 free(c);
298                 return NULL;
299         }
300
301         // Fill in the details
302
303         c->src = src;
304         c->dst = dst;
305 #ifdef UTCP_DEBUG
306 #warning debugging
307         c->snd.iss = 0;
308 #else
309         c->snd.iss = rand();
310 #endif
311         c->snd.una = c->snd.iss;
312         c->snd.nxt = c->snd.iss + 1;
313         c->rcv.wnd = utcp->mtu;
314         c->snd.last = c->snd.nxt;
315         c->snd.cwnd = utcp->mtu;
316         c->utcp = utcp;
317
318         // Add it to the sorted list of connections
319
320         utcp->connections[utcp->nconnections++] = c;
321         qsort(utcp->connections, utcp->nconnections, sizeof *utcp->connections, compare);
322
323         return c;
324 }
325
326 struct utcp_connection *utcp_connect(struct utcp *utcp, uint16_t dst, utcp_recv_t recv, void *priv) {
327         struct utcp_connection *c = allocate_connection(utcp, 0, dst);
328         if(!c)
329                 return NULL;
330
331         c->recv = recv;
332         c->priv = priv;
333
334         struct hdr hdr;
335
336         hdr.src = c->src;
337         hdr.dst = c->dst;
338         hdr.seq = c->snd.iss;
339         hdr.ack = 0;
340         hdr.wnd = c->rcv.wnd;
341         hdr.ctl = SYN;
342         hdr.aux = 0;
343
344         set_state(c, SYN_SENT);
345
346         print_packet(utcp, "send", &hdr, sizeof hdr);
347         utcp->send(utcp, &hdr, sizeof hdr);
348
349         gettimeofday(&c->conn_timeout, NULL);
350         c->conn_timeout.tv_sec += utcp->timeout;
351
352         return c;
353 }
354
355 void utcp_accept(struct utcp_connection *c, utcp_recv_t recv, void *priv) {
356         if(c->reapable || c->state != SYN_RECEIVED) {
357                 debug("Error: accept() called on invalid connection %p in state %s\n", c, strstate[c->state]);
358                 return;
359         }
360
361         debug("%p accepted, %p %p\n", c, recv, priv);
362         c->recv = recv;
363         c->priv = priv;
364         set_state(c, ESTABLISHED);
365 }
366
367 static void ack(struct utcp_connection *c, bool sendatleastone) {
368         int32_t left = seqdiff(c->snd.last, c->snd.nxt);
369         int32_t cwndleft = c->snd.cwnd - seqdiff(c->snd.nxt, c->snd.una);
370
371         assert(left >= 0);
372
373         if(cwndleft <= 0)
374                 cwndleft = 0;
375
376         if(cwndleft < left)
377                 left = cwndleft;
378
379         if(!left && !sendatleastone)
380                 return;
381
382         struct {
383                 struct hdr hdr;
384                 char data[];
385         } *pkt;
386
387         pkt = malloc(sizeof pkt->hdr + c->utcp->mtu);
388         if(!pkt)
389                 return;
390
391         pkt->hdr.src = c->src;
392         pkt->hdr.dst = c->dst;
393         pkt->hdr.ack = c->rcv.nxt;
394         pkt->hdr.wnd = c->snd.wnd;
395         pkt->hdr.ctl = ACK;
396         pkt->hdr.aux = 0;
397
398         do {
399                 uint32_t seglen = left > c->utcp->mtu ? c->utcp->mtu : left;
400                 pkt->hdr.seq = c->snd.nxt;
401
402                 buffer_copy(&c->sndbuf, pkt->data, seqdiff(c->snd.nxt, c->snd.una), seglen);
403
404                 c->snd.nxt += seglen;
405                 left -= seglen;
406
407                 if(seglen && fin_wanted(c, c->snd.nxt)) {
408                         seglen--;
409                         pkt->hdr.ctl |= FIN;
410                 }
411
412                 print_packet(c->utcp, "send", pkt, sizeof pkt->hdr + seglen);
413                 c->utcp->send(c->utcp, pkt, sizeof pkt->hdr + seglen);
414         } while(left);
415
416         free(pkt);
417 }
418
419 ssize_t utcp_send(struct utcp_connection *c, const void *data, size_t len) {
420         if(c->reapable) {
421                 debug("Error: send() called on closed connection %p\n", c);
422                 errno = EBADF;
423                 return -1;
424         }
425
426         switch(c->state) {
427         case CLOSED:
428         case LISTEN:
429         case SYN_SENT:
430         case SYN_RECEIVED:
431                 debug("Error: send() called on unconnected connection %p\n", c);
432                 errno = ENOTCONN;
433                 return -1;
434         case ESTABLISHED:
435         case CLOSE_WAIT:
436                 break;
437         case FIN_WAIT_1:
438         case FIN_WAIT_2:
439         case CLOSING:
440         case LAST_ACK:
441         case TIME_WAIT:
442                 debug("Error: send() called on closing connection %p\n", c);
443                 errno = EPIPE;
444                 return -1;
445         }
446
447         // Add data to send buffer
448
449         if(!len)
450                 return 0;
451
452         if(!data) {
453                 errno = EFAULT;
454                 return -1;
455         }
456
457         len = buffer_put(&c->sndbuf, data, len);
458         if(len <= 0) {
459                 errno = EWOULDBLOCK;
460                 return 0;
461         }
462
463         c->snd.last += len;
464         ack(c, false);
465         return len;
466 }
467
468 static void swap_ports(struct hdr *hdr) {
469         uint16_t tmp = hdr->src;
470         hdr->src = hdr->dst;
471         hdr->dst = tmp;
472 }
473
474 static void retransmit(struct utcp_connection *c) {
475         if(c->state == CLOSED || c->snd.nxt == c->snd.una)
476                 return;
477
478         struct utcp *utcp = c->utcp;
479
480         struct {
481                 struct hdr hdr;
482                 char data[];
483         } *pkt;
484
485         pkt = malloc(sizeof pkt->hdr + c->utcp->mtu);
486         if(!pkt)
487                 return;
488
489         pkt->hdr.src = c->src;
490         pkt->hdr.dst = c->dst;
491
492         switch(c->state) {
493                 case SYN_SENT:
494                         // Send our SYN again
495                         pkt->hdr.seq = c->snd.iss;
496                         pkt->hdr.ack = 0;
497                         pkt->hdr.wnd = c->rcv.wnd;
498                         pkt->hdr.ctl = SYN;
499                         print_packet(c->utcp, "rtrx", pkt, sizeof pkt->hdr);
500                         utcp->send(utcp, pkt, sizeof pkt->hdr);
501                         break;
502
503                 case SYN_RECEIVED:
504                         // Send SYNACK again
505                         pkt->hdr.seq = c->snd.nxt;
506                         pkt->hdr.ack = c->rcv.nxt;
507                         pkt->hdr.ctl = SYN | ACK;
508                         print_packet(c->utcp, "rtrx", pkt, sizeof pkt->hdr);
509                         utcp->send(utcp, pkt, sizeof pkt->hdr);
510                         break;
511
512                 case ESTABLISHED:
513                 case FIN_WAIT_1:
514                 case CLOSE_WAIT:
515                 case CLOSING:
516                 case LAST_ACK:
517                         // Send unacked data again.
518                         pkt->hdr.seq = c->snd.una;
519                         pkt->hdr.ack = c->rcv.nxt;
520                         pkt->hdr.ctl = ACK;
521                         uint32_t len = seqdiff(c->snd.last, c->snd.una);
522                         if(len > utcp->mtu)
523                                 len = utcp->mtu;
524                         if(fin_wanted(c, c->snd.una + len)) {
525                                 len--;
526                                 pkt->hdr.ctl |= FIN;
527                         }
528                         c->snd.nxt = c->snd.una + len;
529                         buffer_copy(&c->sndbuf, pkt->data, 0, len);
530                         print_packet(c->utcp, "rtrx", pkt, sizeof pkt->hdr + len);
531                         utcp->send(utcp, pkt, sizeof pkt->hdr + len);
532                         break;
533
534                 case CLOSED:
535                 case LISTEN:
536                 case TIME_WAIT:
537                 case FIN_WAIT_2:
538                         // We shouldn't need to retransmit anything in this state.
539 #ifdef UTCP_DEBUG
540                         abort();
541 #endif
542                         timerclear(&c->rtrx_timeout);
543                         break;
544         }
545
546         free(pkt);
547 }
548
549
550 ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) {
551         if(!utcp) {
552                 errno = EFAULT;
553                 return -1;
554         }
555
556         if(!len)
557                 return 0;
558
559         if(!data) {
560                 errno = EFAULT;
561                 return -1;
562         }
563
564         print_packet(utcp, "recv", data, len);
565
566         // Drop packets smaller than the header
567
568         struct hdr hdr;
569         if(len < sizeof hdr) {
570                 errno = EBADMSG;
571                 return -1;
572         }
573
574         // Make a copy from the potentially unaligned data to a struct hdr
575
576         memcpy(&hdr, data, sizeof hdr);
577         data += sizeof hdr;
578         len -= sizeof hdr;
579
580         // Drop packets with an unknown CTL flag
581
582         if(hdr.ctl & ~(SYN | ACK | RST | FIN)) {
583                 errno = EBADMSG;
584                 return -1;
585         }
586
587         // Try to match the packet to an existing connection
588
589         struct utcp_connection *c = find_connection(utcp, hdr.dst, hdr.src);
590
591         // Is it for a new connection?
592
593         if(!c) {
594                 // Ignore RST packets
595
596                 if(hdr.ctl & RST)
597                         return 0;
598
599                 // Is it a SYN packet and are we LISTENing?
600
601                 if(hdr.ctl & SYN && !(hdr.ctl & ACK) && utcp->accept) {
602                         // If we don't want to accept it, send a RST back
603                         if((utcp->pre_accept && !utcp->pre_accept(utcp, hdr.dst))) {
604                                 len = 1;
605                                 goto reset;
606                         }
607
608                         // Try to allocate memory, otherwise send a RST back
609                         c = allocate_connection(utcp, hdr.dst, hdr.src);
610                         if(!c) {
611                                 len = 1;
612                                 goto reset;
613                         }
614
615                         // Return SYN+ACK, go to SYN_RECEIVED state
616                         c->snd.wnd = hdr.wnd;
617                         c->rcv.irs = hdr.seq;
618                         c->rcv.nxt = c->rcv.irs + 1;
619                         set_state(c, SYN_RECEIVED);
620
621                         hdr.dst = c->dst;
622                         hdr.src = c->src;
623                         hdr.ack = c->rcv.irs + 1;
624                         hdr.seq = c->snd.iss;
625                         hdr.ctl = SYN | ACK;
626                         print_packet(c->utcp, "send", &hdr, sizeof hdr);
627                         utcp->send(utcp, &hdr, sizeof hdr);
628                 } else {
629                         // No, we don't want your packets, send a RST back
630                         len = 1;
631                         goto reset;
632                 }
633
634                 return 0;
635         }
636
637         debug("%p state %s\n", c->utcp, strstate[c->state]);
638
639         // In case this is for a CLOSED connection, ignore the packet.
640         // TODO: make it so incoming packets can never match a CLOSED connection.
641
642         if(c->state == CLOSED)
643                 return 0;
644
645         // It is for an existing connection.
646
647         uint32_t prevrcvnxt = c->rcv.nxt;
648
649         // 1. Drop invalid packets.
650
651         // 1a. Drop packets that should not happen in our current state.
652
653         switch(c->state) {
654         case SYN_SENT:
655         case SYN_RECEIVED:
656         case ESTABLISHED:
657         case FIN_WAIT_1:
658         case FIN_WAIT_2:
659         case CLOSE_WAIT:
660         case CLOSING:
661         case LAST_ACK:
662         case TIME_WAIT:
663                 break;
664         default:
665 #ifdef UTCP_DEBUG
666                 abort();
667 #endif
668                 break;
669         }
670
671         // 1b. Drop packets with a sequence number not in our receive window.
672
673         bool acceptable;
674
675         if(c->state == SYN_SENT)
676                 acceptable = true;
677
678         // TODO: handle packets overlapping c->rcv.nxt.
679 #if 0
680         // Only use this when accepting out-of-order packets.
681         else if(len == 0)
682                 if(c->rcv.wnd == 0)
683                         acceptable = hdr.seq == c->rcv.nxt;
684                 else
685                         acceptable = (seqdiff(hdr.seq, c->rcv.nxt) >= 0 && seqdiff(hdr.seq, c->rcv.nxt + c->rcv.wnd) < 0);
686         else
687                 if(c->rcv.wnd == 0)
688                         // We don't accept data when the receive window is zero.
689                         acceptable = false;
690                 else
691                         // Both start and end of packet must be within the receive window
692                         acceptable = (seqdiff(hdr.seq, c->rcv.nxt) >= 0 && seqdiff(hdr.seq, c->rcv.nxt + c->rcv.wnd) < 0)
693                                 || (seqdiff(hdr.seq + len + 1, c->rcv.nxt) >= 0 && seqdiff(hdr.seq + len - 1, c->rcv.nxt + c->rcv.wnd) < 0);
694 #else
695         if(c->state != SYN_SENT)
696                 acceptable = hdr.seq == c->rcv.nxt;
697 #endif
698
699         if(!acceptable) {
700                 debug("Packet not acceptable, %u <= %u + %zu < %u\n", c->rcv.nxt, hdr.seq, len, c->rcv.nxt + c->rcv.wnd);
701                 // Ignore unacceptable RST packets.
702                 if(hdr.ctl & RST)
703                         return 0;
704                 // Otherwise, send an ACK back in the hope things improve.
705                 ack(c, true);
706                 return 0;
707         }
708
709         c->snd.wnd = hdr.wnd; // TODO: move below
710
711         // 1c. Drop packets with an invalid ACK.
712         // ackno should not roll back, and it should also not be bigger than snd.nxt.
713
714         if(hdr.ctl & ACK && (seqdiff(hdr.ack, c->snd.nxt) > 0 || seqdiff(hdr.ack, c->snd.una) < 0)) {
715                 debug("Packet ack seqno out of range, %u %u %u\n", hdr.ack, c->snd.una, c->snd.nxt);
716                 // Ignore unacceptable RST packets.
717                 if(hdr.ctl & RST)
718                         return 0;
719                 goto reset;
720         }
721
722         // 2. Handle RST packets
723
724         if(hdr.ctl & RST) {
725                 switch(c->state) {
726                 case SYN_SENT:
727                         if(!(hdr.ctl & ACK))
728                                 return 0;
729                         // The peer has refused our connection.
730                         set_state(c, CLOSED);
731                         errno = ECONNREFUSED;
732                         if(c->recv)
733                                 c->recv(c, NULL, 0);
734                         return 0;
735                 case SYN_RECEIVED:
736                         if(hdr.ctl & ACK)
737                                 return 0;
738                         // We haven't told the application about this connection yet. Silently delete.
739                         free_connection(c);
740                         return 0;
741                 case ESTABLISHED:
742                 case FIN_WAIT_1:
743                 case FIN_WAIT_2:
744                 case CLOSE_WAIT:
745                         if(hdr.ctl & ACK)
746                                 return 0;
747                         // The peer has aborted our connection.
748                         set_state(c, CLOSED);
749                         errno = ECONNRESET;
750                         if(c->recv)
751                                 c->recv(c, NULL, 0);
752                         return 0;
753                 case CLOSING:
754                 case LAST_ACK:
755                 case TIME_WAIT:
756                         if(hdr.ctl & ACK)
757                                 return 0;
758                         // As far as the application is concerned, the connection has already been closed.
759                         // If it has called utcp_close() already, we can immediately free this connection.
760                         if(c->reapable) {
761                                 free_connection(c);
762                                 return 0;
763                         }
764                         // Otherwise, immediately move to the CLOSED state.
765                         set_state(c, CLOSED);
766                         return 0;
767                 default:
768 #ifdef UTCP_DEBUG
769                         abort();
770 #endif
771                         break;
772                 }
773         }
774
775         // 3. Advance snd.una
776
777         uint32_t advanced = seqdiff(hdr.ack, c->snd.una);
778         prevrcvnxt = c->rcv.nxt;
779
780         if(advanced) {
781                 int32_t data_acked = advanced;
782
783                 switch(c->state) {
784                         case SYN_SENT:
785                         case SYN_RECEIVED:
786                                 data_acked--;
787                                 break;
788                         // TODO: handle FIN as well.
789                         default:
790                                 break;
791                 }
792
793                 assert(data_acked >= 0);
794
795                 int32_t bufused = seqdiff(c->snd.last, c->snd.una);
796                 assert(data_acked <= bufused);
797
798                 if(data_acked)
799                         buffer_get(&c->sndbuf, NULL, data_acked);
800
801                 c->snd.una = hdr.ack;
802
803                 c->dupack = 0;
804                 c->snd.cwnd += utcp->mtu;
805                 if(c->snd.cwnd > c->sndbuf.maxsize)
806                         c->snd.cwnd = c->sndbuf.maxsize;
807
808                 // Check if we have sent a FIN that is now ACKed.
809                 switch(c->state) {
810                 case FIN_WAIT_1:
811                         if(c->snd.una == c->snd.last)
812                                 set_state(c, FIN_WAIT_2);
813                         break;
814                 case CLOSING:
815                         if(c->snd.una == c->snd.last) {
816                                 gettimeofday(&c->conn_timeout, NULL);
817                                 c->conn_timeout.tv_sec += 60;
818                                 set_state(c, TIME_WAIT);
819                         }
820                         break;
821                 default:
822                         break;
823                 }
824         } else {
825                 if(!len) {
826                         c->dupack++;
827                         if(c->dupack == 3) {
828                                 debug("Triplicate ACK\n");
829                                 //TODO: Resend one packet and go to fast recovery mode. See RFC 6582.
830                                 //We do a very simple variant here; reset the nxt pointer to the last acknowledged packet from the peer.
831                                 //This will cause us to start retransmitting, but at the same speed as the incoming ACKs arrive,
832                                 //thus preventing a drop in speed.
833                                 c->snd.nxt = c->snd.una;
834                         }
835                 }
836         }
837
838         // 4. Update timers
839
840         if(advanced) {
841                 timerclear(&c->conn_timeout); // It will be set anew in utcp_timeout() if c->snd.una != c->snd.nxt.
842                 if(c->snd.una == c->snd.nxt)
843                         timerclear(&c->rtrx_timeout);
844         }
845
846         // 5. Process SYN stuff
847
848         if(hdr.ctl & SYN) {
849                 switch(c->state) {
850                 case SYN_SENT:
851                         // This is a SYNACK. It should always have ACKed the SYN.
852                         if(!advanced)
853                                 goto reset;
854                         c->rcv.irs = hdr.seq;
855                         c->rcv.nxt = hdr.seq;
856                         set_state(c, ESTABLISHED);
857                         // TODO: notify application of this somehow.
858                         break;
859                 case SYN_RECEIVED:
860                 case ESTABLISHED:
861                 case FIN_WAIT_1:
862                 case FIN_WAIT_2:
863                 case CLOSE_WAIT:
864                 case CLOSING:
865                 case LAST_ACK:
866                 case TIME_WAIT:
867                         // Ehm, no. We should never receive a second SYN.
868                         goto reset;
869                 default:
870 #ifdef UTCP_DEBUG
871                         abort();
872 #endif
873                         return 0;
874                 }
875
876                 // SYN counts as one sequence number
877                 c->rcv.nxt++;
878         }
879
880         // 6. Process new data
881
882         if(c->state == SYN_RECEIVED) {
883                 // This is the ACK after the SYNACK. It should always have ACKed the SYNACK.
884                 if(!advanced)
885                         goto reset;
886
887                 // Are we still LISTENing?
888                 if(utcp->accept)
889                         utcp->accept(c, c->src);
890
891                 if(c->state != ESTABLISHED) {
892                         set_state(c, CLOSED);
893                         c->reapable = true;
894                         goto reset;
895                 }
896         }
897
898         if(len) {
899                 switch(c->state) {
900                 case SYN_SENT:
901                 case SYN_RECEIVED:
902                         // This should never happen.
903 #ifdef UTCP_DEBUG
904                         abort();
905 #endif
906                         return 0;
907                 case ESTABLISHED:
908                 case FIN_WAIT_1:
909                 case FIN_WAIT_2:
910                         break;
911                 case CLOSE_WAIT:
912                 case CLOSING:
913                 case LAST_ACK:
914                 case TIME_WAIT:
915                         // Ehm no, We should never receive more data after a FIN.
916                         goto reset;
917                 default:
918 #ifdef UTCP_DEBUG
919                         abort();
920 #endif
921                         return 0;
922                 }
923
924                 ssize_t rxd;
925
926                 if(c->recv) {
927                         rxd = c->recv(c, data, len);
928                         if(rxd != len) {
929                                 // TODO: once we have a receive buffer, handle the application not accepting all data.
930                                 abort();
931                         }
932                         if(rxd < 0)
933                                 rxd = 0;
934                         else if(rxd > len)
935                                 rxd = len; // Bad application, bad!
936                 } else {
937                         rxd = len;
938                 }
939
940                 c->rcv.nxt += len;
941         }
942
943         // 7. Process FIN stuff
944
945         if(hdr.ctl & FIN) {
946                 switch(c->state) {
947                 case SYN_SENT:
948                 case SYN_RECEIVED:
949                         // This should never happen.
950 #ifdef UTCP_DEBUG
951                         abort();
952 #endif
953                         break;
954                 case ESTABLISHED:
955                         set_state(c, CLOSE_WAIT);
956                         break;
957                 case FIN_WAIT_1:
958                         set_state(c, CLOSING);
959                         break;
960                 case FIN_WAIT_2:
961                         gettimeofday(&c->conn_timeout, NULL);
962                         c->conn_timeout.tv_sec += 60;
963                         set_state(c, TIME_WAIT);
964                         break;
965                 case CLOSE_WAIT:
966                 case CLOSING:
967                 case LAST_ACK:
968                 case TIME_WAIT:
969                         // Ehm, no. We should never receive a second FIN.
970                         goto reset;
971                 default:
972 #ifdef UTCP_DEBUG
973                         abort();
974 #endif
975                         break;
976                 }
977
978                 // FIN counts as one sequence number
979                 c->rcv.nxt++;
980                 len++;
981
982                 // Inform the application that the peer closed the connection.
983                 if(c->recv) {
984                         errno = 0;
985                         c->recv(c, NULL, 0);
986                 }
987         }
988
989         // Now we send something back if:
990         // - we advanced rcv.nxt (ie, we got some data that needs to be ACKed)
991         //   -> sendatleastone = true
992         // - or we got an ack, so we should maybe send a bit more data
993         //   -> sendatleastone = false
994
995 ack:
996         ack(c, prevrcvnxt != c->rcv.nxt);
997         return 0;
998
999 reset:
1000         swap_ports(&hdr);
1001         hdr.wnd = 0;
1002         if(hdr.ctl & ACK) {
1003                 hdr.seq = hdr.ack;
1004                 hdr.ctl = RST;
1005         } else {
1006                 hdr.ack = hdr.seq + len;
1007                 hdr.seq = 0;
1008                 hdr.ctl = RST | ACK;
1009         }
1010         print_packet(utcp, "send", &hdr, sizeof hdr);
1011         utcp->send(utcp, &hdr, sizeof hdr);
1012         return 0;
1013
1014 }
1015
1016 int utcp_shutdown(struct utcp_connection *c, int dir) {
1017         debug("%p shutdown %d at %u\n", c ? c->utcp : NULL, dir, c ? c->snd.last : 0);
1018         if(!c) {
1019                 errno = EFAULT;
1020                 return -1;
1021         }
1022
1023         if(c->reapable) {
1024                 debug("Error: shutdown() called on closed connection %p\n", c);
1025                 errno = EBADF;
1026                 return -1;
1027         }
1028
1029         if(!(dir == UTCP_SHUT_RD || dir == UTCP_SHUT_WR || dir == UTCP_SHUT_RDWR)) {
1030                 errno = EINVAL;
1031                 return -1;
1032         }
1033
1034         // TCP does not have a provision for stopping incoming packets.
1035         // The best we can do is to just ignore them.
1036         if(dir == UTCP_SHUT_RD || dir == UTCP_SHUT_RDWR)
1037                 c->recv = NULL;
1038
1039         // The rest of the code deals with shutting down writes.
1040         if(dir == UTCP_SHUT_RD)
1041                 return 0;
1042
1043         switch(c->state) {
1044         case CLOSED:
1045         case LISTEN:
1046                 errno = ENOTCONN;
1047                 return -1;
1048
1049         case SYN_SENT:
1050                 set_state(c, CLOSED);
1051                 return 0;
1052
1053         case SYN_RECEIVED:
1054         case ESTABLISHED:
1055                 set_state(c, FIN_WAIT_1);
1056                 break;
1057         case FIN_WAIT_1:
1058         case FIN_WAIT_2:
1059                 return 0;
1060         case CLOSE_WAIT:
1061                 set_state(c, CLOSING);
1062                 break;
1063
1064         case CLOSING:
1065         case LAST_ACK:
1066         case TIME_WAIT:
1067                 return 0;
1068         }
1069
1070         c->snd.last++;
1071
1072         ack(c, false);
1073         return 0;
1074 }
1075
1076 int utcp_close(struct utcp_connection *c) {
1077         if(utcp_shutdown(c, SHUT_RDWR))
1078                 return -1;
1079         c->recv = NULL;
1080         c->poll = NULL;
1081         c->reapable = true;
1082         return 0;
1083 }
1084
1085 int utcp_abort(struct utcp_connection *c) {
1086         if(!c) {
1087                 errno = EFAULT;
1088                 return -1;
1089         }
1090
1091         if(c->reapable) {
1092                 debug("Error: abort() called on closed connection %p\n", c);
1093                 errno = EBADF;
1094                 return -1;
1095         }
1096
1097         c->recv = NULL;
1098         c->poll = NULL;
1099         c->reapable = true;
1100
1101         switch(c->state) {
1102         case CLOSED:
1103                 return 0;
1104         case LISTEN:
1105         case SYN_SENT:
1106         case CLOSING:
1107         case LAST_ACK:
1108         case TIME_WAIT:
1109                 set_state(c, CLOSED);
1110                 return 0;
1111
1112         case SYN_RECEIVED:
1113         case ESTABLISHED:
1114         case FIN_WAIT_1:
1115         case FIN_WAIT_2:
1116         case CLOSE_WAIT:
1117                 set_state(c, CLOSED);
1118                 break;
1119         }
1120
1121         // Send RST
1122
1123         struct hdr hdr;
1124
1125         hdr.src = c->src;
1126         hdr.dst = c->dst;
1127         hdr.seq = c->snd.nxt;
1128         hdr.ack = 0;
1129         hdr.wnd = 0;
1130         hdr.ctl = RST;
1131
1132         print_packet(c->utcp, "send", &hdr, sizeof hdr);
1133         c->utcp->send(c->utcp, &hdr, sizeof hdr);
1134         return 0;
1135 }
1136
1137 /* Handle timeouts.
1138  * One call to this function will loop through all connections,
1139  * checking if something needs to be resent or not.
1140  * The return value is the time to the next timeout in milliseconds,
1141  * or maybe a negative value if the timeout is infinite.
1142  */
1143 struct timeval utcp_timeout(struct utcp *utcp) {
1144         struct timeval now;
1145         gettimeofday(&now, NULL);
1146         struct timeval next = {now.tv_sec + 3600, now.tv_usec};
1147
1148         for(int i = 0; i < utcp->nconnections; i++) {
1149                 struct utcp_connection *c = utcp->connections[i];
1150                 if(!c)
1151                         continue;
1152
1153                 if(c->state == CLOSED) {
1154                         if(c->reapable) {
1155                                 debug("Reaping %p\n", c);
1156                                 free_connection(c);
1157                                 i--;
1158                         }
1159                         continue;
1160                 }
1161
1162                 if(timerisset(&c->conn_timeout) && timercmp(&c->conn_timeout, &now, <)) {
1163                         errno = ETIMEDOUT;
1164                         c->state = CLOSED;
1165                         if(c->recv)
1166                                 c->recv(c, NULL, 0);
1167                         continue;
1168                 }
1169
1170                 if(timerisset(&c->rtrx_timeout) && timercmp(&c->rtrx_timeout, &now, <)) {
1171                         retransmit(c);
1172                 }
1173
1174                 if(c->poll && buffer_free(&c->sndbuf) && (c->state == ESTABLISHED || c->state == CLOSE_WAIT))
1175                         c->poll(c, buffer_free(&c->sndbuf));
1176
1177                 if(timerisset(&c->conn_timeout) && timercmp(&c->conn_timeout, &next, <))
1178                         next = c->conn_timeout;
1179
1180                 if(c->snd.nxt != c->snd.una) {
1181                         c->rtrx_timeout = now;
1182                         c->rtrx_timeout.tv_sec++;
1183                 } else {
1184                         timerclear(&c->rtrx_timeout);
1185                 }
1186
1187                 if(timerisset(&c->rtrx_timeout) && timercmp(&c->rtrx_timeout, &next, <))
1188                         next = c->rtrx_timeout;
1189         }
1190
1191         struct timeval diff;
1192         timersub(&next, &now, &diff);
1193         return diff;
1194 }
1195
1196 bool utcp_is_active(struct utcp *utcp) {
1197         if(!utcp)
1198                 return false;
1199
1200         for(int i = 0; i < utcp->nconnections; i++)
1201                 if(utcp->connections[i]->state != CLOSED && utcp->connections[i]->state != TIME_WAIT)
1202                         return true;
1203
1204         return false;
1205 }
1206
1207 struct utcp *utcp_init(utcp_accept_t accept, utcp_pre_accept_t pre_accept, utcp_send_t send, void *priv) {
1208         struct utcp *utcp = calloc(1, sizeof *utcp);
1209         if(!utcp)
1210                 return NULL;
1211
1212         if(!send) {
1213                 errno = EFAULT;
1214                 return NULL;
1215         }
1216
1217         utcp->accept = accept;
1218         utcp->pre_accept = pre_accept;
1219         utcp->send = send;
1220         utcp->priv = priv;
1221         utcp->mtu = 1000;
1222         utcp->timeout = 60;
1223
1224         return utcp;
1225 }
1226
1227 void utcp_exit(struct utcp *utcp) {
1228         if(!utcp)
1229                 return;
1230         for(int i = 0; i < utcp->nconnections; i++) {
1231                 if(!utcp->connections[i]->reapable)
1232                         debug("Warning, freeing unclosed connection %p\n", utcp->connections[i]);
1233                 buffer_exit(&utcp->connections[i]->sndbuf);
1234                 free(utcp->connections[i]);
1235         }
1236         free(utcp->connections);
1237         free(utcp);
1238 }
1239
1240 uint16_t utcp_get_mtu(struct utcp *utcp) {
1241         return utcp ? utcp->mtu : 0;
1242 }
1243
1244 void utcp_set_mtu(struct utcp *utcp, uint16_t mtu) {
1245         // TODO: handle overhead of the header
1246         if(utcp)
1247                 utcp->mtu = mtu;
1248 }
1249
1250 int utcp_get_user_timeout(struct utcp *u) {
1251         return u ? u->timeout : 0;
1252 }
1253
1254 void utcp_set_user_timeout(struct utcp *u, int timeout) {
1255         if(u)
1256                 u->timeout = timeout;
1257 }
1258
1259 size_t utcp_get_sndbuf(struct utcp_connection *c) {
1260         return c ? c->sndbuf.maxsize : 0;
1261 }
1262
1263 size_t utcp_get_sndbuf_free(struct utcp_connection *c) {
1264         if(c && (c->state == ESTABLISHED || c->state == CLOSE_WAIT))
1265                 return buffer_free(&c->sndbuf);
1266         else
1267                 return 0;
1268 }
1269
1270 void utcp_set_sndbuf(struct utcp_connection *c, size_t size) {
1271         if(!c)
1272                 return;
1273         c->sndbuf.maxsize = size;
1274         if(c->sndbuf.maxsize != size)
1275                 c->sndbuf.maxsize = -1;
1276 }
1277
1278 bool utcp_get_nodelay(struct utcp_connection *c) {
1279         return c ? c->nodelay : false;
1280 }
1281
1282 void utcp_set_nodelay(struct utcp_connection *c, bool nodelay) {
1283         if(c)
1284                 c->nodelay = nodelay;
1285 }
1286
1287 bool utcp_get_keepalive(struct utcp_connection *c) {
1288         return c ? c->keepalive : false;
1289 }
1290
1291 void utcp_set_keepalive(struct utcp_connection *c, bool keepalive) {
1292         if(c)
1293                 c->keepalive = keepalive;
1294 }
1295
1296 size_t utcp_get_outq(struct utcp_connection *c) {
1297         return c ? seqdiff(c->snd.nxt, c->snd.una) : 0;
1298 }
1299
1300 void utcp_set_recv_cb(struct utcp_connection *c, utcp_recv_t recv) {
1301         if(c)
1302                 c->recv = recv;
1303 }
1304
1305 void utcp_set_poll_cb(struct utcp_connection *c, utcp_poll_t poll) {
1306         if(c)
1307                 c->poll = poll;
1308 }
1309
1310 void utcp_set_accept_cb(struct utcp *utcp, utcp_accept_t accept, utcp_pre_accept_t pre_accept) {
1311         if(utcp) {
1312                 utcp->accept = accept;
1313                 utcp->pre_accept = pre_accept;
1314         }
1315 }