]> git.meshlink.io Git - utcp/blob - utcp.c
Fix warnings when compiling with -Wall -W.
[utcp] / utcp.c
1 /*
2     utcp.c -- Userspace TCP
3     Copyright (C) 2014-2017 Guus Sliepen <guus@tinc-vpn.org>
4
5     This program is free software; you can redistribute it and/or modify
6     it under the terms of the GNU General Public License as published by
7     the Free Software Foundation; either version 2 of the License, or
8     (at your option) any later version.
9
10     This program is distributed in the hope that it will be useful,
11     but WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13     GNU General Public License for more details.
14
15     You should have received a copy of the GNU General Public License along
16     with this program; if not, write to the Free Software Foundation, Inc.,
17     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20 #define _GNU_SOURCE
21
22 #include <assert.h>
23 #include <errno.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <stdint.h>
27 #include <stdbool.h>
28 #include <string.h>
29 #include <unistd.h>
30 #include <sys/time.h>
31 #include <sys/socket.h>
32
33 #include "utcp_priv.h"
34
35 #ifndef EBADMSG
36 #define EBADMSG         104
37 #endif
38
39 #ifndef SHUT_RDWR
40 #define SHUT_RDWR 2
41 #endif
42
43 #ifdef poll
44 #undef poll
45 #endif
46
#ifndef timersub
// Fallback for platforms without timersub(): r = a - b, borrowing one
// second whenever the microsecond field would otherwise go negative.
#define timersub(a, b, r) do {\
	(r)->tv_sec = (a)->tv_sec - (b)->tv_sec;\
	(r)->tv_usec = (a)->tv_usec - (b)->tv_usec;\
	if((r)->tv_usec < 0) {\
		(r)->tv_sec--;\
		(r)->tv_usec += USEC_PER_SEC;\
	}\
} while (0)
#endif
55
// Return the larger of two sizes.
static inline size_t max(size_t a, size_t b) {
	if(a > b)
		return a;
	return b;
}
59
60 #ifdef UTCP_DEBUG
61 #include <stdarg.h>
62
// Write a printf-style formatted debug message to stderr.
static void debug(const char *format, ...) {
	va_list args;

	va_start(args, format);
	vfprintf(stderr, format, args);
	va_end(args);
}
69
70 static void print_packet(struct utcp *utcp, const char *dir, const void *pkt, size_t len) {
71         struct hdr hdr;
72         if(len < sizeof hdr) {
73                 debug("%p %s: short packet (%lu bytes)\n", utcp, dir, (unsigned long)len);
74                 return;
75         }
76
77         memcpy(&hdr, pkt, sizeof hdr);
78         debug("%p %s: len=%lu, src=%u dst=%u seq=%u ack=%u wnd=%u aux=%x ctl=", utcp, dir, (unsigned long)len, hdr.src, hdr.dst, hdr.seq, hdr.ack, hdr.wnd, hdr.aux);
79         if(hdr.ctl & SYN)
80                 debug("SYN");
81         if(hdr.ctl & RST)
82                 debug("RST");
83         if(hdr.ctl & FIN)
84                 debug("FIN");
85         if(hdr.ctl & ACK)
86                 debug("ACK");
87
88         if(len > sizeof hdr) {
89                 uint32_t datalen = len - sizeof hdr;
90                 const uint8_t *data = (uint8_t *)pkt + sizeof hdr;
91                 char str[datalen * 2 + 1];
92                 char *p = str;
93
94                 for(uint32_t i = 0; i < datalen; i++) {
95                         *p++ = "0123456789ABCDEF"[data[i] >> 4];
96                         *p++ = "0123456789ABCDEF"[data[i] & 15];
97                 }
98                 *p = 0;
99
100                 debug(" data=%s", str);
101         }
102
103         debug("\n");
104 }
105 #else
106 #define debug(...) do {} while(0)
107 #define print_packet(...) do {} while(0)
108 #endif
109
110 static void set_state(struct utcp_connection *c, enum state state) {
111         c->state = state;
112         if(state == ESTABLISHED)
113                 timerclear(&c->conn_timeout);
114         debug("%p new state: %s\n", c->utcp, strstate[state]);
115 }
116
117 static bool fin_wanted(struct utcp_connection *c, uint32_t seq) {
118         if(seq != c->snd.last)
119                 return false;
120         switch(c->state) {
121         case FIN_WAIT_1:
122         case CLOSING:
123         case LAST_ACK:
124                 return true;
125         default:
126                 return false;
127         }
128 }
129
130 static bool is_reliable(struct utcp_connection *c) {
131         return c->flags & UTCP_RELIABLE;
132 }
133
// Signed distance from sequence number b to sequence number a, computed
// modulo 2^32 so that it keeps working across sequence number wrap-around.
static int32_t seqdiff(uint32_t a, uint32_t b) {
	uint32_t delta = a - b;
	return (int32_t)delta;
}
137
138 // Buffer functions
139 // TODO: convert to ringbuffers to avoid memmove() operations.
140
141 // Store data into the buffer
142 static ssize_t buffer_put_at(struct buffer *buf, size_t offset, const void *data, size_t len) {
143         debug("buffer_put_at %lu %lu %lu\n", (unsigned long)buf->used, (unsigned long)offset, (unsigned long)len);
144
145         size_t required = offset + len;
146         if(required > buf->maxsize) {
147                 if(offset >= buf->maxsize)
148                         return 0;
149                 len = buf->maxsize - offset;
150                 required = buf->maxsize;
151         }
152
153         if(required > buf->size) {
154                 size_t newsize = buf->size;
155                 if(!newsize) {
156                         newsize = required;
157                 } else {
158                         do {
159                                 newsize *= 2;
160                         } while(newsize < required);
161                 }
162                 if(newsize > buf->maxsize)
163                         newsize = buf->maxsize;
164                 char *newdata = realloc(buf->data, newsize);
165                 if(!newdata)
166                         return -1;
167                 buf->data = newdata;
168                 buf->size = newsize;
169         }
170
171         memcpy(buf->data + offset, data, len);
172         if(required > buf->used)
173                 buf->used = required;
174         return len;
175 }
176
177 static ssize_t buffer_put(struct buffer *buf, const void *data, size_t len) {
178         return buffer_put_at(buf, buf->used, data, len);
179 }
180
181 // Get data from the buffer. data can be NULL.
182 static ssize_t buffer_get(struct buffer *buf, void *data, size_t len) {
183         if(len > buf->used)
184                 len = buf->used;
185         if(data)
186                 memcpy(data, buf->data, len);
187         if(len < buf->used)
188                 memmove(buf->data, buf->data + len, buf->used - len);
189         buf->used -= len;
190         return len;
191 }
192
193 // Copy data from the buffer without removing it.
194 static ssize_t buffer_copy(struct buffer *buf, void *data, size_t offset, size_t len) {
195         if(offset >= buf->used)
196                 return 0;
197         if(offset + len > buf->used)
198                 len = buf->used - offset;
199         memcpy(data, buf->data + offset, len);
200         return len;
201 }
202
203 static bool buffer_init(struct buffer *buf, uint32_t len, uint32_t maxlen) {
204         memset(buf, 0, sizeof *buf);
205         if(len) {
206                 buf->data = malloc(len);
207                 if(!buf->data)
208                         return false;
209         }
210         buf->size = len;
211         buf->maxsize = maxlen;
212         return true;
213 }
214
215 static void buffer_exit(struct buffer *buf) {
216         free(buf->data);
217         memset(buf, 0, sizeof *buf);
218 }
219
220 static uint32_t buffer_free(const struct buffer *buf) {
221         return buf->maxsize - buf->used;
222 }
223
224 // Connections are stored in a sorted list.
225 // This gives O(log(N)) lookup time, O(N log(N)) insertion time and O(N) deletion time.
226
227 static int compare(const void *va, const void *vb) {
228         assert(va && vb);
229
230         const struct utcp_connection *a = *(struct utcp_connection **)va;
231         const struct utcp_connection *b = *(struct utcp_connection **)vb;
232
233         assert(a && b);
234         assert(a->src && b->src);
235
236         int c = (int)a->src - (int)b->src;
237         if(c)
238                 return c;
239         c = (int)a->dst - (int)b->dst;
240         return c;
241 }
242
243 static struct utcp_connection *find_connection(const struct utcp *utcp, uint16_t src, uint16_t dst) {
244         if(!utcp->nconnections)
245                 return NULL;
246         struct utcp_connection key = {
247                 .src = src,
248                 .dst = dst,
249         }, *keyp = &key;
250         struct utcp_connection **match = bsearch(&keyp, utcp->connections, utcp->nconnections, sizeof *utcp->connections, compare);
251         return match ? *match : NULL;
252 }
253
254 static void free_connection(struct utcp_connection *c) {
255         struct utcp *utcp = c->utcp;
256         struct utcp_connection **cp = bsearch(&c, utcp->connections, utcp->nconnections, sizeof *utcp->connections, compare);
257
258         assert(cp);
259
260         int i = cp - utcp->connections;
261         memmove(cp, cp + 1, (utcp->nconnections - i - 1) * sizeof *cp);
262         utcp->nconnections--;
263
264         buffer_exit(&c->rcvbuf);
265         buffer_exit(&c->sndbuf);
266         free(c);
267 }
268
269 static struct utcp_connection *allocate_connection(struct utcp *utcp, uint16_t src, uint16_t dst) {
270         // Check whether this combination of src and dst is free
271
272         if(src) {
273                 if(find_connection(utcp, src, dst)) {
274                         errno = EADDRINUSE;
275                         return NULL;
276                 }
277         } else { // If src == 0, generate a random port number with the high bit set
278                 if(utcp->nconnections >= 32767) {
279                         errno = ENOMEM;
280                         return NULL;
281                 }
282                 src = rand() | 0x8000;
283                 while(find_connection(utcp, src, dst))
284                         src++;
285         }
286
287         // Allocate memory for the new connection
288
289         if(utcp->nconnections >= utcp->nallocated) {
290                 if(!utcp->nallocated)
291                         utcp->nallocated = 4;
292                 else
293                         utcp->nallocated *= 2;
294                 struct utcp_connection **new_array = realloc(utcp->connections, utcp->nallocated * sizeof *utcp->connections);
295                 if(!new_array)
296                         return NULL;
297                 utcp->connections = new_array;
298         }
299
300         struct utcp_connection *c = calloc(1, sizeof *c);
301         if(!c)
302                 return NULL;
303
304         if(!buffer_init(&c->sndbuf, DEFAULT_SNDBUFSIZE, DEFAULT_MAXSNDBUFSIZE)) {
305                 free(c);
306                 return NULL;
307         }
308
309         if(!buffer_init(&c->rcvbuf, DEFAULT_RCVBUFSIZE, DEFAULT_MAXRCVBUFSIZE)) {
310                 buffer_exit(&c->sndbuf);
311                 free(c);
312                 return NULL;
313         }
314
315         // Fill in the details
316
317         c->src = src;
318         c->dst = dst;
319 #ifdef UTCP_DEBUG
320         c->snd.iss = 0;
321 #else
322         c->snd.iss = rand();
323 #endif
324         c->snd.una = c->snd.iss;
325         c->snd.nxt = c->snd.iss + 1;
326         c->rcv.wnd = utcp->mtu;
327         c->snd.last = c->snd.nxt;
328         c->snd.cwnd = utcp->mtu;
329         c->utcp = utcp;
330
331         // Add it to the sorted list of connections
332
333         utcp->connections[utcp->nconnections++] = c;
334         qsort(utcp->connections, utcp->nconnections, sizeof *utcp->connections, compare);
335
336         return c;
337 }
338
// Absolute difference between two unsigned 32-bit values.
static inline uint32_t absdiff(uint32_t a, uint32_t b) {
	return a > b ? a - b : b - a;
}
345
346 // Update RTT variables. See RFC 6298.
347 static void update_rtt(struct utcp_connection *c, uint32_t rtt) {
348         if(!rtt) {
349                 debug("invalid rtt\n");
350                 return;
351         }
352
353         struct utcp *utcp = c->utcp;
354
355         if(!utcp->srtt) {
356                 utcp->srtt = rtt;
357                 utcp->rttvar = rtt / 2;
358                 utcp->rto = rtt + max(2 * rtt, CLOCK_GRANULARITY);
359         } else {
360                 utcp->rttvar = (utcp->rttvar * 3 + absdiff(utcp->srtt, rtt)) / 4;
361                 utcp->srtt = (utcp->srtt * 7 + rtt) / 8;
362                 utcp->rto = utcp->srtt + max(utcp->rttvar, CLOCK_GRANULARITY);
363         }
364
365         if(utcp->rto > MAX_RTO)
366                 utcp->rto = MAX_RTO;
367
368         debug("rtt %u srtt %u rttvar %u rto %u\n", rtt, utcp->srtt, utcp->rttvar, utcp->rto);
369 }
370
371 static void start_retransmit_timer(struct utcp_connection *c) {
372         gettimeofday(&c->rtrx_timeout, NULL);
373         c->rtrx_timeout.tv_usec += c->utcp->rto;
374         while(c->rtrx_timeout.tv_usec >= 1000000) {
375                 c->rtrx_timeout.tv_usec -= 1000000;
376                 c->rtrx_timeout.tv_sec++;
377         }
378         debug("timeout set to %lu.%06lu (%u)\n", c->rtrx_timeout.tv_sec, c->rtrx_timeout.tv_usec, c->utcp->rto);
379 }
380
381 static void stop_retransmit_timer(struct utcp_connection *c) {
382         timerclear(&c->rtrx_timeout);
383         debug("timeout cleared\n");
384 }
385
386 struct utcp_connection *utcp_connect_ex(struct utcp *utcp, uint16_t dst, utcp_recv_t recv, void *priv, uint32_t flags) {
387         struct utcp_connection *c = allocate_connection(utcp, 0, dst);
388         if(!c)
389                 return NULL;
390
391         assert((flags & ~0xf) == 0);
392
393         c->flags = flags;
394         c->recv = recv;
395         c->priv = priv;
396
397         struct {
398                 struct hdr hdr;
399                 uint8_t init[4];
400         } pkt;
401
402         pkt.hdr.src = c->src;
403         pkt.hdr.dst = c->dst;
404         pkt.hdr.seq = c->snd.iss;
405         pkt.hdr.ack = 0;
406         pkt.hdr.wnd = c->rcv.wnd;
407         pkt.hdr.ctl = SYN;
408         pkt.hdr.aux = 0x0101;
409         pkt.init[0] = 1;
410         pkt.init[1] = 0;
411         pkt.init[2] = 0;
412         pkt.init[3] = flags & 0x7;
413
414         set_state(c, SYN_SENT);
415
416         print_packet(utcp, "send", &pkt, sizeof pkt);
417         utcp->send(utcp, &pkt, sizeof pkt);
418
419         gettimeofday(&c->conn_timeout, NULL);
420         c->conn_timeout.tv_sec += utcp->timeout;
421
422         start_retransmit_timer(c);
423
424         return c;
425 }
426
427 struct utcp_connection *utcp_connect(struct utcp *utcp, uint16_t dst, utcp_recv_t recv, void *priv) {
428         return utcp_connect_ex(utcp, dst, recv, priv, UTCP_TCP);
429 }
430
431 void utcp_accept(struct utcp_connection *c, utcp_recv_t recv, void *priv) {
432         if(c->reapable || c->state != SYN_RECEIVED) {
433                 debug("Error: accept() called on invalid connection %p in state %s\n", c, strstate[c->state]);
434                 return;
435         }
436
437         debug("%p accepted, %p %p\n", c, recv, priv);
438         c->recv = recv;
439         c->priv = priv;
440         set_state(c, ESTABLISHED);
441 }
442
443 static void ack(struct utcp_connection *c, bool sendatleastone) {
444         int32_t left = seqdiff(c->snd.last, c->snd.nxt);
445         int32_t cwndleft = c->snd.cwnd - seqdiff(c->snd.nxt, c->snd.una);
446         debug("cwndleft = %d\n", cwndleft);
447
448         assert(left >= 0);
449
450         if(cwndleft <= 0)
451                 cwndleft = 0;
452
453         if(cwndleft < left)
454                 left = cwndleft;
455
456         if(!left && !sendatleastone)
457                 return;
458
459         struct {
460                 struct hdr hdr;
461                 uint8_t data[];
462         } *pkt;
463
464         pkt = malloc(sizeof pkt->hdr + c->utcp->mtu);
465         if(!pkt)
466                 return;
467
468         pkt->hdr.src = c->src;
469         pkt->hdr.dst = c->dst;
470         pkt->hdr.ack = c->rcv.nxt;
471         pkt->hdr.wnd = c->snd.wnd;
472         pkt->hdr.ctl = ACK;
473         pkt->hdr.aux = 0;
474
475         do {
476                 uint32_t seglen = left > c->utcp->mtu ? c->utcp->mtu : left;
477                 pkt->hdr.seq = c->snd.nxt;
478
479                 buffer_copy(&c->sndbuf, pkt->data, seqdiff(c->snd.nxt, c->snd.una), seglen);
480
481                 c->snd.nxt += seglen;
482                 left -= seglen;
483
484                 if(seglen && fin_wanted(c, c->snd.nxt)) {
485                         seglen--;
486                         pkt->hdr.ctl |= FIN;
487                 }
488
489                 if(!c->rtt_start.tv_sec) {
490                         // Start RTT measurement
491                         gettimeofday(&c->rtt_start, NULL);
492                         c->rtt_seq = pkt->hdr.seq + seglen;
493                         debug("Starting RTT measurement, expecting ack %u\n", c->rtt_seq);
494                 }
495
496                 print_packet(c->utcp, "send", pkt, sizeof pkt->hdr + seglen);
497                 c->utcp->send(c->utcp, pkt, sizeof pkt->hdr + seglen);
498         } while(left);
499
500         free(pkt);
501 }
502
503 ssize_t utcp_send(struct utcp_connection *c, const void *data, size_t len) {
504         if(c->reapable) {
505                 debug("Error: send() called on closed connection %p\n", c);
506                 errno = EBADF;
507                 return -1;
508         }
509
510         switch(c->state) {
511         case CLOSED:
512         case LISTEN:
513         case SYN_SENT:
514         case SYN_RECEIVED:
515                 debug("Error: send() called on unconnected connection %p\n", c);
516                 errno = ENOTCONN;
517                 return -1;
518         case ESTABLISHED:
519         case CLOSE_WAIT:
520                 break;
521         case FIN_WAIT_1:
522         case FIN_WAIT_2:
523         case CLOSING:
524         case LAST_ACK:
525         case TIME_WAIT:
526                 debug("Error: send() called on closing connection %p\n", c);
527                 errno = EPIPE;
528                 return -1;
529         }
530
531         // Exit early if we have nothing to send.
532
533         if(!len)
534                 return 0;
535
536         if(!data) {
537                 errno = EFAULT;
538                 return -1;
539         }
540
541         // Add data to send buffer.
542
543         len = buffer_put(&c->sndbuf, data, len);
544         if(len <= 0) {
545                 errno = EWOULDBLOCK;
546                 return 0;
547         }
548
549         c->snd.last += len;
550         ack(c, false);
551         if(!is_reliable(c)) {
552                 c->snd.una = c->snd.nxt = c->snd.last;
553                 buffer_get(&c->sndbuf, NULL, c->sndbuf.used);
554         }
555         if(is_reliable(c) && !timerisset(&c->rtrx_timeout))
556                 start_retransmit_timer(c);
557         return len;
558 }
559
560 static void swap_ports(struct hdr *hdr) {
561         uint16_t tmp = hdr->src;
562         hdr->src = hdr->dst;
563         hdr->dst = tmp;
564 }
565
566 static void retransmit(struct utcp_connection *c) {
567         if(c->state == CLOSED || c->snd.last == c->snd.una) {
568                 debug("Retransmit() called but nothing to retransmit!\n");
569                 stop_retransmit_timer(c);
570                 return;
571         }
572
573         struct utcp *utcp = c->utcp;
574
575         struct {
576                 struct hdr hdr;
577                 uint8_t data[];
578         } *pkt;
579
580         pkt = malloc(sizeof pkt->hdr + c->utcp->mtu);
581         if(!pkt)
582                 return;
583
584         pkt->hdr.src = c->src;
585         pkt->hdr.dst = c->dst;
586         pkt->hdr.wnd = c->rcv.wnd;
587         pkt->hdr.aux = 0;
588
589         switch(c->state) {
590                 case SYN_SENT:
591                         // Send our SYN again
592                         pkt->hdr.seq = c->snd.iss;
593                         pkt->hdr.ack = 0;
594                         pkt->hdr.ctl = SYN;
595                         pkt->hdr.aux = 0x0101;
596                         pkt->data[0] = 1;
597                         pkt->data[1] = 0;
598                         pkt->data[2] = 0;
599                         pkt->data[3] = c->flags & 0x7;
600                         print_packet(c->utcp, "rtrx", pkt, sizeof pkt->hdr + 4);
601                         utcp->send(utcp, pkt, sizeof pkt->hdr + 4);
602                         break;
603
604                 case SYN_RECEIVED:
605                         // Send SYNACK again
606                         pkt->hdr.seq = c->snd.nxt;
607                         pkt->hdr.ack = c->rcv.nxt;
608                         pkt->hdr.ctl = SYN | ACK;
609                         print_packet(c->utcp, "rtrx", pkt, sizeof pkt->hdr);
610                         utcp->send(utcp, pkt, sizeof pkt->hdr);
611                         break;
612
613                 case ESTABLISHED:
614                 case FIN_WAIT_1:
615                 case CLOSE_WAIT:
616                 case CLOSING:
617                 case LAST_ACK:
618                         // Send unacked data again.
619                         pkt->hdr.seq = c->snd.una;
620                         pkt->hdr.ack = c->rcv.nxt;
621                         pkt->hdr.ctl = ACK;
622                         uint32_t len = seqdiff(c->snd.last, c->snd.una);
623                         if(len > utcp->mtu)
624                                 len = utcp->mtu;
625                         if(fin_wanted(c, c->snd.una + len)) {
626                                 len--;
627                                 pkt->hdr.ctl |= FIN;
628                         }
629                         c->snd.nxt = c->snd.una + len;
630                         c->snd.cwnd = utcp->mtu; // reduce cwnd on retransmit
631                         buffer_copy(&c->sndbuf, pkt->data, 0, len);
632                         print_packet(c->utcp, "rtrx", pkt, sizeof pkt->hdr + len);
633                         utcp->send(utcp, pkt, sizeof pkt->hdr + len);
634                         break;
635
636                 case CLOSED:
637                 case LISTEN:
638                 case TIME_WAIT:
639                 case FIN_WAIT_2:
640                         // We shouldn't need to retransmit anything in this state.
641 #ifdef UTCP_DEBUG
642                         abort();
643 #endif
644                         stop_retransmit_timer(c);
645                         goto cleanup;
646         }
647
648         start_retransmit_timer(c);
649         utcp->rto *= 2;
650         if(utcp->rto > MAX_RTO)
651                 utcp->rto = MAX_RTO;
652         c->rtt_start.tv_sec = 0; // invalidate RTT timer
653
654 cleanup:
655         free(pkt);
656 }
657
658 /* Update receive buffer and SACK entries after consuming data.
659  *
660  * Situation:
661  *
662  * |.....0000..1111111111.....22222......3333|
663  * |---------------^
664  *
665  * 0..3 represent the SACK entries. The ^ indicates up to which point we want
666  * to remove data from the receive buffer. The idea is to substract "len"
667  * from the offset of all the SACK entries, and then remove/cut down entries
668  * that are shifted to before the start of the receive buffer.
669  *
670  * There are three cases:
671  * - the SACK entry is after ^, in that case just change the offset.
672  * - the SACK entry starts before and ends after ^, so we have to
673  *   change both its offset and size.
674  * - the SACK entry is completely before ^, in that case delete it.
675  */
676 static void sack_consume(struct utcp_connection *c, size_t len) {
677         debug("sack_consume %lu\n", (unsigned long)len);
678         if(len > c->rcvbuf.used) {
679                 debug("All SACK entries consumed");
680                 c->sacks[0].len = 0;
681                 return;
682         }
683
684         buffer_get(&c->rcvbuf, NULL, len);
685
686         for(int i = 0; i < NSACKS && c->sacks[i].len; ) {
687                 if(len < c->sacks[i].offset) {
688                         c->sacks[i].offset -= len;
689                         i++;
690                 } else if(len < c->sacks[i].offset + c->sacks[i].len) {
691                         c->sacks[i].len -= len - c->sacks[i].offset;
692                         c->sacks[i].offset = 0;
693                         i++;
694                 } else {
695                         if(i < NSACKS - 1) {
696                                 memmove(&c->sacks[i], &c->sacks[i + 1], (NSACKS - 1 - i) * sizeof c->sacks[i]);
697                                 c->sacks[NSACKS - 1].len = 0;
698                         } else {
699                                 c->sacks[i].len = 0;
700                                 break;
701                         }
702                 }
703         }
704
705         for(int i = 0; i < NSACKS && c->sacks[i].len; i++)
706                 debug("SACK[%d] offset %u len %u\n", i, c->sacks[i].offset, c->sacks[i].len);
707 }
708
709 static void handle_out_of_order(struct utcp_connection *c, uint32_t offset, const void *data, size_t len) {
710         debug("out of order packet, offset %u\n", offset);
711         // Packet loss or reordering occured. Store the data in the buffer.
712         ssize_t rxd = buffer_put_at(&c->rcvbuf, offset, data, len);
713         if(rxd < 0 || (size_t)rxd < len)
714                 abort();
715
716         // Make note of where we put it.
717         for(int i = 0; i < NSACKS; i++) {
718                 if(!c->sacks[i].len) { // nothing to merge, add new entry
719                         debug("New SACK entry %d\n", i);
720                         c->sacks[i].offset = offset;
721                         c->sacks[i].len = rxd;
722                         break;
723                 } else if(offset < c->sacks[i].offset) {
724                         if(offset + rxd < c->sacks[i].offset) { // insert before
725                                 if(!c->sacks[NSACKS - 1].len) { // only if room left
726                                         debug("Insert SACK entry at %d\n", i);
727                                         memmove(&c->sacks[i + 1], &c->sacks[i], (NSACKS - i - 1) * sizeof c->sacks[i]);
728                                         c->sacks[i].offset = offset;
729                                         c->sacks[i].len = rxd;
730                                 } else {
731                                         debug("SACK entries full, dropping packet\n");
732                                 }
733                                 break;
734                         } else { // merge
735                                 debug("Merge with start of SACK entry at %d\n", i);
736                                 c->sacks[i].offset = offset;
737                                 break;
738                         }
739                 } else if(offset <= c->sacks[i].offset + c->sacks[i].len) {
740                         if(offset + rxd > c->sacks[i].offset + c->sacks[i].len) { // merge
741                                 debug("Merge with end of SACK entry at %d\n", i);
742                                 c->sacks[i].len = offset + rxd - c->sacks[i].offset;
743                                 // TODO: handle potential merge with next entry
744                         }
745                         break;
746                 }
747         }
748
749         for(int i = 0; i < NSACKS && c->sacks[i].len; i++)
750                 debug("SACK[%d] offset %u len %u\n", i, c->sacks[i].offset, c->sacks[i].len);
751 }
752
753 static void handle_in_order(struct utcp_connection *c, const void *data, size_t len) {
754         // Check if we can process out-of-order data now.
755         if(c->sacks[0].len && len >= c->sacks[0].offset) { // TODO: handle overlap with second SACK
756                 debug("incoming packet len %lu connected with SACK at %u\n", (unsigned long)len, c->sacks[0].offset);
757                 buffer_put_at(&c->rcvbuf, 0, data, len); // TODO: handle return value
758                 len = max(len, c->sacks[0].offset + c->sacks[0].len);
759                 data = c->rcvbuf.data;
760         }
761
762         if(c->recv) {
763                 ssize_t rxd = c->recv(c, data, len);
764                 if(rxd < 0 || (size_t)rxd != len) {
765                         // TODO: handle the application not accepting all data.
766                         abort();
767                 }
768         }
769
770         if(c->rcvbuf.used)
771                 sack_consume(c, len);
772
773         c->rcv.nxt += len;
774 }
775
776
777 static void handle_incoming_data(struct utcp_connection *c, uint32_t seq, const void *data, size_t len) {
778         if(!is_reliable(c)) {
779                 c->recv(c, data, len);
780                 c->rcv.nxt = seq + len;
781                 return;
782         }
783
784         uint32_t offset = seqdiff(seq, c->rcv.nxt);
785         if(offset + len > c->rcvbuf.maxsize)
786                 abort();
787
788         if(offset)
789                 handle_out_of_order(c, offset, data, len);
790         else
791                 handle_in_order(c, data, len);
792 }
793
794
795 ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) {
796         if(!utcp) {
797                 errno = EFAULT;
798                 return -1;
799         }
800
801         if(!len)
802                 return 0;
803
804         if(!data) {
805                 errno = EFAULT;
806                 return -1;
807         }
808
809         print_packet(utcp, "recv", data, len);
810
811         // Drop packets smaller than the header
812
813         struct hdr hdr;
814         if(len < sizeof hdr) {
815                 errno = EBADMSG;
816                 return -1;
817         }
818
819         // Make a copy from the potentially unaligned data to a struct hdr
820
821         memcpy(&hdr, data, sizeof hdr);
822         data += sizeof hdr;
823         len -= sizeof hdr;
824
825         // Drop packets with an unknown CTL flag
826
827         if(hdr.ctl & ~(SYN | ACK | RST | FIN)) {
828                 errno = EBADMSG;
829                 return -1;
830         }
831
832         // Check for auxiliary headers
833
834         const uint8_t *init = NULL;
835
836         uint16_t aux = hdr.aux;
837         while(aux) {
838                 size_t auxlen = 4 * (aux >> 8) & 0xf;
839                 uint8_t auxtype = aux & 0xff;
840
841                 if(len < auxlen) {
842                         errno = EBADMSG;
843                         return -1;
844                 }
845
846                 switch(auxtype) {
847                 case AUX_INIT:
848                         if(!(hdr.ctl & SYN) || auxlen != 4) {
849                                 errno = EBADMSG;
850                                 return -1;
851                         }
852                         init = data;
853                         break;
854                 default:
855                         errno = EBADMSG;
856                         return -1;
857                 }
858
859                 len -= auxlen;
860                 data += auxlen;
861
862                 if(!(aux & 0x800))
863                         break;
864
865                 if(len < 2) {
866                         errno = EBADMSG;
867                         return -1;
868                 }
869
870                 memcpy(&aux, data, 2);
871                 len -= 2;
872                 data += 2;
873         }
874
875         // Try to match the packet to an existing connection
876
877         struct utcp_connection *c = find_connection(utcp, hdr.dst, hdr.src);
878
879         // Is it for a new connection?
880
881         if(!c) {
882                 // Ignore RST packets
883
884                 if(hdr.ctl & RST)
885                         return 0;
886
887                 // Is it a SYN packet and are we LISTENing?
888
889                 if(hdr.ctl & SYN && !(hdr.ctl & ACK) && utcp->accept) {
890                         // If we don't want to accept it, send a RST back
891                         if((utcp->pre_accept && !utcp->pre_accept(utcp, hdr.dst))) {
892                                 len = 1;
893                                 goto reset;
894                         }
895
896                         // Try to allocate memory, otherwise send a RST back
897                         c = allocate_connection(utcp, hdr.dst, hdr.src);
898                         if(!c) {
899                                 len = 1;
900                                 goto reset;
901                         }
902
903                         // Parse auxilliary information
904                         if(init) {
905                                 if(init[0] < 1) {
906                                         len = 1;
907                                         goto reset;
908                                 }
909                                 c->flags = init[3] & 0x7;
910                         } else {
911                                 c->flags = UTCP_TCP;
912                         }
913
914                         // Return SYN+ACK, go to SYN_RECEIVED state
915                         c->snd.wnd = hdr.wnd;
916                         c->rcv.irs = hdr.seq;
917                         c->rcv.nxt = c->rcv.irs + 1;
918                         set_state(c, SYN_RECEIVED);
919
920                         struct {
921                                 struct hdr hdr;
922                                 uint8_t data[4];
923                         } pkt;
924
925                         pkt.hdr.src = c->src;
926                         pkt.hdr.dst = c->dst;
927                         pkt.hdr.ack = c->rcv.irs + 1;
928                         pkt.hdr.seq = c->snd.iss;
929                         pkt.hdr.wnd = c->rcv.wnd;
930                         pkt.hdr.ctl = SYN | ACK;
931                         if(init) {
932                                 pkt.hdr.aux = 0x0101;
933                                 pkt.data[0] = 1;
934                                 pkt.data[1] = 0;
935                                 pkt.data[2] = 0;
936                                 pkt.data[3] = c->flags & 0x7;
937                                 print_packet(c->utcp, "send", &pkt, sizeof hdr + 4);
938                                 utcp->send(utcp, &pkt, sizeof hdr + 4);
939                         } else {
940                                 pkt.hdr.aux = 0;
941                                 print_packet(c->utcp, "send", &pkt, sizeof hdr);
942                                 utcp->send(utcp, &pkt, sizeof hdr);
943                         }
944                 } else {
945                         // No, we don't want your packets, send a RST back
946                         len = 1;
947                         goto reset;
948                 }
949
950                 return 0;
951         }
952
953         debug("%p state %s\n", c->utcp, strstate[c->state]);
954
955         // In case this is for a CLOSED connection, ignore the packet.
956         // TODO: make it so incoming packets can never match a CLOSED connection.
957
958         if(c->state == CLOSED) {
959                 debug("Got packet for closed connection\n");
960                 return 0;
961         }
962
963         // It is for an existing connection.
964
965         uint32_t prevrcvnxt = c->rcv.nxt;
966
967         // 1. Drop invalid packets.
968
969         // 1a. Drop packets that should not happen in our current state.
970
971         switch(c->state) {
972         case SYN_SENT:
973         case SYN_RECEIVED:
974         case ESTABLISHED:
975         case FIN_WAIT_1:
976         case FIN_WAIT_2:
977         case CLOSE_WAIT:
978         case CLOSING:
979         case LAST_ACK:
980         case TIME_WAIT:
981                 break;
982         default:
983 #ifdef UTCP_DEBUG
984                 abort();
985 #endif
986                 break;
987         }
988
989         // 1b. Drop packets with a sequence number not in our receive window.
990
991         bool acceptable;
992
993         if(c->state == SYN_SENT)
994                 acceptable = true;
995         else if(len == 0)
996                 acceptable = seqdiff(hdr.seq, c->rcv.nxt) >= 0;
997         else {
998                 int32_t rcv_offset = seqdiff(hdr.seq, c->rcv.nxt);
999
1000                 // cut already accepted front overlapping
1001                 if(rcv_offset < 0) {
1002                         acceptable = len > (size_t)-rcv_offset;
1003                         if(acceptable) {
1004                                 data -= rcv_offset;
1005                                 len += rcv_offset;
1006                                 hdr.seq -= rcv_offset;
1007                         }
1008                 } else {
1009                         acceptable = seqdiff(hdr.seq, c->rcv.nxt) >= 0 && seqdiff(hdr.seq, c->rcv.nxt) + len <= c->rcvbuf.maxsize;
1010                 }
1011         }
1012
1013         if(!acceptable) {
1014                 debug("Packet not acceptable, %u <= %u + %lu < %u\n", c->rcv.nxt, hdr.seq, (unsigned long)len, c->rcv.nxt + c->rcvbuf.maxsize);
1015                 // Ignore unacceptable RST packets.
1016                 if(hdr.ctl & RST)
1017                         return 0;
1018                 // Otherwise, continue processing.
1019                 len = 0;
1020         }
1021
1022         c->snd.wnd = hdr.wnd; // TODO: move below
1023
1024         // 1c. Drop packets with an invalid ACK.
1025         // ackno should not roll back, and it should also not be bigger than what we ever could have sent
1026         // (= snd.una + c->sndbuf.used).
1027
1028         if(hdr.ctl & ACK && (seqdiff(hdr.ack, c->snd.last) > 0 || seqdiff(hdr.ack, c->snd.una) < 0)) {
1029                 debug("Packet ack seqno out of range, %u <= %u < %u\n", c->snd.una, hdr.ack, c->snd.una + c->sndbuf.used);
1030                 // Ignore unacceptable RST packets.
1031                 if(hdr.ctl & RST)
1032                         return 0;
1033                 goto reset;
1034         }
1035
1036         // 2. Handle RST packets
1037
1038         if(hdr.ctl & RST) {
1039                 switch(c->state) {
1040                 case SYN_SENT:
1041                         if(!(hdr.ctl & ACK))
1042                                 return 0;
1043                         // The peer has refused our connection.
1044                         set_state(c, CLOSED);
1045                         errno = ECONNREFUSED;
1046                         if(c->recv)
1047                                 c->recv(c, NULL, 0);
1048                         return 0;
1049                 case SYN_RECEIVED:
1050                         if(hdr.ctl & ACK)
1051                                 return 0;
1052                         // We haven't told the application about this connection yet. Silently delete.
1053                         free_connection(c);
1054                         return 0;
1055                 case ESTABLISHED:
1056                 case FIN_WAIT_1:
1057                 case FIN_WAIT_2:
1058                 case CLOSE_WAIT:
1059                         if(hdr.ctl & ACK)
1060                                 return 0;
1061                         // The peer has aborted our connection.
1062                         set_state(c, CLOSED);
1063                         errno = ECONNRESET;
1064                         if(c->recv)
1065                                 c->recv(c, NULL, 0);
1066                         return 0;
1067                 case CLOSING:
1068                 case LAST_ACK:
1069                 case TIME_WAIT:
1070                         if(hdr.ctl & ACK)
1071                                 return 0;
1072                         // As far as the application is concerned, the connection has already been closed.
1073                         // If it has called utcp_close() already, we can immediately free this connection.
1074                         if(c->reapable) {
1075                                 free_connection(c);
1076                                 return 0;
1077                         }
1078                         // Otherwise, immediately move to the CLOSED state.
1079                         set_state(c, CLOSED);
1080                         return 0;
1081                 default:
1082 #ifdef UTCP_DEBUG
1083                         abort();
1084 #endif
1085                         break;
1086                 }
1087         }
1088
1089         if(!(hdr.ctl & ACK))
1090                 goto skip_ack;
1091
1092         // 3. Advance snd.una
1093
1094         uint32_t advanced = seqdiff(hdr.ack, c->snd.una);
1095         prevrcvnxt = c->rcv.nxt;
1096
1097         if(advanced) {
1098                 // RTT measurement
1099                 if(c->rtt_start.tv_sec) {
1100                         if(c->rtt_seq == hdr.ack) {
1101                                 struct timeval now, diff;
1102                                 gettimeofday(&now, NULL);
1103                                 timersub(&now, &c->rtt_start, &diff);
1104                                 update_rtt(c, diff.tv_sec * 1000000 + diff.tv_usec);
1105                                 c->rtt_start.tv_sec = 0;
1106                         } else if(c->rtt_seq < hdr.ack) {
1107                                 debug("Cancelling RTT measurement: %u < %u\n", c->rtt_seq, hdr.ack);
1108                                 c->rtt_start.tv_sec = 0;
1109                         }
1110                 }
1111
1112                 int32_t data_acked = advanced;
1113
1114                 switch(c->state) {
1115                         case SYN_SENT:
1116                         case SYN_RECEIVED:
1117                                 data_acked--;
1118                                 break;
1119                         // TODO: handle FIN as well.
1120                         default:
1121                                 break;
1122                 }
1123
1124                 assert(data_acked >= 0);
1125
1126                 int32_t bufused = seqdiff(c->snd.last, c->snd.una);
1127                 assert(data_acked <= bufused);
1128
1129                 if(data_acked)
1130                         buffer_get(&c->sndbuf, NULL, data_acked);
1131
1132                 // Also advance snd.nxt if possible
1133                 if(seqdiff(c->snd.nxt, hdr.ack) < 0)
1134                         c->snd.nxt = hdr.ack;
1135
1136                 c->snd.una = hdr.ack;
1137
1138                 c->dupack = 0;
1139                 c->snd.cwnd += utcp->mtu;
1140                 if(c->snd.cwnd > c->sndbuf.maxsize)
1141                         c->snd.cwnd = c->sndbuf.maxsize;
1142
1143                 // Check if we have sent a FIN that is now ACKed.
1144                 switch(c->state) {
1145                 case FIN_WAIT_1:
1146                         if(c->snd.una == c->snd.last)
1147                                 set_state(c, FIN_WAIT_2);
1148                         break;
1149                 case CLOSING:
1150                         if(c->snd.una == c->snd.last) {
1151                                 gettimeofday(&c->conn_timeout, NULL);
1152                                 c->conn_timeout.tv_sec += 60;
1153                                 set_state(c, TIME_WAIT);
1154                         }
1155                         break;
1156                 default:
1157                         break;
1158                 }
1159         } else {
1160                 if(!len && is_reliable(c)) {
1161                         c->dupack++;
1162                         if(c->dupack == 3) {
1163                                 debug("Triplicate ACK\n");
1164                                 //TODO: Resend one packet and go to fast recovery mode. See RFC 6582.
1165                                 //We do a very simple variant here; reset the nxt pointer to the last acknowledged packet from the peer.
1166                                 //Reset the congestion window so we wait for ACKs.
1167                                 c->snd.nxt = c->snd.una;
1168                                 c->snd.cwnd = utcp->mtu;
1169                                 start_retransmit_timer(c);
1170                         }
1171                 }
1172         }
1173
1174         // 4. Update timers
1175
1176         if(advanced) {
1177                 timerclear(&c->conn_timeout); // It will be set anew in utcp_timeout() if c->snd.una != c->snd.nxt.
1178                 if(c->snd.una == c->snd.last)
1179                         stop_retransmit_timer(c);
1180                 else if(is_reliable(c))
1181                         start_retransmit_timer(c);
1182         }
1183
1184 skip_ack:
1185         // 5. Process SYN stuff
1186
1187         if(hdr.ctl & SYN) {
1188                 switch(c->state) {
1189                 case SYN_SENT:
1190                         // This is a SYNACK. It should always have ACKed the SYN.
1191                         if(!advanced)
1192                                 goto reset;
1193                         c->rcv.irs = hdr.seq;
1194                         c->rcv.nxt = hdr.seq;
1195                         set_state(c, ESTABLISHED);
1196                         // TODO: notify application of this somehow.
1197                         break;
1198                 case SYN_RECEIVED:
1199                 case ESTABLISHED:
1200                 case FIN_WAIT_1:
1201                 case FIN_WAIT_2:
1202                 case CLOSE_WAIT:
1203                 case CLOSING:
1204                 case LAST_ACK:
1205                 case TIME_WAIT:
1206                         // Ehm, no. We should never receive a second SYN.
1207                         return 0;
1208                 default:
1209 #ifdef UTCP_DEBUG
1210                         abort();
1211 #endif
1212                         return 0;
1213                 }
1214
1215                 // SYN counts as one sequence number
1216                 c->rcv.nxt++;
1217         }
1218
1219         // 6. Process new data
1220
1221         if(c->state == SYN_RECEIVED) {
1222                 // This is the ACK after the SYNACK. It should always have ACKed the SYNACK.
1223                 if(!advanced)
1224                         goto reset;
1225
1226                 // Are we still LISTENing?
1227                 if(utcp->accept)
1228                         utcp->accept(c, c->src);
1229
1230                 if(c->state != ESTABLISHED) {
1231                         set_state(c, CLOSED);
1232                         c->reapable = true;
1233                         goto reset;
1234                 }
1235         }
1236
1237         if(len) {
1238                 switch(c->state) {
1239                 case SYN_SENT:
1240                 case SYN_RECEIVED:
1241                         // This should never happen.
1242 #ifdef UTCP_DEBUG
1243                         abort();
1244 #endif
1245                         return 0;
1246                 case ESTABLISHED:
1247                 case FIN_WAIT_1:
1248                 case FIN_WAIT_2:
1249                         break;
1250                 case CLOSE_WAIT:
1251                 case CLOSING:
1252                 case LAST_ACK:
1253                 case TIME_WAIT:
1254                         // Ehm no, We should never receive more data after a FIN.
1255                         goto reset;
1256                 default:
1257 #ifdef UTCP_DEBUG
1258                         abort();
1259 #endif
1260                         return 0;
1261                 }
1262
1263                 handle_incoming_data(c, hdr.seq, data, len);
1264         }
1265
1266         // 7. Process FIN stuff
1267
1268         if((hdr.ctl & FIN) && hdr.seq + len == c->rcv.nxt) {
1269                 switch(c->state) {
1270                 case SYN_SENT:
1271                 case SYN_RECEIVED:
1272                         // This should never happen.
1273 #ifdef UTCP_DEBUG
1274                         abort();
1275 #endif
1276                         break;
1277                 case ESTABLISHED:
1278                         set_state(c, CLOSE_WAIT);
1279                         break;
1280                 case FIN_WAIT_1:
1281                         set_state(c, CLOSING);
1282                         break;
1283                 case FIN_WAIT_2:
1284                         gettimeofday(&c->conn_timeout, NULL);
1285                         c->conn_timeout.tv_sec += 60;
1286                         set_state(c, TIME_WAIT);
1287                         break;
1288                 case CLOSE_WAIT:
1289                 case CLOSING:
1290                 case LAST_ACK:
1291                 case TIME_WAIT:
1292                         // Ehm, no. We should never receive a second FIN.
1293                         goto reset;
1294                 default:
1295 #ifdef UTCP_DEBUG
1296                         abort();
1297 #endif
1298                         break;
1299                 }
1300
1301                 // FIN counts as one sequence number
1302                 c->rcv.nxt++;
1303                 len++;
1304
1305                 // Inform the application that the peer closed the connection.
1306                 if(c->recv) {
1307                         errno = 0;
1308                         c->recv(c, NULL, 0);
1309                 }
1310         }
1311
1312         // Now we send something back if:
1313         // - we advanced rcv.nxt (ie, we got some data that needs to be ACKed)
1314         //   -> sendatleastone = true
1315         // - or we got an ack, so we should maybe send a bit more data
1316         //   -> sendatleastone = false
1317
1318         ack(c, len || prevrcvnxt != c->rcv.nxt);
1319         return 0;
1320
1321 reset:
1322         swap_ports(&hdr);
1323         hdr.wnd = 0;
1324         hdr.aux = 0;
1325         if(hdr.ctl & ACK) {
1326                 hdr.seq = hdr.ack;
1327                 hdr.ctl = RST;
1328         } else {
1329                 hdr.ack = hdr.seq + len;
1330                 hdr.seq = 0;
1331                 hdr.ctl = RST | ACK;
1332         }
1333         print_packet(utcp, "send", &hdr, sizeof hdr);
1334         utcp->send(utcp, &hdr, sizeof hdr);
1335         return 0;
1336
1337 }
1338
1339 int utcp_shutdown(struct utcp_connection *c, int dir) {
1340         debug("%p shutdown %d at %u\n", c ? c->utcp : NULL, dir, c ? c->snd.last : 0);
1341         if(!c) {
1342                 errno = EFAULT;
1343                 return -1;
1344         }
1345
1346         if(c->reapable) {
1347                 debug("Error: shutdown() called on closed connection %p\n", c);
1348                 errno = EBADF;
1349                 return -1;
1350         }
1351
1352         if(!(dir == UTCP_SHUT_RD || dir == UTCP_SHUT_WR || dir == UTCP_SHUT_RDWR)) {
1353                 errno = EINVAL;
1354                 return -1;
1355         }
1356
1357         // TCP does not have a provision for stopping incoming packets.
1358         // The best we can do is to just ignore them.
1359         if(dir == UTCP_SHUT_RD || dir == UTCP_SHUT_RDWR)
1360                 c->recv = NULL;
1361
1362         // The rest of the code deals with shutting down writes.
1363         if(dir == UTCP_SHUT_RD)
1364                 return 0;
1365
1366         switch(c->state) {
1367         case CLOSED:
1368         case LISTEN:
1369                 errno = ENOTCONN;
1370                 return -1;
1371
1372         case SYN_SENT:
1373                 set_state(c, CLOSED);
1374                 return 0;
1375
1376         case SYN_RECEIVED:
1377         case ESTABLISHED:
1378                 set_state(c, FIN_WAIT_1);
1379                 break;
1380         case FIN_WAIT_1:
1381         case FIN_WAIT_2:
1382                 return 0;
1383         case CLOSE_WAIT:
1384                 set_state(c, CLOSING);
1385                 break;
1386
1387         case CLOSING:
1388         case LAST_ACK:
1389         case TIME_WAIT:
1390                 return 0;
1391         }
1392
1393         c->snd.last++;
1394
1395         ack(c, false);
1396         if(!timerisset(&c->rtrx_timeout))
1397                 start_retransmit_timer(c);
1398         return 0;
1399 }
1400
1401 int utcp_close(struct utcp_connection *c) {
1402         if(utcp_shutdown(c, SHUT_RDWR) && errno != ENOTCONN)
1403                 return -1;
1404         c->recv = NULL;
1405         c->poll = NULL;
1406         c->reapable = true;
1407         return 0;
1408 }
1409
1410 int utcp_abort(struct utcp_connection *c) {
1411         if(!c) {
1412                 errno = EFAULT;
1413                 return -1;
1414         }
1415
1416         if(c->reapable) {
1417                 debug("Error: abort() called on closed connection %p\n", c);
1418                 errno = EBADF;
1419                 return -1;
1420         }
1421
1422         c->recv = NULL;
1423         c->poll = NULL;
1424         c->reapable = true;
1425
1426         switch(c->state) {
1427         case CLOSED:
1428                 return 0;
1429         case LISTEN:
1430         case SYN_SENT:
1431         case CLOSING:
1432         case LAST_ACK:
1433         case TIME_WAIT:
1434                 set_state(c, CLOSED);
1435                 return 0;
1436
1437         case SYN_RECEIVED:
1438         case ESTABLISHED:
1439         case FIN_WAIT_1:
1440         case FIN_WAIT_2:
1441         case CLOSE_WAIT:
1442                 set_state(c, CLOSED);
1443                 break;
1444         }
1445
1446         // Send RST
1447
1448         struct hdr hdr;
1449
1450         hdr.src = c->src;
1451         hdr.dst = c->dst;
1452         hdr.seq = c->snd.nxt;
1453         hdr.ack = 0;
1454         hdr.wnd = 0;
1455         hdr.ctl = RST;
1456
1457         print_packet(c->utcp, "send", &hdr, sizeof hdr);
1458         c->utcp->send(c->utcp, &hdr, sizeof hdr);
1459         return 0;
1460 }
1461
1462 /* Handle timeouts.
1463  * One call to this function will loop through all connections,
1464  * checking if something needs to be resent or not.
1465  * The return value is the time to the next timeout in milliseconds,
1466  * or maybe a negative value if the timeout is infinite.
1467  */
1468 struct timeval utcp_timeout(struct utcp *utcp) {
1469         struct timeval now;
1470         gettimeofday(&now, NULL);
1471         struct timeval next = {now.tv_sec + 3600, now.tv_usec};
1472
1473         for(int i = 0; i < utcp->nconnections; i++) {
1474                 struct utcp_connection *c = utcp->connections[i];
1475                 if(!c)
1476                         continue;
1477
1478                 // delete connections that have been utcp_close()d.
1479                 if(c->state == CLOSED) {
1480                         if(c->reapable) {
1481                                 debug("Reaping %p\n", c);
1482                                 free_connection(c);
1483                                 i--;
1484                         }
1485                         continue;
1486                 }
1487
1488                 if(timerisset(&c->conn_timeout) && timercmp(&c->conn_timeout, &now, <)) {
1489                         errno = ETIMEDOUT;
1490                         c->state = CLOSED;
1491                         if(c->recv)
1492                                 c->recv(c, NULL, 0);
1493                         continue;
1494                 }
1495
1496                 if(timerisset(&c->rtrx_timeout) && timercmp(&c->rtrx_timeout, &now, <)) {
1497                         debug("retransmit()\n");
1498                         retransmit(c);
1499                 }
1500
1501                 if(c->poll) {
1502                         if((c->state == ESTABLISHED || c->state == CLOSE_WAIT)) {
1503                                 uint32_t len =  buffer_free(&c->sndbuf);
1504                                 if(len)
1505                                         c->poll(c, len);
1506                         } else if(c->state == CLOSED) {
1507                                 c->poll(c, 0);
1508                         }
1509                 }
1510
1511                 if(timerisset(&c->conn_timeout) && timercmp(&c->conn_timeout, &next, <))
1512                         next = c->conn_timeout;
1513
1514                 if(timerisset(&c->rtrx_timeout) && timercmp(&c->rtrx_timeout, &next, <))
1515                         next = c->rtrx_timeout;
1516         }
1517
1518         struct timeval diff;
1519         timersub(&next, &now, &diff);
1520         return diff;
1521 }
1522
1523 bool utcp_is_active(struct utcp *utcp) {
1524         if(!utcp)
1525                 return false;
1526
1527         for(int i = 0; i < utcp->nconnections; i++)
1528                 if(utcp->connections[i]->state != CLOSED && utcp->connections[i]->state != TIME_WAIT)
1529                         return true;
1530
1531         return false;
1532 }
1533
1534 struct utcp *utcp_init(utcp_accept_t accept, utcp_pre_accept_t pre_accept, utcp_send_t send, void *priv) {
1535         if(!send) {
1536                 errno = EFAULT;
1537                 return NULL;
1538         }
1539
1540         struct utcp *utcp = calloc(1, sizeof *utcp);
1541         if(!utcp)
1542                 return NULL;
1543
1544         utcp->accept = accept;
1545         utcp->pre_accept = pre_accept;
1546         utcp->send = send;
1547         utcp->priv = priv;
1548         utcp->mtu = DEFAULT_MTU;
1549         utcp->timeout = DEFAULT_USER_TIMEOUT; // sec
1550         utcp->rto = START_RTO; // usec
1551
1552         return utcp;
1553 }
1554
1555 void utcp_exit(struct utcp *utcp) {
1556         if(!utcp)
1557                 return;
1558         for(int i = 0; i < utcp->nconnections; i++) {
1559                 struct utcp_connection *c = utcp->connections[i];
1560                 if(!c->reapable)
1561                         if(c->recv)
1562                                 c->recv(c, NULL, 0);
1563                 buffer_exit(&c->rcvbuf);
1564                 buffer_exit(&c->sndbuf);
1565                 free(c);
1566         }
1567         free(utcp->connections);
1568         free(utcp);
1569 }
1570
1571 uint16_t utcp_get_mtu(struct utcp *utcp) {
1572         return utcp ? utcp->mtu : 0;
1573 }
1574
1575 void utcp_set_mtu(struct utcp *utcp, uint16_t mtu) {
1576         // TODO: handle overhead of the header
1577         if(utcp)
1578                 utcp->mtu = mtu;
1579 }
1580
1581 void utcp_reset_timers(struct utcp *utcp) {
1582         if(!utcp)
1583                 return;
1584         struct timeval now, then;
1585         gettimeofday(&now, NULL);
1586         then = now;
1587         then.tv_sec += utcp->timeout;
1588         for(int i = 0; i < utcp->nconnections; i++) {
1589                 utcp->connections[i]->rtrx_timeout = now;
1590                 utcp->connections[i]->conn_timeout = then;
1591                 utcp->connections[i]->rtt_start.tv_sec = 0;
1592         }
1593         if(utcp->rto > START_RTO)
1594                 utcp->rto = START_RTO;
1595 }
1596
1597 int utcp_get_user_timeout(struct utcp *u) {
1598         return u ? u->timeout : 0;
1599 }
1600
1601 void utcp_set_user_timeout(struct utcp *u, int timeout) {
1602         if(u)
1603                 u->timeout = timeout;
1604 }
1605
1606 size_t utcp_get_sndbuf(struct utcp_connection *c) {
1607         return c ? c->sndbuf.maxsize : 0;
1608 }
1609
1610 size_t utcp_get_sndbuf_free(struct utcp_connection *c) {
1611         if(c && (c->state == ESTABLISHED || c->state == CLOSE_WAIT))
1612                 return buffer_free(&c->sndbuf);
1613         else
1614                 return 0;
1615 }
1616
1617 void utcp_set_sndbuf(struct utcp_connection *c, size_t size) {
1618         if(!c)
1619                 return;
1620         c->sndbuf.maxsize = size;
1621         if(c->sndbuf.maxsize != size)
1622                 c->sndbuf.maxsize = -1;
1623 }
1624
1625 size_t utcp_get_rcvbuf(struct utcp_connection *c) {
1626         return c ? c->rcvbuf.maxsize : 0;
1627 }
1628
1629 size_t utcp_get_rcvbuf_free(struct utcp_connection *c) {
1630         if(c && (c->state == ESTABLISHED || c->state == CLOSE_WAIT))
1631                 return buffer_free(&c->rcvbuf);
1632         else
1633                 return 0;
1634 }
1635
1636 void utcp_set_rcvbuf(struct utcp_connection *c, size_t size) {
1637         if(!c)
1638                 return;
1639         c->rcvbuf.maxsize = size;
1640         if(c->rcvbuf.maxsize != size)
1641                 c->rcvbuf.maxsize = -1;
1642 }
1643
1644 bool utcp_get_nodelay(struct utcp_connection *c) {
1645         return c ? c->nodelay : false;
1646 }
1647
1648 void utcp_set_nodelay(struct utcp_connection *c, bool nodelay) {
1649         if(c)
1650                 c->nodelay = nodelay;
1651 }
1652
1653 bool utcp_get_keepalive(struct utcp_connection *c) {
1654         return c ? c->keepalive : false;
1655 }
1656
1657 void utcp_set_keepalive(struct utcp_connection *c, bool keepalive) {
1658         if(c)
1659                 c->keepalive = keepalive;
1660 }
1661
1662 size_t utcp_get_outq(struct utcp_connection *c) {
1663         return c ? seqdiff(c->snd.nxt, c->snd.una) : 0;
1664 }
1665
1666 void utcp_set_recv_cb(struct utcp_connection *c, utcp_recv_t recv) {
1667         if(c)
1668                 c->recv = recv;
1669 }
1670
1671 void utcp_set_poll_cb(struct utcp_connection *c, utcp_poll_t poll) {
1672         if(c)
1673                 c->poll = poll;
1674 }
1675
1676 void utcp_set_accept_cb(struct utcp *utcp, utcp_accept_t accept, utcp_pre_accept_t pre_accept) {
1677         if(utcp) {
1678                 utcp->accept = accept;
1679                 utcp->pre_accept = pre_accept;
1680         }
1681 }