]> git.meshlink.io Git - meshlink/blob - src/utcp.c
Fix potential channel buffer corruption when using an application-provided buffer.
[meshlink] / src / utcp.c
1 /*
2     utcp.c -- Userspace TCP
3     Copyright (C) 2014-2017 Guus Sliepen <guus@tinc-vpn.org>
4
5     This program is free software; you can redistribute it and/or modify
6     it under the terms of the GNU General Public License as published by
7     the Free Software Foundation; either version 2 of the License, or
8     (at your option) any later version.
9
10     This program is distributed in the hope that it will be useful,
11     but WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13     GNU General Public License for more details.
14
15     You should have received a copy of the GNU General Public License along
16     with this program; if not, write to the Free Software Foundation, Inc.,
17     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20 #include "system.h"
21 #include <time.h>
22
23 #include "utcp_priv.h"
24
25 #ifndef EBADMSG
26 #define EBADMSG         104
27 #endif
28
29 #ifndef SHUT_RDWR
30 #define SHUT_RDWR 2
31 #endif
32
33 #ifdef poll
34 #undef poll
35 #endif
36
37 #ifndef UTCP_CLOCK
38 #if defined(CLOCK_MONOTONIC_RAW) && defined(__x86_64__)
39 #define UTCP_CLOCK CLOCK_MONOTONIC_RAW
40 #else
41 #define UTCP_CLOCK CLOCK_MONOTONIC
42 #endif
43 #endif
44
45 static void timespec_sub(const struct timespec *a, const struct timespec *b, struct timespec *r) {
46         r->tv_sec = a->tv_sec - b->tv_sec;
47         r->tv_nsec = a->tv_nsec - b->tv_nsec;
48
49         if(r->tv_nsec < 0) {
50                 r->tv_sec--, r->tv_nsec += NSEC_PER_SEC;
51         }
52 }
53
// Return a - b expressed in microseconds; the result is converted to int32_t,
// so only differences that fit in 32 bits are represented correctly.
static int32_t timespec_diff_usec(const struct timespec *a, const struct timespec *b) {
        // Seconds scaled up to microseconds, nanoseconds scaled down to microseconds
        return 1000000 * (a->tv_sec - b->tv_sec) + (a->tv_nsec - b->tv_nsec) / 1000;
}
57
// Return true if a is strictly earlier than b.
static bool timespec_lt(const struct timespec *a, const struct timespec *b) {
        // Seconds decide unless they are equal; then the nanoseconds break the tie
        if(a->tv_sec != b->tv_sec) {
                return a->tv_sec < b->tv_sec;
        }

        return a->tv_nsec < b->tv_nsec;
}
65
// Reset a timestamp to zero seconds and zero nanoseconds.
static void timespec_clear(struct timespec *a) {
        a->tv_nsec = 0;
        a->tv_sec = 0;
}
70
// Return true if the timestamp has a non-zero seconds field.
// Only tv_sec is inspected; tv_nsec is ignored.
static bool timespec_isset(const struct timespec *a) {
        return a->tv_sec != 0;
}
74
75 static long CLOCK_GRANULARITY; // usec
76
// Return the smaller of two sizes.
static inline size_t min(size_t a, size_t b) {
        if(b < a) {
                return b;
        }

        return a;
}
80
// Return the larger of two sizes.
static inline size_t max(size_t a, size_t b) {
        if(b > a) {
                return b;
        }

        return a;
}
84
85 #ifdef UTCP_DEBUG
86 #include <stdarg.h>
87
88 #ifndef UTCP_DEBUG_DATALEN
89 #define UTCP_DEBUG_DATALEN 20
90 #endif
91
92 static void debug(struct utcp_connection *c, const char *format, ...) {
93         struct timespec tv;
94         char buf[1024];
95         int len;
96
97         clock_gettime(CLOCK_REALTIME, &tv);
98         len = snprintf(buf, sizeof(buf), "%ld.%06lu %u:%u ", (long)tv.tv_sec, tv.tv_nsec / 1000, c ? c->src : 0, c ? c->dst : 0);
99         va_list ap;
100         va_start(ap, format);
101         len += vsnprintf(buf + len, sizeof(buf) - len, format, ap);
102         va_end(ap);
103
104         if(len > 0 && (size_t)len < sizeof(buf)) {
105                 fwrite(buf, len, 1, stderr);
106         }
107 }
108
109 static void print_packet(struct utcp_connection *c, const char *dir, const void *pkt, size_t len) {
110         struct hdr hdr;
111
112         if(len < sizeof(hdr)) {
113                 debug(c, "%s: short packet (%lu bytes)\n", dir, (unsigned long)len);
114                 return;
115         }
116
117         memcpy(&hdr, pkt, sizeof(hdr));
118
119         uint32_t datalen;
120
121         if(len > sizeof(hdr)) {
122                 datalen = min(len - sizeof(hdr), UTCP_DEBUG_DATALEN);
123         } else {
124                 datalen = 0;
125         }
126
127
128         const uint8_t *data = (uint8_t *)pkt + sizeof(hdr);
129         char str[datalen * 2 + 1];
130         char *p = str;
131
132         for(uint32_t i = 0; i < datalen; i++) {
133                 *p++ = "0123456789ABCDEF"[data[i] >> 4];
134                 *p++ = "0123456789ABCDEF"[data[i] & 15];
135         }
136
137         *p = 0;
138
139         debug(c, "%s: len %lu src %u dst %u seq %u ack %u wnd %u aux %x ctl %s%s%s%s%s data %s\n",
140               dir, (unsigned long)len, hdr.src, hdr.dst, hdr.seq, hdr.ack, hdr.wnd, hdr.aux,
141               hdr.ctl & SYN ? "SYN" : "",
142               hdr.ctl & RST ? "RST" : "",
143               hdr.ctl & FIN ? "FIN" : "",
144               hdr.ctl & ACK ? "ACK" : "",
145               hdr.ctl & MF ? "MF" : "",
146               str
147              );
148 }
149
150 static void debug_cwnd(struct utcp_connection *c) {
151         debug(c, "snd.cwnd %u snd.ssthresh %u\n", c->snd.cwnd, ~c->snd.ssthresh ? c->snd.ssthresh : 0);
152 }
153 #else
154 #define debug(...) do {} while(0)
155 #define print_packet(...) do {} while(0)
156 #define debug_cwnd(...) do {} while(0)
157 #endif
158
159 static void set_state(struct utcp_connection *c, enum state state) {
160         c->state = state;
161
162         if(state == ESTABLISHED) {
163                 timespec_clear(&c->conn_timeout);
164         }
165
166         debug(c, "state %s\n", strstate[state]);
167 }
168
169 static bool fin_wanted(struct utcp_connection *c, uint32_t seq) {
170         if(seq != c->snd.last) {
171                 return false;
172         }
173
174         switch(c->state) {
175         case FIN_WAIT_1:
176         case CLOSING:
177         case LAST_ACK:
178                 return true;
179
180         default:
181                 return false;
182         }
183 }
184
185 static bool is_reliable(struct utcp_connection *c) {
186         return c->flags & UTCP_RELIABLE;
187 }
188
// Return the signed distance a - b between two sequence numbers.
// The subtraction is done modulo 2^32, so the result stays meaningful when
// the sequence numbers wrap around.
static int32_t seqdiff(uint32_t a, uint32_t b) {
        uint32_t delta = a - b;
        return delta;
}
192
193 // Buffer functions
194 static bool buffer_wraps(struct buffer *buf) {
195         return buf->size - buf->offset < buf->used;
196 }
197
198 static bool buffer_resize(struct buffer *buf, uint32_t newsize) {
199         char *newdata = realloc(buf->data, newsize);
200
201         if(!newdata) {
202                 return false;
203         }
204
205         buf->data = newdata;
206
207         if(buffer_wraps(buf)) {
208                 // Shift the right part of the buffer until it hits the end of the new buffer.
209                 // Old situation:
210                 // [345......012]
211                 // New situation:
212                 // [345.........|........012]
213                 uint32_t tailsize = buf->size - buf->offset;
214                 uint32_t newoffset = newsize - tailsize;
215                 memmove(buf->data + newoffset, buf->data + buf->offset, tailsize);
216                 buf->offset = newoffset;
217         }
218
219         buf->size = newsize;
220         return true;
221 }
222
223 // Store data into the buffer
224 static ssize_t buffer_put_at(struct buffer *buf, size_t offset, const void *data, size_t len) {
225         debug(NULL, "buffer_put_at %lu %lu %lu\n", (unsigned long)buf->used, (unsigned long)offset, (unsigned long)len);
226
227         // Ensure we don't store more than maxsize bytes in total
228         size_t required = offset + len;
229
230         if(required > buf->maxsize) {
231                 if(offset >= buf->maxsize) {
232                         return 0;
233                 }
234
235                 len = buf->maxsize - offset;
236                 required = buf->maxsize;
237         }
238
239         // Check if we need to resize the buffer
240         if(required > buf->size) {
241                 size_t newsize = buf->size;
242
243                 if(!newsize) {
244                         newsize = 4096;
245                 }
246
247                 do {
248                         newsize *= 2;
249                 } while(newsize < required);
250
251                 if(newsize > buf->maxsize) {
252                         newsize = buf->maxsize;
253                 }
254
255                 if(!buffer_resize(buf, newsize)) {
256                         return -1;
257                 }
258         }
259
260         uint32_t realoffset = buf->offset + offset;
261
262         if(buf->size - buf->offset <= offset) {
263                 // The offset wrapped
264                 realoffset -= buf->size;
265         }
266
267         if(buf->size - realoffset < len) {
268                 // The new chunk of data must be wrapped
269                 memcpy(buf->data + realoffset, data, buf->size - realoffset);
270                 memcpy(buf->data, (char *)data + buf->size - realoffset, len - (buf->size - realoffset));
271         } else {
272                 memcpy(buf->data + realoffset, data, len);
273         }
274
275         if(required > buf->used) {
276                 buf->used = required;
277         }
278
279         return len;
280 }
281
282 static ssize_t buffer_put(struct buffer *buf, const void *data, size_t len) {
283         return buffer_put_at(buf, buf->used, data, len);
284 }
285
286 // Copy data from the buffer without removing it.
287 static ssize_t buffer_copy(struct buffer *buf, void *data, size_t offset, size_t len) {
288         // Ensure we don't copy more than is actually stored in the buffer
289         if(offset >= buf->used) {
290                 return 0;
291         }
292
293         if(buf->used - offset < len) {
294                 len = buf->used - offset;
295         }
296
297         uint32_t realoffset = buf->offset + offset;
298
299         if(buf->size - buf->offset <= offset) {
300                 // The offset wrapped
301                 realoffset -= buf->size;
302         }
303
304         if(buf->size - realoffset < len) {
305                 // The data is wrapped
306                 memcpy(data, buf->data + realoffset, buf->size - realoffset);
307                 memcpy((char *)data + buf->size - realoffset, buf->data, len - (buf->size - realoffset));
308         } else {
309                 memcpy(data, buf->data + realoffset, len);
310         }
311
312         return len;
313 }
314
315 // Copy data from the buffer without removing it.
316 static ssize_t buffer_call(struct utcp_connection *c, struct buffer *buf, size_t offset, size_t len) {
317         if(!c->recv) {
318                 return len;
319         }
320
321         // Ensure we don't copy more than is actually stored in the buffer
322         if(offset >= buf->used) {
323                 return 0;
324         }
325
326         if(buf->used - offset < len) {
327                 len = buf->used - offset;
328         }
329
330         uint32_t realoffset = buf->offset + offset;
331
332         if(buf->size - buf->offset <= offset) {
333                 // The offset wrapped
334                 realoffset -= buf->size;
335         }
336
337         if(buf->size - realoffset < len) {
338                 // The data is wrapped
339                 ssize_t rx1 = c->recv(c, buf->data + realoffset, buf->size - realoffset);
340
341                 if(rx1 < buf->size - realoffset) {
342                         return rx1;
343                 }
344
345                 // The channel might have been closed by the previous callback
346                 if(!c->recv) {
347                         return len;
348                 }
349
350                 ssize_t rx2 = c->recv(c, buf->data, len - (buf->size - realoffset));
351
352                 if(rx2 < 0) {
353                         return rx2;
354                 } else {
355                         return rx1 + rx2;
356                 }
357         } else {
358                 return c->recv(c, buf->data + realoffset, len);
359         }
360 }
361
362 // Discard data from the buffer.
363 static ssize_t buffer_discard(struct buffer *buf, size_t len) {
364         if(buf->used < len) {
365                 len = buf->used;
366         }
367
368         if(buf->size - buf->offset <= len) {
369                 buf->offset -= buf->size;
370         }
371
372         if(buf->used == len) {
373                 buf->offset = 0;
374         } else {
375                 buf->offset += len;
376         }
377
378         buf->used -= len;
379
380         return len;
381 }
382
383 static void buffer_clear(struct buffer *buf) {
384         buf->used = 0;
385         buf->offset = 0;
386 }
387
388 static bool buffer_set_size(struct buffer *buf, uint32_t minsize, uint32_t maxsize) {
389         if(maxsize < minsize) {
390                 maxsize = minsize;
391         }
392
393         buf->maxsize = maxsize;
394
395         return buf->size >= minsize || buffer_resize(buf, minsize);
396 }
397
398 static void buffer_exit(struct buffer *buf) {
399         if(!buf->external) {
400                 free(buf->data);
401         }
402
403         memset(buf, 0, sizeof(*buf));
404 }
405
406 static uint32_t buffer_free(const struct buffer *buf) {
407         return buf->maxsize > buf->used ? buf->maxsize - buf->used : 0;
408 }
409
410 // Connections are stored in a sorted list.
411 // This gives O(log(N)) lookup time, O(N log(N)) insertion time and O(N) deletion time.
412
413 static int compare(const void *va, const void *vb) {
414         assert(va && vb);
415
416         const struct utcp_connection *a = *(struct utcp_connection **)va;
417         const struct utcp_connection *b = *(struct utcp_connection **)vb;
418
419         assert(a && b);
420
421         int c = (int)a->src - (int)b->src;
422
423         if(c) {
424                 return c;
425         }
426
427         c = (int)a->dst - (int)b->dst;
428         return c;
429 }
430
431 static struct utcp_connection *find_connection(const struct utcp *utcp, uint16_t src, uint16_t dst) {
432         if(!utcp->nconnections) {
433                 return NULL;
434         }
435
436         struct utcp_connection key = {
437                 .src = src,
438                 .dst = dst,
439         }, *keyp = &key;
440         struct utcp_connection **match = bsearch(&keyp, utcp->connections, utcp->nconnections, sizeof(*utcp->connections), compare);
441         return match ? *match : NULL;
442 }
443
444 static void free_connection(struct utcp_connection *c) {
445         struct utcp *utcp = c->utcp;
446         struct utcp_connection **cp = bsearch(&c, utcp->connections, utcp->nconnections, sizeof(*utcp->connections), compare);
447
448         assert(cp);
449
450         int i = cp - utcp->connections;
451         memmove(cp, cp + 1, (utcp->nconnections - i - 1) * sizeof(*cp));
452         utcp->nconnections--;
453
454         buffer_exit(&c->rcvbuf);
455         buffer_exit(&c->sndbuf);
456         free(c);
457 }
458
459 static struct utcp_connection *allocate_connection(struct utcp *utcp, uint16_t src, uint16_t dst) {
460         // Check whether this combination of src and dst is free
461
462         if(src) {
463                 if(find_connection(utcp, src, dst)) {
464                         errno = EADDRINUSE;
465                         return NULL;
466                 }
467         } else { // If src == 0, generate a random port number with the high bit set
468                 if(utcp->nconnections >= 32767) {
469                         errno = ENOMEM;
470                         return NULL;
471                 }
472
473                 src = rand() | 0x8000;
474
475                 while(find_connection(utcp, src, dst)) {
476                         src++;
477                 }
478         }
479
480         // Allocate memory for the new connection
481
482         if(utcp->nconnections >= utcp->nallocated) {
483                 if(!utcp->nallocated) {
484                         utcp->nallocated = 4;
485                 } else {
486                         utcp->nallocated *= 2;
487                 }
488
489                 struct utcp_connection **new_array = realloc(utcp->connections, utcp->nallocated * sizeof(*utcp->connections));
490
491                 if(!new_array) {
492                         return NULL;
493                 }
494
495                 utcp->connections = new_array;
496         }
497
498         struct utcp_connection *c = calloc(1, sizeof(*c));
499
500         if(!c) {
501                 return NULL;
502         }
503
504         if(!buffer_set_size(&c->sndbuf, DEFAULT_SNDBUFSIZE, DEFAULT_MAXSNDBUFSIZE)) {
505                 free(c);
506                 return NULL;
507         }
508
509         if(!buffer_set_size(&c->rcvbuf, DEFAULT_RCVBUFSIZE, DEFAULT_MAXRCVBUFSIZE)) {
510                 buffer_exit(&c->sndbuf);
511                 free(c);
512                 return NULL;
513         }
514
515         // Fill in the details
516
517         c->src = src;
518         c->dst = dst;
519 #ifdef UTCP_DEBUG
520         c->snd.iss = 0;
521 #else
522         c->snd.iss = rand();
523 #endif
524         c->snd.una = c->snd.iss;
525         c->snd.nxt = c->snd.iss + 1;
526         c->snd.last = c->snd.nxt;
527         c->snd.cwnd = (utcp->mss > 2190 ? 2 : utcp->mss > 1095 ? 3 : 4) * utcp->mss;
528         c->snd.ssthresh = ~0;
529         debug_cwnd(c);
530         c->srtt = 0;
531         c->rttvar = 0;
532         c->rto = START_RTO;
533         c->utcp = utcp;
534
535         // Add it to the sorted list of connections
536
537         utcp->connections[utcp->nconnections++] = c;
538         qsort(utcp->connections, utcp->nconnections, sizeof(*utcp->connections), compare);
539
540         return c;
541 }
542
// Return the absolute difference between two unsigned values.
static inline uint32_t absdiff(uint32_t a, uint32_t b) {
        return a > b ? a - b : b - a;
}
550
551 // Update RTT variables. See RFC 6298.
552 static void update_rtt(struct utcp_connection *c, uint32_t rtt) {
553         if(!rtt) {
554                 debug(c, "invalid rtt\n");
555                 return;
556         }
557
558         if(!c->srtt) {
559                 c->srtt = rtt;
560                 c->rttvar = rtt / 2;
561         } else {
562                 c->rttvar = (c->rttvar * 3 + absdiff(c->srtt, rtt)) / 4;
563                 c->srtt = (c->srtt * 7 + rtt) / 8;
564         }
565
566         c->rto = c->srtt + max(4 * c->rttvar, CLOCK_GRANULARITY);
567
568         if(c->rto > MAX_RTO) {
569                 c->rto = MAX_RTO;
570         }
571
572         debug(c, "rtt %u srtt %u rttvar %u rto %u\n", rtt, c->srtt, c->rttvar, c->rto);
573 }
574
575 static void start_retransmit_timer(struct utcp_connection *c) {
576         clock_gettime(UTCP_CLOCK, &c->rtrx_timeout);
577
578         uint32_t rto = c->rto;
579
580         while(rto > USEC_PER_SEC) {
581                 c->rtrx_timeout.tv_sec++;
582                 rto -= USEC_PER_SEC;
583         }
584
585         c->rtrx_timeout.tv_nsec += rto * 1000;
586
587         if(c->rtrx_timeout.tv_nsec >= NSEC_PER_SEC) {
588                 c->rtrx_timeout.tv_nsec -= NSEC_PER_SEC;
589                 c->rtrx_timeout.tv_sec++;
590         }
591
592         debug(c, "rtrx_timeout %ld.%06lu\n", c->rtrx_timeout.tv_sec, c->rtrx_timeout.tv_nsec);
593 }
594
595 static void stop_retransmit_timer(struct utcp_connection *c) {
596         timespec_clear(&c->rtrx_timeout);
597         debug(c, "rtrx_timeout cleared\n");
598 }
599
600 struct utcp_connection *utcp_connect_ex(struct utcp *utcp, uint16_t dst, utcp_recv_t recv, void *priv, uint32_t flags) {
601         struct utcp_connection *c = allocate_connection(utcp, 0, dst);
602
603         if(!c) {
604                 return NULL;
605         }
606
607         assert((flags & ~0x1f) == 0);
608
609         c->flags = flags;
610         c->recv = recv;
611         c->priv = priv;
612
613         struct {
614                 struct hdr hdr;
615                 uint8_t init[4];
616         } pkt;
617
618         pkt.hdr.src = c->src;
619         pkt.hdr.dst = c->dst;
620         pkt.hdr.seq = c->snd.iss;
621         pkt.hdr.ack = 0;
622         pkt.hdr.wnd = c->rcvbuf.maxsize;
623         pkt.hdr.ctl = SYN;
624         pkt.hdr.aux = 0x0101;
625         pkt.init[0] = 1;
626         pkt.init[1] = 0;
627         pkt.init[2] = 0;
628         pkt.init[3] = flags & 0x7;
629
630         set_state(c, SYN_SENT);
631
632         print_packet(c, "send", &pkt, sizeof(pkt));
633         utcp->send(utcp, &pkt, sizeof(pkt));
634
635         clock_gettime(UTCP_CLOCK, &c->conn_timeout);
636         c->conn_timeout.tv_sec += utcp->timeout;
637
638         start_retransmit_timer(c);
639
640         return c;
641 }
642
643 struct utcp_connection *utcp_connect(struct utcp *utcp, uint16_t dst, utcp_recv_t recv, void *priv) {
644         return utcp_connect_ex(utcp, dst, recv, priv, UTCP_TCP);
645 }
646
647 void utcp_accept(struct utcp_connection *c, utcp_recv_t recv, void *priv) {
648         if(c->reapable || c->state != SYN_RECEIVED) {
649                 debug(c, "accept() called on invalid connection in state %s\n", c, strstate[c->state]);
650                 return;
651         }
652
653         debug(c, "accepted %p %p\n", c, recv, priv);
654         c->recv = recv;
655         c->priv = priv;
656         c->do_poll = true;
657         set_state(c, ESTABLISHED);
658 }
659
660 static void ack(struct utcp_connection *c, bool sendatleastone) {
661         int32_t left = seqdiff(c->snd.last, c->snd.nxt);
662         int32_t cwndleft = is_reliable(c) ? min(c->snd.cwnd, c->snd.wnd) - seqdiff(c->snd.nxt, c->snd.una) : MAX_UNRELIABLE_SIZE;
663
664         assert(left >= 0);
665
666         if(cwndleft <= 0) {
667                 left = 0;
668         } else if(cwndleft < left) {
669                 left = cwndleft;
670
671                 if(!sendatleastone || cwndleft > c->utcp->mss) {
672                         left -= left % c->utcp->mss;
673                 }
674         }
675
676         debug(c, "cwndleft %d left %d\n", cwndleft, left);
677
678         if(!left && !sendatleastone) {
679                 return;
680         }
681
682         struct {
683                 struct hdr hdr;
684                 uint8_t data[];
685         } *pkt = c->utcp->pkt;
686
687         pkt->hdr.src = c->src;
688         pkt->hdr.dst = c->dst;
689         pkt->hdr.ack = c->rcv.nxt;
690         pkt->hdr.wnd = is_reliable(c) ? c->rcvbuf.maxsize : 0;
691         pkt->hdr.ctl = ACK;
692         pkt->hdr.aux = 0;
693
694         do {
695                 uint32_t seglen = left > c->utcp->mss ? c->utcp->mss : left;
696                 pkt->hdr.seq = c->snd.nxt;
697
698                 buffer_copy(&c->sndbuf, pkt->data, seqdiff(c->snd.nxt, c->snd.una), seglen);
699
700                 c->snd.nxt += seglen;
701                 left -= seglen;
702
703                 if(!is_reliable(c)) {
704                         if(left) {
705                                 pkt->hdr.ctl |= MF;
706                         } else {
707                                 pkt->hdr.ctl &= ~MF;
708                         }
709                 }
710
711                 if(seglen && fin_wanted(c, c->snd.nxt)) {
712                         seglen--;
713                         pkt->hdr.ctl |= FIN;
714                 }
715
716                 if(!c->rtt_start.tv_sec) {
717                         // Start RTT measurement
718                         clock_gettime(UTCP_CLOCK, &c->rtt_start);
719                         c->rtt_seq = pkt->hdr.seq + seglen;
720                         debug(c, "starting RTT measurement, expecting ack %u\n", c->rtt_seq);
721                 }
722
723                 print_packet(c, "send", pkt, sizeof(pkt->hdr) + seglen);
724                 c->utcp->send(c->utcp, pkt, sizeof(pkt->hdr) + seglen);
725
726                 if(left && !is_reliable(c)) {
727                         pkt->hdr.wnd += seglen;
728                 }
729         } while(left);
730 }
731
732 ssize_t utcp_send(struct utcp_connection *c, const void *data, size_t len) {
733         if(c->reapable) {
734                 debug(c, "send() called on closed connection\n");
735                 errno = EBADF;
736                 return -1;
737         }
738
739         switch(c->state) {
740         case CLOSED:
741         case LISTEN:
742                 debug(c, "send() called on unconnected connection\n");
743                 errno = ENOTCONN;
744                 return -1;
745
746         case SYN_SENT:
747         case SYN_RECEIVED:
748         case ESTABLISHED:
749         case CLOSE_WAIT:
750                 break;
751
752         case FIN_WAIT_1:
753         case FIN_WAIT_2:
754         case CLOSING:
755         case LAST_ACK:
756         case TIME_WAIT:
757                 debug(c, "send() called on closed connection\n");
758                 errno = EPIPE;
759                 return -1;
760         }
761
762         // Exit early if we have nothing to send.
763
764         if(!len) {
765                 return 0;
766         }
767
768         if(!data) {
769                 errno = EFAULT;
770                 return -1;
771         }
772
773         // Check if we need to be able to buffer all data
774
775         if(c->flags & UTCP_NO_PARTIAL) {
776                 if(len > buffer_free(&c->sndbuf)) {
777                         if(len > c->sndbuf.maxsize) {
778                                 errno = EMSGSIZE;
779                                 return -1;
780                         } else {
781                                 errno = EWOULDBLOCK;
782                                 return 0;
783                         }
784                 }
785         }
786
787         // Add data to send buffer.
788
789         if(is_reliable(c)) {
790                 len = buffer_put(&c->sndbuf, data, len);
791         } else if(c->state != SYN_SENT && c->state != SYN_RECEIVED) {
792                 if(len > MAX_UNRELIABLE_SIZE || buffer_put(&c->sndbuf, data, len) != (ssize_t)len) {
793                         errno = EMSGSIZE;
794                         return -1;
795                 }
796         } else {
797                 return 0;
798         }
799
800         if(len <= 0) {
801                 if(is_reliable(c)) {
802                         errno = EWOULDBLOCK;
803                         return 0;
804                 } else {
805                         return len;
806                 }
807         }
808
809         c->snd.last += len;
810
811         // Don't send anything yet if the connection has not fully established yet
812
813         if(c->state == SYN_SENT || c->state == SYN_RECEIVED) {
814                 return len;
815         }
816
817         ack(c, false);
818
819         if(!is_reliable(c)) {
820                 c->snd.una = c->snd.nxt = c->snd.last;
821                 buffer_discard(&c->sndbuf, c->sndbuf.used);
822         }
823
824         if(is_reliable(c) && !timespec_isset(&c->rtrx_timeout)) {
825                 start_retransmit_timer(c);
826         }
827
828         if(is_reliable(c) && !timespec_isset(&c->conn_timeout)) {
829                 clock_gettime(UTCP_CLOCK, &c->conn_timeout);
830                 c->conn_timeout.tv_sec += c->utcp->timeout;
831         }
832
833         return len;
834 }
835
836 static void swap_ports(struct hdr *hdr) {
837         uint16_t tmp = hdr->src;
838         hdr->src = hdr->dst;
839         hdr->dst = tmp;
840 }
841
842 static void fast_retransmit(struct utcp_connection *c) {
843         if(c->state == CLOSED || c->snd.last == c->snd.una) {
844                 debug(c, "fast_retransmit() called but nothing to retransmit!\n");
845                 return;
846         }
847
848         struct utcp *utcp = c->utcp;
849
850         struct {
851                 struct hdr hdr;
852                 uint8_t data[];
853         } *pkt = c->utcp->pkt;
854
855         pkt->hdr.src = c->src;
856         pkt->hdr.dst = c->dst;
857         pkt->hdr.wnd = c->rcvbuf.maxsize;
858         pkt->hdr.aux = 0;
859
860         switch(c->state) {
861         case ESTABLISHED:
862         case FIN_WAIT_1:
863         case CLOSE_WAIT:
864         case CLOSING:
865         case LAST_ACK:
866                 // Send unacked data again.
867                 pkt->hdr.seq = c->snd.una;
868                 pkt->hdr.ack = c->rcv.nxt;
869                 pkt->hdr.ctl = ACK;
870                 uint32_t len = min(seqdiff(c->snd.last, c->snd.una), utcp->mss);
871
872                 if(fin_wanted(c, c->snd.una + len)) {
873                         len--;
874                         pkt->hdr.ctl |= FIN;
875                 }
876
877                 buffer_copy(&c->sndbuf, pkt->data, 0, len);
878                 print_packet(c, "rtrx", pkt, sizeof(pkt->hdr) + len);
879                 utcp->send(utcp, pkt, sizeof(pkt->hdr) + len);
880                 break;
881
882         default:
883                 break;
884         }
885 }
886
887 static void retransmit(struct utcp_connection *c) {
888         if(c->state == CLOSED || c->snd.last == c->snd.una) {
889                 debug(c, "retransmit() called but nothing to retransmit!\n");
890                 stop_retransmit_timer(c);
891                 return;
892         }
893
894         struct utcp *utcp = c->utcp;
895
896         if(utcp->retransmit) {
897                 utcp->retransmit(c);
898         }
899
900         struct {
901                 struct hdr hdr;
902                 uint8_t data[];
903         } *pkt = c->utcp->pkt;
904
905         pkt->hdr.src = c->src;
906         pkt->hdr.dst = c->dst;
907         pkt->hdr.wnd = c->rcvbuf.maxsize;
908         pkt->hdr.aux = 0;
909
910         switch(c->state) {
911         case SYN_SENT:
912                 // Send our SYN again
913                 pkt->hdr.seq = c->snd.iss;
914                 pkt->hdr.ack = 0;
915                 pkt->hdr.ctl = SYN;
916                 pkt->hdr.aux = 0x0101;
917                 pkt->data[0] = 1;
918                 pkt->data[1] = 0;
919                 pkt->data[2] = 0;
920                 pkt->data[3] = c->flags & 0x7;
921                 print_packet(c, "rtrx", pkt, sizeof(pkt->hdr) + 4);
922                 utcp->send(utcp, pkt, sizeof(pkt->hdr) + 4);
923                 break;
924
925         case SYN_RECEIVED:
926                 // Send SYNACK again
927                 pkt->hdr.seq = c->snd.nxt;
928                 pkt->hdr.ack = c->rcv.nxt;
929                 pkt->hdr.ctl = SYN | ACK;
930                 print_packet(c, "rtrx", pkt, sizeof(pkt->hdr));
931                 utcp->send(utcp, pkt, sizeof(pkt->hdr));
932                 break;
933
934         case ESTABLISHED:
935         case FIN_WAIT_1:
936         case CLOSE_WAIT:
937         case CLOSING:
938         case LAST_ACK:
939                 // Send unacked data again.
940                 pkt->hdr.seq = c->snd.una;
941                 pkt->hdr.ack = c->rcv.nxt;
942                 pkt->hdr.ctl = ACK;
943                 uint32_t len = min(seqdiff(c->snd.last, c->snd.una), utcp->mss);
944
945                 if(fin_wanted(c, c->snd.una + len)) {
946                         len--;
947                         pkt->hdr.ctl |= FIN;
948                 }
949
950                 // RFC 5681 slow start after timeout
951                 uint32_t flightsize = seqdiff(c->snd.nxt, c->snd.una);
952                 c->snd.ssthresh = max(flightsize / 2, utcp->mss * 2); // eq. 4
953                 c->snd.cwnd = utcp->mss;
954                 debug_cwnd(c);
955
956                 buffer_copy(&c->sndbuf, pkt->data, 0, len);
957                 print_packet(c, "rtrx", pkt, sizeof(pkt->hdr) + len);
958                 utcp->send(utcp, pkt, sizeof(pkt->hdr) + len);
959
960                 c->snd.nxt = c->snd.una + len;
961                 break;
962
963         case CLOSED:
964         case LISTEN:
965         case TIME_WAIT:
966         case FIN_WAIT_2:
967                 // We shouldn't need to retransmit anything in this state.
968 #ifdef UTCP_DEBUG
969                 abort();
970 #endif
971                 stop_retransmit_timer(c);
972                 goto cleanup;
973         }
974
975         start_retransmit_timer(c);
976         c->rto *= 2;
977
978         if(c->rto > MAX_RTO) {
979                 c->rto = MAX_RTO;
980         }
981
982         c->rtt_start.tv_sec = 0; // invalidate RTT timer
983         c->dupack = 0; // cancel any ongoing fast recovery
984
985 cleanup:
986         return;
987 }
988
989 /* Update receive buffer and SACK entries after consuming data.
990  *
991  * Situation:
992  *
993  * |.....0000..1111111111.....22222......3333|
994  * |---------------^
995  *
996  * 0..3 represent the SACK entries. The ^ indicates up to which point we want
997  * to remove data from the receive buffer. The idea is to substract "len"
998  * from the offset of all the SACK entries, and then remove/cut down entries
999  * that are shifted to before the start of the receive buffer.
1000  *
1001  * There are three cases:
1002  * - the SACK entry is after ^, in that case just change the offset.
1003  * - the SACK entry starts before and ends after ^, so we have to
1004  *   change both its offset and size.
1005  * - the SACK entry is completely before ^, in that case delete it.
1006  */
1007 static void sack_consume(struct utcp_connection *c, size_t len) {
1008         debug(c, "sack_consume %lu\n", (unsigned long)len);
1009
1010         if(len > c->rcvbuf.used) {
1011                 debug(c, "all SACK entries consumed\n");
1012                 c->sacks[0].len = 0;
1013                 return;
1014         }
1015
1016         buffer_discard(&c->rcvbuf, len);
1017
1018         for(int i = 0; i < NSACKS && c->sacks[i].len;) {
1019                 if(len < c->sacks[i].offset) {
1020                         c->sacks[i].offset -= len;
1021                         i++;
1022                 } else if(len < c->sacks[i].offset + c->sacks[i].len) {
1023                         c->sacks[i].len -= len - c->sacks[i].offset;
1024                         c->sacks[i].offset = 0;
1025                         i++;
1026                 } else {
1027                         if(i < NSACKS - 1) {
1028                                 memmove(&c->sacks[i], &c->sacks[i + 1], (NSACKS - 1 - i) * sizeof(c->sacks)[i]);
1029                                 c->sacks[NSACKS - 1].len = 0;
1030                         } else {
1031                                 c->sacks[i].len = 0;
1032                                 break;
1033                         }
1034                 }
1035         }
1036
1037         for(int i = 0; i < NSACKS && c->sacks[i].len; i++) {
1038                 debug(c, "SACK[%d] offset %u len %u\n", i, c->sacks[i].offset, c->sacks[i].len);
1039         }
1040 }
1041
1042 static void handle_out_of_order(struct utcp_connection *c, uint32_t offset, const void *data, size_t len) {
1043         debug(c, "out of order packet, offset %u\n", offset);
1044         // Packet loss or reordering occured. Store the data in the buffer.
1045         ssize_t rxd = buffer_put_at(&c->rcvbuf, offset, data, len);
1046
1047         if(rxd <= 0) {
1048                 debug(c, "packet outside receive buffer, dropping\n");
1049                 return;
1050         }
1051
1052         if((size_t)rxd < len) {
1053                 debug(c, "packet partially outside receive buffer\n");
1054                 len = rxd;
1055         }
1056
1057         // Make note of where we put it.
1058         for(int i = 0; i < NSACKS; i++) {
1059                 if(!c->sacks[i].len) { // nothing to merge, add new entry
1060                         debug(c, "new SACK entry %d\n", i);
1061                         c->sacks[i].offset = offset;
1062                         c->sacks[i].len = rxd;
1063                         break;
1064                 } else if(offset < c->sacks[i].offset) {
1065                         if(offset + rxd < c->sacks[i].offset) { // insert before
1066                                 if(!c->sacks[NSACKS - 1].len) { // only if room left
1067                                         debug(c, "insert SACK entry at %d\n", i);
1068                                         memmove(&c->sacks[i + 1], &c->sacks[i], (NSACKS - i - 1) * sizeof(c->sacks)[i]);
1069                                         c->sacks[i].offset = offset;
1070                                         c->sacks[i].len = rxd;
1071                                 } else {
1072                                         debug(c, "SACK entries full, dropping packet\n");
1073                                 }
1074
1075                                 break;
1076                         } else { // merge
1077                                 debug(c, "merge with start of SACK entry at %d\n", i);
1078                                 c->sacks[i].offset = offset;
1079                                 break;
1080                         }
1081                 } else if(offset <= c->sacks[i].offset + c->sacks[i].len) {
1082                         if(offset + rxd > c->sacks[i].offset + c->sacks[i].len) { // merge
1083                                 debug(c, "merge with end of SACK entry at %d\n", i);
1084                                 c->sacks[i].len = offset + rxd - c->sacks[i].offset;
1085                                 // TODO: handle potential merge with next entry
1086                         }
1087
1088                         break;
1089                 }
1090         }
1091
1092         for(int i = 0; i < NSACKS && c->sacks[i].len; i++) {
1093                 debug(c, "SACK[%d] offset %u len %u\n", i, c->sacks[i].offset, c->sacks[i].len);
1094         }
1095 }
1096
1097 static void handle_in_order(struct utcp_connection *c, const void *data, size_t len) {
1098         if(c->recv) {
1099                 ssize_t rxd = c->recv(c, data, len);
1100
1101                 if(rxd != (ssize_t)len) {
1102                         // TODO: handle the application not accepting all data.
1103                         abort();
1104                 }
1105         }
1106
1107         // Check if we can process out-of-order data now.
1108         if(c->sacks[0].len && len >= c->sacks[0].offset) {
1109                 debug(c, "incoming packet len %lu connected with SACK at %u\n", (unsigned long)len, c->sacks[0].offset);
1110
1111                 if(len < c->sacks[0].offset + c->sacks[0].len) {
1112                         size_t offset = len;
1113                         len = c->sacks[0].offset + c->sacks[0].len;
1114                         size_t remainder = len - offset;
1115
1116                         ssize_t rxd = buffer_call(c, &c->rcvbuf, offset, remainder);
1117
1118                         if(rxd != (ssize_t)remainder) {
1119                                 // TODO: handle the application not accepting all data.
1120                                 abort();
1121                         }
1122                 }
1123         }
1124
1125         if(c->rcvbuf.used) {
1126                 sack_consume(c, len);
1127         }
1128
1129         c->rcv.nxt += len;
1130 }
1131
1132 static void handle_unreliable(struct utcp_connection *c, const struct hdr *hdr, const void *data, size_t len) {
1133         // Fast path for unfragmented packets
1134         if(!hdr->wnd && !(hdr->ctl & MF)) {
1135                 if(c->recv) {
1136                         c->recv(c, data, len);
1137                 }
1138
1139                 c->rcv.nxt = hdr->seq + len;
1140                 return;
1141         }
1142
1143         // Ensure reassembled packet are not larger than 64 kiB
1144         if(hdr->wnd >= MAX_UNRELIABLE_SIZE || hdr->wnd + len > MAX_UNRELIABLE_SIZE) {
1145                 return;
1146         }
1147
1148         // Don't accept out of order fragments
1149         if(hdr->wnd && hdr->seq != c->rcv.nxt) {
1150                 return;
1151         }
1152
1153         // Reset the receive buffer for the first fragment
1154         if(!hdr->wnd) {
1155                 buffer_clear(&c->rcvbuf);
1156         }
1157
1158         ssize_t rxd = buffer_put_at(&c->rcvbuf, hdr->wnd, data, len);
1159
1160         if(rxd != (ssize_t)len) {
1161                 return;
1162         }
1163
1164         // Send the packet if it's the final fragment
1165         if(!(hdr->ctl & MF)) {
1166                 buffer_call(c, &c->rcvbuf, 0, hdr->wnd + len);
1167         }
1168
1169         c->rcv.nxt = hdr->seq + len;
1170 }
1171
1172 static void handle_incoming_data(struct utcp_connection *c, const struct hdr *hdr, const void *data, size_t len) {
1173         if(!is_reliable(c)) {
1174                 handle_unreliable(c, hdr, data, len);
1175                 return;
1176         }
1177
1178         uint32_t offset = seqdiff(hdr->seq, c->rcv.nxt);
1179
1180         if(offset) {
1181                 handle_out_of_order(c, offset, data, len);
1182         } else {
1183                 handle_in_order(c, data, len);
1184         }
1185 }
1186
1187
1188 ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) {
1189         const uint8_t *ptr = data;
1190
1191         if(!utcp) {
1192                 errno = EFAULT;
1193                 return -1;
1194         }
1195
1196         if(!len) {
1197                 return 0;
1198         }
1199
1200         if(!data) {
1201                 errno = EFAULT;
1202                 return -1;
1203         }
1204
1205         // Drop packets smaller than the header
1206
1207         struct hdr hdr;
1208
1209         if(len < sizeof(hdr)) {
1210                 print_packet(NULL, "recv", data, len);
1211                 errno = EBADMSG;
1212                 return -1;
1213         }
1214
1215         // Make a copy from the potentially unaligned data to a struct hdr
1216
1217         memcpy(&hdr, ptr, sizeof(hdr));
1218
1219         // Try to match the packet to an existing connection
1220
1221         struct utcp_connection *c = find_connection(utcp, hdr.dst, hdr.src);
1222         print_packet(c, "recv", data, len);
1223
1224         // Process the header
1225
1226         ptr += sizeof(hdr);
1227         len -= sizeof(hdr);
1228
1229         // Drop packets with an unknown CTL flag
1230
1231         if(hdr.ctl & ~(SYN | ACK | RST | FIN | MF)) {
1232                 print_packet(NULL, "recv", data, len);
1233                 errno = EBADMSG;
1234                 return -1;
1235         }
1236
1237         // Check for auxiliary headers
1238
1239         const uint8_t *init = NULL;
1240
1241         uint16_t aux = hdr.aux;
1242
1243         while(aux) {
1244                 size_t auxlen = 4 * (aux >> 8) & 0xf;
1245                 uint8_t auxtype = aux & 0xff;
1246
1247                 if(len < auxlen) {
1248                         errno = EBADMSG;
1249                         return -1;
1250                 }
1251
1252                 switch(auxtype) {
1253                 case AUX_INIT:
1254                         if(!(hdr.ctl & SYN) || auxlen != 4) {
1255                                 errno = EBADMSG;
1256                                 return -1;
1257                         }
1258
1259                         init = ptr;
1260                         break;
1261
1262                 default:
1263                         errno = EBADMSG;
1264                         return -1;
1265                 }
1266
1267                 len -= auxlen;
1268                 ptr += auxlen;
1269
1270                 if(!(aux & 0x800)) {
1271                         break;
1272                 }
1273
1274                 if(len < 2) {
1275                         errno = EBADMSG;
1276                         return -1;
1277                 }
1278
1279                 memcpy(&aux, ptr, 2);
1280                 len -= 2;
1281                 ptr += 2;
1282         }
1283
1284         bool has_data = len || (hdr.ctl & (SYN | FIN));
1285
1286         // Is it for a new connection?
1287
1288         if(!c) {
1289                 // Ignore RST packets
1290
1291                 if(hdr.ctl & RST) {
1292                         return 0;
1293                 }
1294
1295                 // Is it a SYN packet and are we LISTENing?
1296
1297                 if(hdr.ctl & SYN && !(hdr.ctl & ACK) && utcp->accept) {
1298                         // If we don't want to accept it, send a RST back
1299                         if((utcp->listen && !utcp->listen(utcp, hdr.dst))) {
1300                                 len = 1;
1301                                 goto reset;
1302                         }
1303
1304                         // Try to allocate memory, otherwise send a RST back
1305                         c = allocate_connection(utcp, hdr.dst, hdr.src);
1306
1307                         if(!c) {
1308                                 len = 1;
1309                                 goto reset;
1310                         }
1311
1312                         // Parse auxilliary information
1313                         if(init) {
1314                                 if(init[0] < 1) {
1315                                         len = 1;
1316                                         goto reset;
1317                                 }
1318
1319                                 c->flags = init[3] & 0x7;
1320                         } else {
1321                                 c->flags = UTCP_TCP;
1322                         }
1323
1324 synack:
1325                         // Return SYN+ACK, go to SYN_RECEIVED state
1326                         c->snd.wnd = hdr.wnd;
1327                         c->rcv.irs = hdr.seq;
1328                         c->rcv.nxt = c->rcv.irs + 1;
1329                         set_state(c, SYN_RECEIVED);
1330
1331                         struct {
1332                                 struct hdr hdr;
1333                                 uint8_t data[4];
1334                         } pkt;
1335
1336                         pkt.hdr.src = c->src;
1337                         pkt.hdr.dst = c->dst;
1338                         pkt.hdr.ack = c->rcv.irs + 1;
1339                         pkt.hdr.seq = c->snd.iss;
1340                         pkt.hdr.wnd = c->rcvbuf.maxsize;
1341                         pkt.hdr.ctl = SYN | ACK;
1342
1343                         if(init) {
1344                                 pkt.hdr.aux = 0x0101;
1345                                 pkt.data[0] = 1;
1346                                 pkt.data[1] = 0;
1347                                 pkt.data[2] = 0;
1348                                 pkt.data[3] = c->flags & 0x7;
1349                                 print_packet(c, "send", &pkt, sizeof(hdr) + 4);
1350                                 utcp->send(utcp, &pkt, sizeof(hdr) + 4);
1351                         } else {
1352                                 pkt.hdr.aux = 0;
1353                                 print_packet(c, "send", &pkt, sizeof(hdr));
1354                                 utcp->send(utcp, &pkt, sizeof(hdr));
1355                         }
1356
1357                         start_retransmit_timer(c);
1358                 } else {
1359                         // No, we don't want your packets, send a RST back
1360                         len = 1;
1361                         goto reset;
1362                 }
1363
1364                 return 0;
1365         }
1366
1367         debug(c, "state %s\n", strstate[c->state]);
1368
1369         // In case this is for a CLOSED connection, ignore the packet.
1370         // TODO: make it so incoming packets can never match a CLOSED connection.
1371
1372         if(c->state == CLOSED) {
1373                 debug(c, "got packet for closed connection\n");
1374                 goto reset;
1375         }
1376
1377         // It is for an existing connection.
1378
1379         // 1. Drop invalid packets.
1380
1381         // 1a. Drop packets that should not happen in our current state.
1382
1383         switch(c->state) {
1384         case SYN_SENT:
1385         case SYN_RECEIVED:
1386         case ESTABLISHED:
1387         case FIN_WAIT_1:
1388         case FIN_WAIT_2:
1389         case CLOSE_WAIT:
1390         case CLOSING:
1391         case LAST_ACK:
1392         case TIME_WAIT:
1393                 break;
1394
1395         default:
1396 #ifdef UTCP_DEBUG
1397                 abort();
1398 #endif
1399                 break;
1400         }
1401
1402         // 1b. Discard data that is not in our receive window.
1403
1404         if(is_reliable(c)) {
1405                 bool acceptable;
1406
1407                 if(c->state == SYN_SENT) {
1408                         acceptable = true;
1409                 } else if(len == 0) {
1410                         acceptable = seqdiff(hdr.seq, c->rcv.nxt) >= 0;
1411                 } else {
1412                         int32_t rcv_offset = seqdiff(hdr.seq, c->rcv.nxt);
1413
1414                         // cut already accepted front overlapping
1415                         if(rcv_offset < 0) {
1416                                 acceptable = len > (size_t) - rcv_offset;
1417
1418                                 if(acceptable) {
1419                                         ptr -= rcv_offset;
1420                                         len += rcv_offset;
1421                                         hdr.seq -= rcv_offset;
1422                                 }
1423                         } else {
1424                                 acceptable = seqdiff(hdr.seq, c->rcv.nxt) >= 0 && seqdiff(hdr.seq, c->rcv.nxt) + len <= c->rcvbuf.maxsize;
1425                         }
1426                 }
1427
1428                 if(!acceptable) {
1429                         debug(c, "packet not acceptable, %u <= %u + %lu < %u\n", c->rcv.nxt, hdr.seq, (unsigned long)len, c->rcv.nxt + c->rcvbuf.maxsize);
1430
1431                         // Ignore unacceptable RST packets.
1432                         if(hdr.ctl & RST) {
1433                                 return 0;
1434                         }
1435
1436                         // Otherwise, continue processing.
1437                         len = 0;
1438                 }
1439         } else {
1440 #if UTCP_DEBUG
1441                 int32_t rcv_offset = seqdiff(hdr.seq, c->rcv.nxt);
1442
1443                 if(rcv_offset) {
1444                         debug(c, "packet out of order, offset %u bytes", rcv_offset);
1445                 }
1446
1447 #endif
1448         }
1449
1450         c->snd.wnd = hdr.wnd; // TODO: move below
1451
1452         // 1c. Drop packets with an invalid ACK.
1453         // ackno should not roll back, and it should also not be bigger than what we ever could have sent
1454         // (= snd.una + c->sndbuf.used).
1455
1456         if(!is_reliable(c)) {
1457                 if(hdr.ack != c->snd.last && c->state >= ESTABLISHED) {
1458                         hdr.ack = c->snd.una;
1459                 }
1460         }
1461
1462         // 2. Handle RST packets
1463
1464         if(hdr.ctl & RST) {
1465                 switch(c->state) {
1466                 case SYN_SENT:
1467                         if(!(hdr.ctl & ACK)) {
1468                                 return 0;
1469                         }
1470
1471                         // The peer has refused our connection.
1472                         set_state(c, CLOSED);
1473                         errno = ECONNREFUSED;
1474
1475                         if(c->recv) {
1476                                 c->recv(c, NULL, 0);
1477                         }
1478
1479                         if(c->poll && !c->reapable) {
1480                                 c->poll(c, 0);
1481                         }
1482
1483                         return 0;
1484
1485                 case SYN_RECEIVED:
1486                         if(hdr.ctl & ACK) {
1487                                 return 0;
1488                         }
1489
1490                         // We haven't told the application about this connection yet. Silently delete.
1491                         free_connection(c);
1492                         return 0;
1493
1494                 case ESTABLISHED:
1495                 case FIN_WAIT_1:
1496                 case FIN_WAIT_2:
1497                 case CLOSE_WAIT:
1498                         if(hdr.ctl & ACK) {
1499                                 return 0;
1500                         }
1501
1502                         // The peer has aborted our connection.
1503                         set_state(c, CLOSED);
1504                         errno = ECONNRESET;
1505
1506                         if(c->recv) {
1507                                 c->recv(c, NULL, 0);
1508                         }
1509
1510                         if(c->poll && !c->reapable) {
1511                                 c->poll(c, 0);
1512                         }
1513
1514                         return 0;
1515
1516                 case CLOSING:
1517                 case LAST_ACK:
1518                 case TIME_WAIT:
1519                         if(hdr.ctl & ACK) {
1520                                 return 0;
1521                         }
1522
1523                         // As far as the application is concerned, the connection has already been closed.
1524                         // If it has called utcp_close() already, we can immediately free this connection.
1525                         if(c->reapable) {
1526                                 free_connection(c);
1527                                 return 0;
1528                         }
1529
1530                         // Otherwise, immediately move to the CLOSED state.
1531                         set_state(c, CLOSED);
1532                         return 0;
1533
1534                 default:
1535 #ifdef UTCP_DEBUG
1536                         abort();
1537 #endif
1538                         break;
1539                 }
1540         }
1541
1542         uint32_t advanced;
1543
1544         if(!(hdr.ctl & ACK)) {
1545                 advanced = 0;
1546                 goto skip_ack;
1547         }
1548
1549         // 3. Advance snd.una
1550
1551         if(seqdiff(hdr.ack, c->snd.last) > 0 || seqdiff(hdr.ack, c->snd.una) < 0) {
1552                 debug(c, "packet ack seqno out of range, %u <= %u < %u\n", c->snd.una, hdr.ack, c->snd.una + c->sndbuf.used);
1553                 goto reset;
1554         }
1555
1556         advanced = seqdiff(hdr.ack, c->snd.una);
1557
1558         if(advanced) {
1559                 // RTT measurement
1560                 if(c->rtt_start.tv_sec) {
1561                         if(c->rtt_seq == hdr.ack) {
1562                                 struct timespec now;
1563                                 clock_gettime(UTCP_CLOCK, &now);
1564                                 int32_t diff = timespec_diff_usec(&now, &c->rtt_start);
1565                                 update_rtt(c, diff);
1566                                 c->rtt_start.tv_sec = 0;
1567                         } else if(c->rtt_seq < hdr.ack) {
1568                                 debug(c, "cancelling RTT measurement: %u < %u\n", c->rtt_seq, hdr.ack);
1569                                 c->rtt_start.tv_sec = 0;
1570                         }
1571                 }
1572
1573                 int32_t data_acked = advanced;
1574
1575                 switch(c->state) {
1576                 case SYN_SENT:
1577                 case SYN_RECEIVED:
1578                         data_acked--;
1579                         break;
1580
1581                 // TODO: handle FIN as well.
1582                 default:
1583                         break;
1584                 }
1585
1586                 assert(data_acked >= 0);
1587
1588 #ifndef NDEBUG
1589                 int32_t bufused = seqdiff(c->snd.last, c->snd.una);
1590                 assert(data_acked <= bufused);
1591 #endif
1592
1593                 if(data_acked) {
1594                         buffer_discard(&c->sndbuf, data_acked);
1595
1596                         if(is_reliable(c)) {
1597                                 c->do_poll = true;
1598                         }
1599                 }
1600
1601                 // Also advance snd.nxt if possible
1602                 if(seqdiff(c->snd.nxt, hdr.ack) < 0) {
1603                         c->snd.nxt = hdr.ack;
1604                 }
1605
1606                 c->snd.una = hdr.ack;
1607
1608                 if(c->dupack) {
1609                         if(c->dupack >= 3) {
1610                                 debug(c, "fast recovery ended\n");
1611                                 c->snd.cwnd = c->snd.ssthresh;
1612                         }
1613
1614                         c->dupack = 0;
1615                 }
1616
1617                 // Increase the congestion window according to RFC 5681
1618                 if(c->snd.cwnd < c->snd.ssthresh) {
1619                         c->snd.cwnd += min(advanced, utcp->mss); // eq. 2
1620                 } else {
1621                         c->snd.cwnd += max(1, (utcp->mss * utcp->mss) / c->snd.cwnd); // eq. 3
1622                 }
1623
1624                 if(c->snd.cwnd > c->sndbuf.maxsize) {
1625                         c->snd.cwnd = c->sndbuf.maxsize;
1626                 }
1627
1628                 debug_cwnd(c);
1629
1630                 // Check if we have sent a FIN that is now ACKed.
1631                 switch(c->state) {
1632                 case FIN_WAIT_1:
1633                         if(c->snd.una == c->snd.last) {
1634                                 set_state(c, FIN_WAIT_2);
1635                         }
1636
1637                         break;
1638
1639                 case CLOSING:
1640                         if(c->snd.una == c->snd.last) {
1641                                 clock_gettime(UTCP_CLOCK, &c->conn_timeout);
1642                                 c->conn_timeout.tv_sec += utcp->timeout;
1643                                 set_state(c, TIME_WAIT);
1644                         }
1645
1646                         break;
1647
1648                 default:
1649                         break;
1650                 }
1651         } else {
1652                 if(!len && is_reliable(c) && c->snd.una != c->snd.last) {
1653                         c->dupack++;
1654                         debug(c, "duplicate ACK %d\n", c->dupack);
1655
1656                         if(c->dupack == 3) {
1657                                 // RFC 5681 fast recovery
1658                                 debug(c, "fast recovery started\n", c->dupack);
1659                                 uint32_t flightsize = seqdiff(c->snd.nxt, c->snd.una);
1660                                 c->snd.ssthresh = max(flightsize / 2, utcp->mss * 2); // eq. 4
1661                                 c->snd.cwnd = min(c->snd.ssthresh + 3 * utcp->mss, c->sndbuf.maxsize);
1662
1663                                 if(c->snd.cwnd > c->sndbuf.maxsize) {
1664                                         c->snd.cwnd = c->sndbuf.maxsize;
1665                                 }
1666
1667                                 debug_cwnd(c);
1668
1669                                 fast_retransmit(c);
1670                         } else if(c->dupack > 3) {
1671                                 c->snd.cwnd += utcp->mss;
1672
1673                                 if(c->snd.cwnd > c->sndbuf.maxsize) {
1674                                         c->snd.cwnd = c->sndbuf.maxsize;
1675                                 }
1676
1677                                 debug_cwnd(c);
1678                         }
1679
1680                         // We got an ACK which indicates the other side did get one of our packets.
1681                         // Reset the retransmission timer to avoid going to slow start,
1682                         // but don't touch the connection timeout.
1683                         start_retransmit_timer(c);
1684                 }
1685         }
1686
1687         // 4. Update timers
1688
1689         if(advanced) {
1690                 if(c->snd.una == c->snd.last) {
1691                         stop_retransmit_timer(c);
1692                         timespec_clear(&c->conn_timeout);
1693                 } else if(is_reliable(c)) {
1694                         start_retransmit_timer(c);
1695                         clock_gettime(UTCP_CLOCK, &c->conn_timeout);
1696                         c->conn_timeout.tv_sec += utcp->timeout;
1697                 }
1698         }
1699
1700 skip_ack:
1701         // 5. Process SYN stuff
1702
1703         if(hdr.ctl & SYN) {
1704                 switch(c->state) {
1705                 case SYN_SENT:
1706
1707                         // This is a SYNACK. It should always have ACKed the SYN.
1708                         if(!advanced) {
1709                                 goto reset;
1710                         }
1711
1712                         c->rcv.irs = hdr.seq;
1713                         c->rcv.nxt = hdr.seq + 1;
1714
1715                         if(c->shut_wr) {
1716                                 c->snd.last++;
1717                                 set_state(c, FIN_WAIT_1);
1718                         } else {
1719                                 c->do_poll = true;
1720                                 set_state(c, ESTABLISHED);
1721                         }
1722
1723                         break;
1724
1725                 case SYN_RECEIVED:
1726                         // This is a retransmit of a SYN, send back the SYNACK.
1727                         goto synack;
1728
1729                 case ESTABLISHED:
1730                 case FIN_WAIT_1:
1731                 case FIN_WAIT_2:
1732                 case CLOSE_WAIT:
1733                 case CLOSING:
1734                 case LAST_ACK:
1735                 case TIME_WAIT:
1736                         // This could be a retransmission. Ignore the SYN flag, but send an ACK back.
1737                         break;
1738
1739                 default:
1740 #ifdef UTCP_DEBUG
1741                         abort();
1742 #endif
1743                         return 0;
1744                 }
1745         }
1746
1747         // 6. Process new data
1748
1749         if(c->state == SYN_RECEIVED) {
1750                 // This is the ACK after the SYNACK. It should always have ACKed the SYNACK.
1751                 if(!advanced) {
1752                         goto reset;
1753                 }
1754
1755                 // Are we still LISTENing?
1756                 if(utcp->accept) {
1757                         utcp->accept(c, c->src);
1758                 }
1759
1760                 if(c->state != ESTABLISHED) {
1761                         set_state(c, CLOSED);
1762                         c->reapable = true;
1763                         goto reset;
1764                 }
1765         }
1766
1767         if(len) {
1768                 switch(c->state) {
1769                 case SYN_SENT:
1770                 case SYN_RECEIVED:
1771                         // This should never happen.
1772 #ifdef UTCP_DEBUG
1773                         abort();
1774 #endif
1775                         return 0;
1776
1777                 case ESTABLISHED:
1778                         break;
1779
1780                 case FIN_WAIT_1:
1781                 case FIN_WAIT_2:
1782                         if(c->reapable) {
1783                                 // We already closed the connection and are not interested in more data.
1784                                 goto reset;
1785                         }
1786
1787                         break;
1788
1789                 case CLOSE_WAIT:
1790                 case CLOSING:
1791                 case LAST_ACK:
1792                 case TIME_WAIT:
1793                         // Ehm no, We should never receive more data after a FIN.
1794                         goto reset;
1795
1796                 default:
1797 #ifdef UTCP_DEBUG
1798                         abort();
1799 #endif
1800                         return 0;
1801                 }
1802
1803                 handle_incoming_data(c, &hdr, ptr, len);
1804         }
1805
1806         // 7. Process FIN stuff
1807
1808         if((hdr.ctl & FIN) && (!is_reliable(c) || hdr.seq + len == c->rcv.nxt)) {
1809                 switch(c->state) {
1810                 case SYN_SENT:
1811                 case SYN_RECEIVED:
1812                         // This should never happen.
1813 #ifdef UTCP_DEBUG
1814                         abort();
1815 #endif
1816                         break;
1817
1818                 case ESTABLISHED:
1819                         set_state(c, CLOSE_WAIT);
1820                         break;
1821
1822                 case FIN_WAIT_1:
1823                         set_state(c, CLOSING);
1824                         break;
1825
1826                 case FIN_WAIT_2:
1827                         clock_gettime(UTCP_CLOCK, &c->conn_timeout);
1828                         c->conn_timeout.tv_sec += utcp->timeout;
1829                         set_state(c, TIME_WAIT);
1830                         break;
1831
1832                 case CLOSE_WAIT:
1833                 case CLOSING:
1834                 case LAST_ACK:
1835                 case TIME_WAIT:
1836                         // Ehm, no. We should never receive a second FIN.
1837                         goto reset;
1838
1839                 default:
1840 #ifdef UTCP_DEBUG
1841                         abort();
1842 #endif
1843                         break;
1844                 }
1845
1846                 // FIN counts as one sequence number
1847                 c->rcv.nxt++;
1848                 len++;
1849
1850                 // Inform the application that the peer closed its end of the connection.
1851                 if(c->recv) {
1852                         errno = 0;
1853                         c->recv(c, NULL, 0);
1854                 }
1855         }
1856
1857         // Now we send something back if:
1858         // - we received data, so we have to send back an ACK
1859         //   -> sendatleastone = true
1860         // - or we got an ack, so we should maybe send a bit more data
1861         //   -> sendatleastone = false
1862
1863         if(is_reliable(c) || hdr.ctl & SYN || hdr.ctl & FIN) {
1864                 ack(c, has_data);
1865         }
1866
1867         return 0;
1868
1869 reset:
1870         swap_ports(&hdr);
1871         hdr.wnd = 0;
1872         hdr.aux = 0;
1873
1874         if(hdr.ctl & ACK) {
1875                 hdr.seq = hdr.ack;
1876                 hdr.ctl = RST;
1877         } else {
1878                 hdr.ack = hdr.seq + len;
1879                 hdr.seq = 0;
1880                 hdr.ctl = RST | ACK;
1881         }
1882
1883         print_packet(c, "send", &hdr, sizeof(hdr));
1884         utcp->send(utcp, &hdr, sizeof(hdr));
1885         return 0;
1886
1887 }
1888
1889 int utcp_shutdown(struct utcp_connection *c, int dir) {
1890         debug(c, "shutdown %d at %u\n", dir, c ? c->snd.last : 0);
1891
1892         if(!c) {
1893                 errno = EFAULT;
1894                 return -1;
1895         }
1896
1897         if(c->reapable) {
1898                 debug(c, "shutdown() called on closed connection\n");
1899                 errno = EBADF;
1900                 return -1;
1901         }
1902
1903         if(!(dir == UTCP_SHUT_RD || dir == UTCP_SHUT_WR || dir == UTCP_SHUT_RDWR)) {
1904                 errno = EINVAL;
1905                 return -1;
1906         }
1907
1908         // TCP does not have a provision for stopping incoming packets.
1909         // The best we can do is to just ignore them.
1910         if(dir == UTCP_SHUT_RD || dir == UTCP_SHUT_RDWR) {
1911                 c->recv = NULL;
1912         }
1913
1914         // The rest of the code deals with shutting down writes.
1915         if(dir == UTCP_SHUT_RD) {
1916                 return 0;
1917         }
1918
1919         // Only process shutting down writes once.
1920         if(c->shut_wr) {
1921                 return 0;
1922         }
1923
1924         c->shut_wr = true;
1925
1926         switch(c->state) {
1927         case CLOSED:
1928         case LISTEN:
1929                 errno = ENOTCONN;
1930                 return -1;
1931
1932         case SYN_SENT:
1933                 return 0;
1934
1935         case SYN_RECEIVED:
1936         case ESTABLISHED:
1937                 set_state(c, FIN_WAIT_1);
1938                 break;
1939
1940         case FIN_WAIT_1:
1941         case FIN_WAIT_2:
1942                 return 0;
1943
1944         case CLOSE_WAIT:
1945                 set_state(c, CLOSING);
1946                 break;
1947
1948         case CLOSING:
1949         case LAST_ACK:
1950         case TIME_WAIT:
1951                 return 0;
1952         }
1953
1954         c->snd.last++;
1955
1956         ack(c, false);
1957
1958         if(!timespec_isset(&c->rtrx_timeout)) {
1959                 start_retransmit_timer(c);
1960         }
1961
1962         return 0;
1963 }
1964
1965 static bool reset_connection(struct utcp_connection *c) {
1966         if(!c) {
1967                 errno = EFAULT;
1968                 return false;
1969         }
1970
1971         if(c->reapable) {
1972                 debug(c, "abort() called on closed connection\n");
1973                 errno = EBADF;
1974                 return false;
1975         }
1976
1977         c->recv = NULL;
1978         c->poll = NULL;
1979
1980         switch(c->state) {
1981         case CLOSED:
1982                 return true;
1983
1984         case LISTEN:
1985         case SYN_SENT:
1986         case CLOSING:
1987         case LAST_ACK:
1988         case TIME_WAIT:
1989                 set_state(c, CLOSED);
1990                 return true;
1991
1992         case SYN_RECEIVED:
1993         case ESTABLISHED:
1994         case FIN_WAIT_1:
1995         case FIN_WAIT_2:
1996         case CLOSE_WAIT:
1997                 set_state(c, CLOSED);
1998                 break;
1999         }
2000
2001         // Send RST
2002
2003         struct hdr hdr;
2004
2005         hdr.src = c->src;
2006         hdr.dst = c->dst;
2007         hdr.seq = c->snd.nxt;
2008         hdr.ack = c->rcv.nxt;
2009         hdr.wnd = 0;
2010         hdr.ctl = RST;
2011
2012         print_packet(c, "send", &hdr, sizeof(hdr));
2013         c->utcp->send(c->utcp, &hdr, sizeof(hdr));
2014         return true;
2015 }
2016
2017 // Closes all the opened connections
2018 void utcp_abort_all_connections(struct utcp *utcp) {
2019         if(!utcp) {
2020                 errno = EINVAL;
2021                 return;
2022         }
2023
2024         for(int i = 0; i < utcp->nconnections; i++) {
2025                 struct utcp_connection *c = utcp->connections[i];
2026
2027                 if(c->reapable || c->state == CLOSED) {
2028                         continue;
2029                 }
2030
2031                 utcp_recv_t old_recv = c->recv;
2032                 utcp_poll_t old_poll = c->poll;
2033
2034                 reset_connection(c);
2035
2036                 if(old_recv) {
2037                         errno = 0;
2038                         old_recv(c, NULL, 0);
2039                 }
2040
2041                 if(old_poll && !c->reapable) {
2042                         errno = 0;
2043                         old_poll(c, 0);
2044                 }
2045         }
2046
2047         return;
2048 }
2049
2050 int utcp_close(struct utcp_connection *c) {
2051         if(utcp_shutdown(c, SHUT_RDWR) && errno != ENOTCONN) {
2052                 return -1;
2053         }
2054
2055         c->recv = NULL;
2056         c->poll = NULL;
2057         c->reapable = true;
2058         return 0;
2059 }
2060
2061 int utcp_abort(struct utcp_connection *c) {
2062         if(!reset_connection(c)) {
2063                 return -1;
2064         }
2065
2066         c->reapable = true;
2067         return 0;
2068 }
2069
2070 /* Handle timeouts.
2071  * One call to this function will loop through all connections,
2072  * checking if something needs to be resent or not.
2073  * The return value is the time to the next timeout in milliseconds,
2074  * or maybe a negative value if the timeout is infinite.
2075  */
2076 struct timespec utcp_timeout(struct utcp *utcp) {
2077         struct timespec now;
2078         clock_gettime(UTCP_CLOCK, &now);
2079         struct timespec next = {now.tv_sec + 3600, now.tv_nsec};
2080
2081         for(int i = 0; i < utcp->nconnections; i++) {
2082                 struct utcp_connection *c = utcp->connections[i];
2083
2084                 if(!c) {
2085                         continue;
2086                 }
2087
2088                 // delete connections that have been utcp_close()d.
2089                 if(c->state == CLOSED) {
2090                         if(c->reapable) {
2091                                 debug(c, "reaping\n");
2092                                 free_connection(c);
2093                                 i--;
2094                         }
2095
2096                         continue;
2097                 }
2098
2099                 if(timespec_isset(&c->conn_timeout) && timespec_lt(&c->conn_timeout, &now)) {
2100                         errno = ETIMEDOUT;
2101                         c->state = CLOSED;
2102
2103                         if(c->recv) {
2104                                 c->recv(c, NULL, 0);
2105                         }
2106
2107                         if(c->poll && !c->reapable) {
2108                                 c->poll(c, 0);
2109                         }
2110
2111                         continue;
2112                 }
2113
2114                 if(timespec_isset(&c->rtrx_timeout) && timespec_lt(&c->rtrx_timeout, &now)) {
2115                         debug(c, "retransmitting after timeout\n");
2116                         retransmit(c);
2117                 }
2118
2119                 if(c->poll) {
2120                         if((c->state == ESTABLISHED || c->state == CLOSE_WAIT) && c->do_poll) {
2121                                 c->do_poll = false;
2122                                 uint32_t len = buffer_free(&c->sndbuf);
2123
2124                                 if(len) {
2125                                         c->poll(c, len);
2126                                 }
2127                         } else if(c->state == CLOSED) {
2128                                 c->poll(c, 0);
2129                         }
2130                 }
2131
2132                 if(timespec_isset(&c->conn_timeout) && timespec_lt(&c->conn_timeout, &next)) {
2133                         next = c->conn_timeout;
2134                 }
2135
2136                 if(timespec_isset(&c->rtrx_timeout) && timespec_lt(&c->rtrx_timeout, &next)) {
2137                         next = c->rtrx_timeout;
2138                 }
2139         }
2140
2141         struct timespec diff;
2142
2143         timespec_sub(&next, &now, &diff);
2144
2145         return diff;
2146 }
2147
2148 bool utcp_is_active(struct utcp *utcp) {
2149         if(!utcp) {
2150                 return false;
2151         }
2152
2153         for(int i = 0; i < utcp->nconnections; i++)
2154                 if(utcp->connections[i]->state != CLOSED && utcp->connections[i]->state != TIME_WAIT) {
2155                         return true;
2156                 }
2157
2158         return false;
2159 }
2160
2161 struct utcp *utcp_init(utcp_accept_t accept, utcp_listen_t listen, utcp_send_t send, void *priv) {
2162         if(!send) {
2163                 errno = EFAULT;
2164                 return NULL;
2165         }
2166
2167         struct utcp *utcp = calloc(1, sizeof(*utcp));
2168
2169         if(!utcp) {
2170                 return NULL;
2171         }
2172
2173         utcp_set_mtu(utcp, DEFAULT_MTU);
2174
2175         if(!utcp->pkt) {
2176                 free(utcp);
2177                 return NULL;
2178         }
2179
2180         if(!CLOCK_GRANULARITY) {
2181                 struct timespec res;
2182                 clock_getres(UTCP_CLOCK, &res);
2183                 CLOCK_GRANULARITY = res.tv_sec * USEC_PER_SEC + res.tv_nsec / 1000;
2184         }
2185
2186         utcp->accept = accept;
2187         utcp->listen = listen;
2188         utcp->send = send;
2189         utcp->priv = priv;
2190         utcp->timeout = DEFAULT_USER_TIMEOUT; // sec
2191
2192         return utcp;
2193 }
2194
2195 void utcp_exit(struct utcp *utcp) {
2196         if(!utcp) {
2197                 return;
2198         }
2199
2200         for(int i = 0; i < utcp->nconnections; i++) {
2201                 struct utcp_connection *c = utcp->connections[i];
2202
2203                 if(!c->reapable) {
2204                         if(c->recv) {
2205                                 c->recv(c, NULL, 0);
2206                         }
2207
2208                         if(c->poll && !c->reapable) {
2209                                 c->poll(c, 0);
2210                         }
2211                 }
2212
2213                 buffer_exit(&c->rcvbuf);
2214                 buffer_exit(&c->sndbuf);
2215                 free(c);
2216         }
2217
2218         free(utcp->connections);
2219         free(utcp->pkt);
2220         free(utcp);
2221 }
2222
2223 uint16_t utcp_get_mtu(struct utcp *utcp) {
2224         return utcp ? utcp->mtu : 0;
2225 }
2226
2227 uint16_t utcp_get_mss(struct utcp *utcp) {
2228         return utcp ? utcp->mss : 0;
2229 }
2230
2231 void utcp_set_mtu(struct utcp *utcp, uint16_t mtu) {
2232         if(!utcp) {
2233                 return;
2234         }
2235
2236         if(mtu <= sizeof(struct hdr)) {
2237                 return;
2238         }
2239
2240         if(mtu > utcp->mtu) {
2241                 char *new = realloc(utcp->pkt, mtu + sizeof(struct hdr));
2242
2243                 if(!new) {
2244                         return;
2245                 }
2246
2247                 utcp->pkt = new;
2248         }
2249
2250         utcp->mtu = mtu;
2251         utcp->mss = mtu - sizeof(struct hdr);
2252 }
2253
2254 void utcp_reset_timers(struct utcp *utcp) {
2255         if(!utcp) {
2256                 return;
2257         }
2258
2259         struct timespec now, then;
2260
2261         clock_gettime(UTCP_CLOCK, &now);
2262
2263         then = now;
2264
2265         then.tv_sec += utcp->timeout;
2266
2267         for(int i = 0; i < utcp->nconnections; i++) {
2268                 struct utcp_connection *c = utcp->connections[i];
2269
2270                 if(c->reapable) {
2271                         continue;
2272                 }
2273
2274                 if(timespec_isset(&c->rtrx_timeout)) {
2275                         c->rtrx_timeout = now;
2276                 }
2277
2278                 if(timespec_isset(&c->conn_timeout)) {
2279                         c->conn_timeout = then;
2280                 }
2281
2282                 c->rtt_start.tv_sec = 0;
2283
2284                 if(c->rto > START_RTO) {
2285                         c->rto = START_RTO;
2286                 }
2287         }
2288 }
2289
2290 int utcp_get_user_timeout(struct utcp *u) {
2291         return u ? u->timeout : 0;
2292 }
2293
2294 void utcp_set_user_timeout(struct utcp *u, int timeout) {
2295         if(u) {
2296                 u->timeout = timeout;
2297         }
2298 }
2299
2300 size_t utcp_get_sndbuf(struct utcp_connection *c) {
2301         return c ? c->sndbuf.maxsize : 0;
2302 }
2303
2304 size_t utcp_get_sndbuf_free(struct utcp_connection *c) {
2305         if(!c) {
2306                 return 0;
2307         }
2308
2309         switch(c->state) {
2310         case SYN_SENT:
2311         case SYN_RECEIVED:
2312         case ESTABLISHED:
2313         case CLOSE_WAIT:
2314                 return buffer_free(&c->sndbuf);
2315
2316         default:
2317                 return 0;
2318         }
2319 }
2320
2321 static void buffer_transfer(struct buffer *buf, char *newdata, size_t newsize) {
2322         if(buffer_wraps(buf)) {
2323                 // Old situation:
2324                 // [345......012]
2325                 // New situation:
2326                 // [012345......]
2327                 uint32_t tailsize = buf->size - buf->offset;
2328                 memcpy(newdata, buf->data + buf->offset, tailsize);
2329                 memcpy(newdata + tailsize, buf->data, buf->used - tailsize);
2330         } else {
2331                 // Old situation:
2332                 // [....012345..]
2333                 // New situation:
2334                 // [012345......]
2335                 memcpy(newdata, buf->data + buf->offset, buf->used);
2336         }
2337
2338         buf->offset = 0;
2339         buf->size = newsize;
2340 }
2341
2342 static void set_buffer_storage(struct buffer *buf, char *data, size_t size) {
2343         if(size > UINT32_MAX) {
2344                 size = UINT32_MAX;
2345         }
2346
2347         buf->maxsize = size;
2348
2349         if(data) {
2350                 if(buf->external) {
2351                         // Don't allow resizing an external buffer
2352                         abort();
2353                 }
2354
2355                 if(size < buf->used) {
2356                         // Ignore requests for an external buffer if we are already using more than it can store
2357                         return;
2358                 }
2359
2360                 // Transition from internal to external buffer
2361                 buffer_transfer(buf, data, size);
2362                 free(buf->data);
2363                 buf->data = data;
2364                 buf->external = true;
2365         } else if(buf->external) {
2366                 // Transition from external to internal buf
2367                 size_t minsize = buf->used < DEFAULT_SNDBUFSIZE ? DEFAULT_SNDBUFSIZE : buf->used;
2368                 data = malloc(minsize);
2369
2370                 if(!data) {
2371                         // Cannot handle this
2372                         abort();
2373                 }
2374
2375                 buffer_transfer(buf, data, minsize);
2376                 buf->data = data;
2377                 buf->external = false;
2378         }
2379 }
2380
2381 void utcp_set_sndbuf(struct utcp_connection *c, void *data, size_t size) {
2382         if(!c) {
2383                 return;
2384         }
2385
2386         set_buffer_storage(&c->sndbuf, data, size);
2387
2388         c->do_poll = is_reliable(c) && buffer_free(&c->sndbuf);
2389 }
2390
2391 size_t utcp_get_rcvbuf(struct utcp_connection *c) {
2392         return c ? c->rcvbuf.maxsize : 0;
2393 }
2394
2395 size_t utcp_get_rcvbuf_free(struct utcp_connection *c) {
2396         if(c && (c->state == ESTABLISHED || c->state == CLOSE_WAIT)) {
2397                 return buffer_free(&c->rcvbuf);
2398         } else {
2399                 return 0;
2400         }
2401 }
2402
2403 void utcp_set_rcvbuf(struct utcp_connection *c, void *data, size_t size) {
2404         if(!c) {
2405                 return;
2406         }
2407
2408         set_buffer_storage(&c->rcvbuf, data, size);
2409 }
2410
2411 size_t utcp_get_sendq(struct utcp_connection *c) {
2412         return c->sndbuf.used;
2413 }
2414
2415 size_t utcp_get_recvq(struct utcp_connection *c) {
2416         return c->rcvbuf.used;
2417 }
2418
2419 bool utcp_get_nodelay(struct utcp_connection *c) {
2420         return c ? c->nodelay : false;
2421 }
2422
2423 void utcp_set_nodelay(struct utcp_connection *c, bool nodelay) {
2424         if(c) {
2425                 c->nodelay = nodelay;
2426         }
2427 }
2428
2429 bool utcp_get_keepalive(struct utcp_connection *c) {
2430         return c ? c->keepalive : false;
2431 }
2432
2433 void utcp_set_keepalive(struct utcp_connection *c, bool keepalive) {
2434         if(c) {
2435                 c->keepalive = keepalive;
2436         }
2437 }
2438
2439 size_t utcp_get_outq(struct utcp_connection *c) {
2440         return c ? seqdiff(c->snd.nxt, c->snd.una) : 0;
2441 }
2442
2443 void utcp_set_recv_cb(struct utcp_connection *c, utcp_recv_t recv) {
2444         if(c) {
2445                 c->recv = recv;
2446         }
2447 }
2448
2449 void utcp_set_poll_cb(struct utcp_connection *c, utcp_poll_t poll) {
2450         if(c) {
2451                 c->poll = poll;
2452                 c->do_poll = is_reliable(c) && buffer_free(&c->sndbuf);
2453         }
2454 }
2455
2456 void utcp_set_accept_cb(struct utcp *utcp, utcp_accept_t accept, utcp_listen_t listen) {
2457         if(utcp) {
2458                 utcp->accept = accept;
2459                 utcp->listen = listen;
2460         }
2461 }
2462
2463 void utcp_expect_data(struct utcp_connection *c, bool expect) {
2464         if(!c || c->reapable) {
2465                 return;
2466         }
2467
2468         if(!(c->state == ESTABLISHED || c->state == FIN_WAIT_1 || c->state == FIN_WAIT_2)) {
2469                 return;
2470         }
2471
2472         if(expect) {
2473                 // If we expect data, start the connection timer.
2474                 if(!timespec_isset(&c->conn_timeout)) {
2475                         clock_gettime(UTCP_CLOCK, &c->conn_timeout);
2476                         c->conn_timeout.tv_sec += c->utcp->timeout;
2477                 }
2478         } else {
2479                 // If we want to cancel expecting data, only clear the timer when there is no unACKed data.
2480                 if(c->snd.una == c->snd.last) {
2481                         timespec_clear(&c->conn_timeout);
2482                 }
2483         }
2484 }
2485
2486 void utcp_set_flags(struct utcp_connection *c, uint32_t flags) {
2487         c->flags &= ~UTCP_CHANGEABLE_FLAGS;
2488         c->flags |= flags & UTCP_CHANGEABLE_FLAGS;
2489 }
2490
2491 void utcp_offline(struct utcp *utcp, bool offline) {
2492         struct timespec now;
2493         clock_gettime(UTCP_CLOCK, &now);
2494
2495         for(int i = 0; i < utcp->nconnections; i++) {
2496                 struct utcp_connection *c = utcp->connections[i];
2497
2498                 if(c->reapable) {
2499                         continue;
2500                 }
2501
2502                 utcp_expect_data(c, offline);
2503
2504                 if(!offline) {
2505                         if(timespec_isset(&c->rtrx_timeout)) {
2506                                 c->rtrx_timeout = now;
2507                         }
2508
2509                         utcp->connections[i]->rtt_start.tv_sec = 0;
2510
2511                         if(c->rto > START_RTO) {
2512                                 c->rto = START_RTO;
2513                         }
2514                 }
2515         }
2516 }
2517
2518 void utcp_set_retransmit_cb(struct utcp *utcp, utcp_retransmit_t cb) {
2519         utcp->retransmit = cb;
2520 }
2521
2522 void utcp_set_clock_granularity(long granularity) {
2523         CLOCK_GRANULARITY = granularity;
2524 }