]> git.meshlink.io Git - meshlink/blob - src/utcp.c
ec74923dd128a85c4b730fefa015127e40a40c8a
[meshlink] / src / utcp.c
1 /*
2     utcp.c -- Userspace TCP
3     Copyright (C) 2014-2017 Guus Sliepen <guus@tinc-vpn.org>
4
5     This program is free software; you can redistribute it and/or modify
6     it under the terms of the GNU General Public License as published by
7     the Free Software Foundation; either version 2 of the License, or
8     (at your option) any later version.
9
10     This program is distributed in the hope that it will be useful,
11     but WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13     GNU General Public License for more details.
14
15     You should have received a copy of the GNU General Public License along
16     with this program; if not, write to the Free Software Foundation, Inc.,
17     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20 #define _GNU_SOURCE
21
22 #include <assert.h>
23 #include <errno.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <stdint.h>
27 #include <stdbool.h>
28 #include <string.h>
29 #include <unistd.h>
30 #include <time.h>
31
32 #include "utcp_priv.h"
33
34 #ifndef EBADMSG
35 #define EBADMSG         104
36 #endif
37
38 #ifndef SHUT_RDWR
39 #define SHUT_RDWR 2
40 #endif
41
42 #ifdef poll
43 #undef poll
44 #endif
45
46 #ifndef UTCP_CLOCK
47 #if defined(CLOCK_MONOTONIC_RAW) && defined(__x86_64__)
48 #define UTCP_CLOCK CLOCK_MONOTONIC_RAW
49 #else
50 #define UTCP_CLOCK CLOCK_MONOTONIC
51 #endif
52 #endif
53
54 static void timespec_sub(const struct timespec *a, const struct timespec *b, struct timespec *r) {
55         r->tv_sec = a->tv_sec - b->tv_sec;
56         r->tv_nsec = a->tv_nsec - b->tv_nsec;
57
58         if(r->tv_nsec < 0) {
59                 r->tv_sec--, r->tv_nsec += NSEC_PER_SEC;
60         }
61 }
62
// Return a - b in microseconds.
// The computation is carried out in 64 bits so that it cannot overflow a
// narrow time_t/long, and the result is saturated to the int32_t range
// instead of silently wrapping when the two timestamps are far apart.
static int32_t timespec_diff_usec(const struct timespec *a, const struct timespec *b) {
        int64_t sec = (int64_t)a->tv_sec - (int64_t)b->tv_sec;
        int64_t nsec = (int64_t)a->tv_nsec - (int64_t)b->tv_nsec;
        int64_t usec = sec * 1000000 + nsec / 1000;

        if(usec > INT32_MAX) {
                return INT32_MAX;
        }

        if(usec < INT32_MIN) {
                return INT32_MIN;
        }

        return (int32_t)usec;
}
66
// Return true if a is strictly earlier than b.
// Seconds decide first; nanoseconds are only compared when the seconds are equal.
static bool timespec_lt(const struct timespec *a, const struct timespec *b) {
        if(a->tv_sec != b->tv_sec) {
                return a->tv_sec < b->tv_sec;
        }

        return a->tv_nsec < b->tv_nsec;
}
74
// Reset a timestamp to zero (both seconds and nanoseconds).
static void timespec_clear(struct timespec *a) {
        a->tv_nsec = 0;
        a->tv_sec = 0;
}
79
// Return true if the timestamp has a non-zero seconds field.
// Only tv_sec is inspected; tv_nsec does not influence the result.
static bool timespec_isset(const struct timespec *a) {
        return a->tv_sec != 0;
}
83
84 static long CLOCK_GRANULARITY; // usec
85
// Return the smaller of two sizes (b when they are equal).
static inline size_t min(size_t a, size_t b) {
        if(a < b) {
                return a;
        }

        return b;
}
89
// Return the larger of two sizes (b when they are equal).
static inline size_t max(size_t a, size_t b) {
        if(a > b) {
                return a;
        }

        return b;
}
93
94 #ifdef UTCP_DEBUG
95 #include <stdarg.h>
96
97 #ifndef UTCP_DEBUG_DATALEN
98 #define UTCP_DEBUG_DATALEN 20
99 #endif
100
101 static void debug(struct utcp_connection *c, const char *format, ...) {
102         struct timespec tv;
103         char buf[1024];
104         int len;
105
106         clock_gettime(CLOCK_REALTIME, &tv);
107         len = snprintf(buf, sizeof(buf), "%ld.%06lu %u:%u ", (long)tv.tv_sec, tv.tv_nsec / 1000, c ? c->src : 0, c ? c->dst : 0);
108         va_list ap;
109         va_start(ap, format);
110         len += vsnprintf(buf + len, sizeof(buf) - len, format, ap);
111         va_end(ap);
112
113         if(len > 0 && (size_t)len < sizeof(buf)) {
114                 fwrite(buf, len, 1, stderr);
115         }
116 }
117
118 static void print_packet(struct utcp_connection *c, const char *dir, const void *pkt, size_t len) {
119         struct hdr hdr;
120
121         if(len < sizeof(hdr)) {
122                 debug(c, "%s: short packet (%lu bytes)\n", dir, (unsigned long)len);
123                 return;
124         }
125
126         memcpy(&hdr, pkt, sizeof(hdr));
127
128         uint32_t datalen;
129
130         if(len > sizeof(hdr)) {
131                 datalen = min(len - sizeof(hdr), UTCP_DEBUG_DATALEN);
132         } else {
133                 datalen = 0;
134         }
135
136
137         const uint8_t *data = (uint8_t *)pkt + sizeof(hdr);
138         char str[datalen * 2 + 1];
139         char *p = str;
140
141         for(uint32_t i = 0; i < datalen; i++) {
142                 *p++ = "0123456789ABCDEF"[data[i] >> 4];
143                 *p++ = "0123456789ABCDEF"[data[i] & 15];
144         }
145
146         *p = 0;
147
148         debug(c, "%s: len %lu src %u dst %u seq %u ack %u wnd %u aux %x ctl %s%s%s%s%s data %s\n",
149               dir, (unsigned long)len, hdr.src, hdr.dst, hdr.seq, hdr.ack, hdr.wnd, hdr.aux,
150               hdr.ctl & SYN ? "SYN" : "",
151               hdr.ctl & RST ? "RST" : "",
152               hdr.ctl & FIN ? "FIN" : "",
153               hdr.ctl & ACK ? "ACK" : "",
154               hdr.ctl & MF ? "MF" : "",
155               str
156              );
157 }
158
// Log the connection's congestion window and slow start threshold.
// A threshold with all bits set (the value assigned when a connection is
// allocated) is printed as 0.
static void debug_cwnd(struct utcp_connection *c) {
        debug(c, "snd.cwnd %u snd.ssthresh %u\n", c->snd.cwnd, ~c->snd.ssthresh ? c->snd.ssthresh : 0);
}
162 #else
163 #define debug(...) do {} while(0)
164 #define print_packet(...) do {} while(0)
165 #define debug_cwnd(...) do {} while(0)
166 #endif
167
168 static void set_state(struct utcp_connection *c, enum state state) {
169         c->state = state;
170
171         if(state == ESTABLISHED) {
172                 timespec_clear(&c->conn_timeout);
173         }
174
175         debug(c, "state %s\n", strstate[state]);
176 }
177
178 static bool fin_wanted(struct utcp_connection *c, uint32_t seq) {
179         if(seq != c->snd.last) {
180                 return false;
181         }
182
183         switch(c->state) {
184         case FIN_WAIT_1:
185         case CLOSING:
186         case LAST_ACK:
187                 return true;
188
189         default:
190                 return false;
191         }
192 }
193
194 static bool is_reliable(struct utcp_connection *c) {
195         return c->flags & UTCP_RELIABLE;
196 }
197
// Return the signed distance from sequence number b to a.
// The subtraction is done modulo 2^32 and then reinterpreted as signed, so
// the result stays meaningful when the sequence numbers wrap around.
static int32_t seqdiff(uint32_t a, uint32_t b) {
        uint32_t delta = a - b;
        return (int32_t)delta;
}
201
202 // Buffer functions
203 static bool buffer_wraps(struct buffer *buf) {
204         return buf->size - buf->offset < buf->used;
205 }
206
207 static bool buffer_resize(struct buffer *buf, uint32_t newsize) {
208         char *newdata = realloc(buf->data, newsize);
209
210         if(!newdata) {
211                 return false;
212         }
213
214         buf->data = newdata;
215
216         if(buffer_wraps(buf)) {
217                 // Shift the right part of the buffer until it hits the end of the new buffer.
218                 // Old situation:
219                 // [345......012]
220                 // New situation:
221                 // [345.........|........012]
222                 uint32_t tailsize = buf->size - buf->offset;
223                 uint32_t newoffset = newsize - tailsize;
224                 memmove(buf->data + newoffset, buf->data + buf->offset, tailsize);
225                 buf->offset = newoffset;
226         }
227
228         buf->size = newsize;
229         return true;
230 }
231
// Store len bytes of data into the buffer, starting at logical position
// offset (counted from the first stored byte).
//
// Returns the number of bytes stored, which is less than len when the data
// would extend beyond maxsize, and 0 when offset itself is at or beyond
// maxsize. Returns -1 when the storage had to grow and buffer_resize() failed.
// The buffer's used count grows to offset + (stored length) if that is larger
// than the current count.
static ssize_t buffer_put_at(struct buffer *buf, size_t offset, const void *data, size_t len) {
        debug(NULL, "buffer_put_at %lu %lu %lu\n", (unsigned long)buf->used, (unsigned long)offset, (unsigned long)len);

        // Ensure we don't store more than maxsize bytes in total
        size_t required = offset + len;

        if(required > buf->maxsize) {
                if(offset >= buf->maxsize) {
                        return 0;
                }

                // Truncate the chunk so that it ends exactly at maxsize
                len = buf->maxsize - offset;
                required = buf->maxsize;
        }

        // Check if we need to resize the buffer
        if(required > buf->size) {
                size_t newsize = buf->size;

                // An empty buffer starts from a 4096 byte base, which is doubled below
                if(!newsize) {
                        newsize = 4096;
                }

                // Keep doubling (at least once) until the data fits
                do {
                        newsize *= 2;
                } while(newsize < required);

                if(newsize > buf->maxsize) {
                        newsize = buf->maxsize;
                }

                if(!buffer_resize(buf, newsize)) {
                        return -1;
                }
        }

        // Translate the logical offset into a position in the storage area.
        // This must happen after any resize, since resizing can change buf->offset.
        uint32_t realoffset = buf->offset + offset;

        if(buf->size - buf->offset <= offset) {
                // The offset wrapped
                realoffset -= buf->size;
        }

        if(buf->size - realoffset < len) {
                // The new chunk of data must be wrapped
                memcpy(buf->data + realoffset, data, buf->size - realoffset);
                memcpy(buf->data, (char *)data + buf->size - realoffset, len - (buf->size - realoffset));
        } else {
                memcpy(buf->data + realoffset, data, len);
        }

        // Writing inside already used space does not change the used count
        if(required > buf->used) {
                buf->used = required;
        }

        return len;
}
290
291 static ssize_t buffer_put(struct buffer *buf, const void *data, size_t len) {
292         return buffer_put_at(buf, buf->used, data, len);
293 }
294
295 // Copy data from the buffer without removing it.
296 static ssize_t buffer_copy(struct buffer *buf, void *data, size_t offset, size_t len) {
297         // Ensure we don't copy more than is actually stored in the buffer
298         if(offset >= buf->used) {
299                 return 0;
300         }
301
302         if(buf->used - offset < len) {
303                 len = buf->used - offset;
304         }
305
306         uint32_t realoffset = buf->offset + offset;
307
308         if(buf->size - buf->offset <= offset) {
309                 // The offset wrapped
310                 realoffset -= buf->size;
311         }
312
313         if(buf->size - realoffset < len) {
314                 // The data is wrapped
315                 memcpy(data, buf->data + realoffset, buf->size - realoffset);
316                 memcpy((char *)data + buf->size - realoffset, buf->data, len - (buf->size - realoffset));
317         } else {
318                 memcpy(data, buf->data + realoffset, len);
319         }
320
321         return len;
322 }
323
324 // Copy data from the buffer without removing it.
325 static ssize_t buffer_call(struct utcp_connection *c, struct buffer *buf, size_t offset, size_t len) {
326         if(!c->recv) {
327                 return len;
328         }
329
330         // Ensure we don't copy more than is actually stored in the buffer
331         if(offset >= buf->used) {
332                 return 0;
333         }
334
335         if(buf->used - offset < len) {
336                 len = buf->used - offset;
337         }
338
339         uint32_t realoffset = buf->offset + offset;
340
341         if(buf->size - buf->offset <= offset) {
342                 // The offset wrapped
343                 realoffset -= buf->size;
344         }
345
346         if(buf->size - realoffset < len) {
347                 // The data is wrapped
348                 ssize_t rx1 = c->recv(c, buf->data + realoffset, buf->size - realoffset);
349
350                 if(rx1 < buf->size - realoffset) {
351                         return rx1;
352                 }
353
354                 // The channel might have been closed by the previous callback
355                 if(!c->recv) {
356                         return len;
357                 }
358
359                 ssize_t rx2 = c->recv(c, buf->data, len - (buf->size - realoffset));
360
361                 if(rx2 < 0) {
362                         return rx2;
363                 } else {
364                         return rx1 + rx2;
365                 }
366         } else {
367                 return c->recv(c, buf->data + realoffset, len);
368         }
369 }
370
371 // Discard data from the buffer.
372 static ssize_t buffer_discard(struct buffer *buf, size_t len) {
373         if(buf->used < len) {
374                 len = buf->used;
375         }
376
377         if(buf->size - buf->offset <= len) {
378                 buf->offset -= buf->size;
379         }
380
381         if(buf->used == len) {
382                 buf->offset = 0;
383         } else {
384                 buf->offset += len;
385         }
386
387         buf->used -= len;
388
389         return len;
390 }
391
392 static void buffer_clear(struct buffer *buf) {
393         buf->used = 0;
394         buf->offset = 0;
395 }
396
397 static bool buffer_set_size(struct buffer *buf, uint32_t minsize, uint32_t maxsize) {
398         if(maxsize < minsize) {
399                 maxsize = minsize;
400         }
401
402         buf->maxsize = maxsize;
403
404         return buf->size >= minsize || buffer_resize(buf, minsize);
405 }
406
407 static void buffer_exit(struct buffer *buf) {
408         free(buf->data);
409         memset(buf, 0, sizeof(*buf));
410 }
411
412 static uint32_t buffer_free(const struct buffer *buf) {
413         return buf->maxsize > buf->used ? buf->maxsize - buf->used : 0;
414 }
415
416 // Connections are stored in a sorted list.
417 // This gives O(log(N)) lookup time, O(N log(N)) insertion time and O(N) deletion time.
418
419 static int compare(const void *va, const void *vb) {
420         assert(va && vb);
421
422         const struct utcp_connection *a = *(struct utcp_connection **)va;
423         const struct utcp_connection *b = *(struct utcp_connection **)vb;
424
425         assert(a && b);
426
427         int c = (int)a->src - (int)b->src;
428
429         if(c) {
430                 return c;
431         }
432
433         c = (int)a->dst - (int)b->dst;
434         return c;
435 }
436
437 static struct utcp_connection *find_connection(const struct utcp *utcp, uint16_t src, uint16_t dst) {
438         if(!utcp->nconnections) {
439                 return NULL;
440         }
441
442         struct utcp_connection key = {
443                 .src = src,
444                 .dst = dst,
445         }, *keyp = &key;
446         struct utcp_connection **match = bsearch(&keyp, utcp->connections, utcp->nconnections, sizeof(*utcp->connections), compare);
447         return match ? *match : NULL;
448 }
449
450 static void free_connection(struct utcp_connection *c) {
451         struct utcp *utcp = c->utcp;
452         struct utcp_connection **cp = bsearch(&c, utcp->connections, utcp->nconnections, sizeof(*utcp->connections), compare);
453
454         assert(cp);
455
456         int i = cp - utcp->connections;
457         memmove(cp, cp + 1, (utcp->nconnections - i - 1) * sizeof(*cp));
458         utcp->nconnections--;
459
460         buffer_exit(&c->rcvbuf);
461         buffer_exit(&c->sndbuf);
462         free(c);
463 }
464
465 static struct utcp_connection *allocate_connection(struct utcp *utcp, uint16_t src, uint16_t dst) {
466         // Check whether this combination of src and dst is free
467
468         if(src) {
469                 if(find_connection(utcp, src, dst)) {
470                         errno = EADDRINUSE;
471                         return NULL;
472                 }
473         } else { // If src == 0, generate a random port number with the high bit set
474                 if(utcp->nconnections >= 32767) {
475                         errno = ENOMEM;
476                         return NULL;
477                 }
478
479                 src = rand() | 0x8000;
480
481                 while(find_connection(utcp, src, dst)) {
482                         src++;
483                 }
484         }
485
486         // Allocate memory for the new connection
487
488         if(utcp->nconnections >= utcp->nallocated) {
489                 if(!utcp->nallocated) {
490                         utcp->nallocated = 4;
491                 } else {
492                         utcp->nallocated *= 2;
493                 }
494
495                 struct utcp_connection **new_array = realloc(utcp->connections, utcp->nallocated * sizeof(*utcp->connections));
496
497                 if(!new_array) {
498                         return NULL;
499                 }
500
501                 utcp->connections = new_array;
502         }
503
504         struct utcp_connection *c = calloc(1, sizeof(*c));
505
506         if(!c) {
507                 return NULL;
508         }
509
510         if(!buffer_set_size(&c->sndbuf, DEFAULT_SNDBUFSIZE, DEFAULT_MAXSNDBUFSIZE)) {
511                 free(c);
512                 return NULL;
513         }
514
515         if(!buffer_set_size(&c->rcvbuf, DEFAULT_RCVBUFSIZE, DEFAULT_MAXRCVBUFSIZE)) {
516                 buffer_exit(&c->sndbuf);
517                 free(c);
518                 return NULL;
519         }
520
521         // Fill in the details
522
523         c->src = src;
524         c->dst = dst;
525 #ifdef UTCP_DEBUG
526         c->snd.iss = 0;
527 #else
528         c->snd.iss = rand();
529 #endif
530         c->snd.una = c->snd.iss;
531         c->snd.nxt = c->snd.iss + 1;
532         c->snd.last = c->snd.nxt;
533         c->snd.cwnd = (utcp->mss > 2190 ? 2 : utcp->mss > 1095 ? 3 : 4) * utcp->mss;
534         c->snd.ssthresh = ~0;
535         debug_cwnd(c);
536         c->srtt = 0;
537         c->rttvar = 0;
538         c->rto = START_RTO;
539         c->utcp = utcp;
540
541         // Add it to the sorted list of connections
542
543         utcp->connections[utcp->nconnections++] = c;
544         qsort(utcp->connections, utcp->nconnections, sizeof(*utcp->connections), compare);
545
546         return c;
547 }
548
// Return the absolute difference |a - b| of two unsigned values.
static inline uint32_t absdiff(uint32_t a, uint32_t b) {
        return a > b ? a - b : b - a;
}
556
557 // Update RTT variables. See RFC 6298.
558 static void update_rtt(struct utcp_connection *c, uint32_t rtt) {
559         if(!rtt) {
560                 debug(c, "invalid rtt\n");
561                 return;
562         }
563
564         if(!c->srtt) {
565                 c->srtt = rtt;
566                 c->rttvar = rtt / 2;
567         } else {
568                 c->rttvar = (c->rttvar * 3 + absdiff(c->srtt, rtt)) / 4;
569                 c->srtt = (c->srtt * 7 + rtt) / 8;
570         }
571
572         c->rto = c->srtt + max(4 * c->rttvar, CLOCK_GRANULARITY);
573
574         if(c->rto > MAX_RTO) {
575                 c->rto = MAX_RTO;
576         }
577
578         debug(c, "rtt %u srtt %u rttvar %u rto %u\n", rtt, c->srtt, c->rttvar, c->rto);
579 }
580
581 static void start_retransmit_timer(struct utcp_connection *c) {
582         clock_gettime(UTCP_CLOCK, &c->rtrx_timeout);
583
584         uint32_t rto = c->rto;
585
586         while(rto > USEC_PER_SEC) {
587                 c->rtrx_timeout.tv_sec++;
588                 rto -= USEC_PER_SEC;
589         }
590
591         c->rtrx_timeout.tv_nsec += rto * 1000;
592
593         if(c->rtrx_timeout.tv_nsec >= NSEC_PER_SEC) {
594                 c->rtrx_timeout.tv_nsec -= NSEC_PER_SEC;
595                 c->rtrx_timeout.tv_sec++;
596         }
597
598         debug(c, "rtrx_timeout %ld.%06lu\n", c->rtrx_timeout.tv_sec, c->rtrx_timeout.tv_nsec);
599 }
600
601 static void stop_retransmit_timer(struct utcp_connection *c) {
602         timespec_clear(&c->rtrx_timeout);
603         debug(c, "rtrx_timeout cleared\n");
604 }
605
606 struct utcp_connection *utcp_connect_ex(struct utcp *utcp, uint16_t dst, utcp_recv_t recv, void *priv, uint32_t flags) {
607         struct utcp_connection *c = allocate_connection(utcp, 0, dst);
608
609         if(!c) {
610                 return NULL;
611         }
612
613         assert((flags & ~0x1f) == 0);
614
615         c->flags = flags;
616         c->recv = recv;
617         c->priv = priv;
618
619         struct {
620                 struct hdr hdr;
621                 uint8_t init[4];
622         } pkt;
623
624         pkt.hdr.src = c->src;
625         pkt.hdr.dst = c->dst;
626         pkt.hdr.seq = c->snd.iss;
627         pkt.hdr.ack = 0;
628         pkt.hdr.wnd = c->rcvbuf.maxsize;
629         pkt.hdr.ctl = SYN;
630         pkt.hdr.aux = 0x0101;
631         pkt.init[0] = 1;
632         pkt.init[1] = 0;
633         pkt.init[2] = 0;
634         pkt.init[3] = flags & 0x7;
635
636         set_state(c, SYN_SENT);
637
638         print_packet(c, "send", &pkt, sizeof(pkt));
639         utcp->send(utcp, &pkt, sizeof(pkt));
640
641         clock_gettime(UTCP_CLOCK, &c->conn_timeout);
642         c->conn_timeout.tv_sec += utcp->timeout;
643
644         start_retransmit_timer(c);
645
646         return c;
647 }
648
649 struct utcp_connection *utcp_connect(struct utcp *utcp, uint16_t dst, utcp_recv_t recv, void *priv) {
650         return utcp_connect_ex(utcp, dst, recv, priv, UTCP_TCP);
651 }
652
653 void utcp_accept(struct utcp_connection *c, utcp_recv_t recv, void *priv) {
654         if(c->reapable || c->state != SYN_RECEIVED) {
655                 debug(c, "accept() called on invalid connection in state %s\n", c, strstate[c->state]);
656                 return;
657         }
658
659         debug(c, "accepted %p %p\n", c, recv, priv);
660         c->recv = recv;
661         c->priv = priv;
662         set_state(c, ESTABLISHED);
663 }
664
// Send an ACK packet, together with as much not yet sent data from the send
// buffer as is currently allowed.
//
// If sendatleastone is true, a packet is always sent, even if it carries no
// data. Otherwise nothing is sent when there is no data that may go out.
static void ack(struct utcp_connection *c, bool sendatleastone) {
        // Bytes queued but not yet sent
        int32_t left = seqdiff(c->snd.last, c->snd.nxt);
        // For reliable connections: the smaller of the congestion and send windows,
        // minus what is already in flight. Otherwise a fixed limit.
        int32_t cwndleft = is_reliable(c) ? min(c->snd.cwnd, c->snd.wnd) - seqdiff(c->snd.nxt, c->snd.una) : MAX_UNRELIABLE_SIZE;

        assert(left >= 0);

        if(cwndleft <= 0) {
                left = 0;
        } else if(cwndleft < left) {
                left = cwndleft;

                // Round down to a whole number of segments, unless that would
                // leave nothing to send while the caller insists on a packet
                if(!sendatleastone || cwndleft > c->utcp->mss) {
                        left -= left % c->utcp->mss;
                }
        }

        debug(c, "cwndleft %d left %d\n", cwndleft, left);

        if(!left && !sendatleastone) {
                return;
        }

        // The packet is assembled in the utcp instance's shared packet buffer
        struct {
                struct hdr hdr;
                uint8_t data[];
        } *pkt = c->utcp->pkt;

        pkt->hdr.src = c->src;
        pkt->hdr.dst = c->dst;
        pkt->hdr.ack = c->rcv.nxt;
        pkt->hdr.wnd = is_reliable(c) ? c->rcvbuf.maxsize : 0;
        pkt->hdr.ctl = ACK;
        pkt->hdr.aux = 0;

        do {
                // Send at most one MSS worth of data per packet
                uint32_t seglen = left > c->utcp->mss ? c->utcp->mss : left;
                pkt->hdr.seq = c->snd.nxt;

                buffer_copy(&c->sndbuf, pkt->data, seqdiff(c->snd.nxt, c->snd.una), seglen);

                c->snd.nxt += seglen;
                left -= seglen;

                // On unreliable connections MF is set on every packet except the last one
                if(!is_reliable(c)) {
                        if(left) {
                                pkt->hdr.ctl |= MF;
                        } else {
                                pkt->hdr.ctl &= ~MF;
                        }
                }

                // The FIN occupies a sequence number but no payload byte
                if(seglen && fin_wanted(c, c->snd.nxt)) {
                        seglen--;
                        pkt->hdr.ctl |= FIN;
                }

                if(!c->rtt_start.tv_sec) {
                        // Start RTT measurement
                        clock_gettime(UTCP_CLOCK, &c->rtt_start);
                        c->rtt_seq = pkt->hdr.seq + seglen;
                        debug(c, "starting RTT measurement, expecting ack %u\n", c->rtt_seq);
                }

                print_packet(c, "send", pkt, sizeof(pkt->hdr) + seglen);
                c->utcp->send(c->utcp, pkt, sizeof(pkt->hdr) + seglen);

                // NOTE(review): on unreliable connections the wnd field accumulates the
                // bytes sent so far, presumably as the offset of the next fragment --
                // confirm against the receiving code.
                if(left && !is_reliable(c)) {
                        pkt->hdr.wnd += seglen;
                }
        } while(left);
}
736
737 ssize_t utcp_send(struct utcp_connection *c, const void *data, size_t len) {
738         if(c->reapable) {
739                 debug(c, "send() called on closed connection\n");
740                 errno = EBADF;
741                 return -1;
742         }
743
744         switch(c->state) {
745         case CLOSED:
746         case LISTEN:
747                 debug(c, "send() called on unconnected connection\n");
748                 errno = ENOTCONN;
749                 return -1;
750
751         case SYN_SENT:
752         case SYN_RECEIVED:
753         case ESTABLISHED:
754         case CLOSE_WAIT:
755                 break;
756
757         case FIN_WAIT_1:
758         case FIN_WAIT_2:
759         case CLOSING:
760         case LAST_ACK:
761         case TIME_WAIT:
762                 debug(c, "send() called on closed connection\n");
763                 errno = EPIPE;
764                 return -1;
765         }
766
767         // Exit early if we have nothing to send.
768
769         if(!len) {
770                 return 0;
771         }
772
773         if(!data) {
774                 errno = EFAULT;
775                 return -1;
776         }
777
778         // Check if we need to be able to buffer all data
779
780         if(c->flags & UTCP_NO_PARTIAL) {
781                 if(len > buffer_free(&c->sndbuf)) {
782                         if(len > c->sndbuf.maxsize) {
783                                 errno = EMSGSIZE;
784                                 return -1;
785                         } else {
786                                 errno = EWOULDBLOCK;
787                                 return 0;
788                         }
789                 }
790         }
791
792         // Add data to send buffer.
793
794         if(is_reliable(c)) {
795                 len = buffer_put(&c->sndbuf, data, len);
796         } else if(c->state != SYN_SENT && c->state != SYN_RECEIVED) {
797                 if(len > MAX_UNRELIABLE_SIZE || buffer_put(&c->sndbuf, data, len) != (ssize_t)len) {
798                         errno = EMSGSIZE;
799                         return -1;
800                 }
801         } else {
802                 return 0;
803         }
804
805         if(len <= 0) {
806                 if(is_reliable(c)) {
807                         errno = EWOULDBLOCK;
808                         return 0;
809                 } else {
810                         return len;
811                 }
812         }
813
814         c->snd.last += len;
815
816         // Don't send anything yet if the connection has not fully established yet
817
818         if(c->state == SYN_SENT || c->state == SYN_RECEIVED) {
819                 return len;
820         }
821
822         ack(c, false);
823
824         if(!is_reliable(c)) {
825                 c->snd.una = c->snd.nxt = c->snd.last;
826                 buffer_discard(&c->sndbuf, c->sndbuf.used);
827         }
828
829         if(is_reliable(c) && !timespec_isset(&c->rtrx_timeout)) {
830                 start_retransmit_timer(c);
831         }
832
833         if(is_reliable(c) && !timespec_isset(&c->conn_timeout)) {
834                 clock_gettime(UTCP_CLOCK, &c->conn_timeout);
835                 c->conn_timeout.tv_sec += c->utcp->timeout;
836         }
837
838         return len;
839 }
840
841 static void swap_ports(struct hdr *hdr) {
842         uint16_t tmp = hdr->src;
843         hdr->src = hdr->dst;
844         hdr->dst = tmp;
845 }
846
// Resend the first unacknowledged segment (at most one MSS) right away.
// Unlike retransmit(), this touches neither the timers, the RTO, the
// congestion window nor snd.nxt.
// Does nothing if the connection is closed or there is no unacknowledged data.
static void fast_retransmit(struct utcp_connection *c) {
        if(c->state == CLOSED || c->snd.last == c->snd.una) {
                debug(c, "fast_retransmit() called but nothing to retransmit!\n");
                return;
        }

        struct utcp *utcp = c->utcp;

        // The packet is assembled in the utcp instance's shared packet buffer
        struct {
                struct hdr hdr;
                uint8_t data[];
        } *pkt = c->utcp->pkt;

        pkt->hdr.src = c->src;
        pkt->hdr.dst = c->dst;
        pkt->hdr.wnd = c->rcvbuf.maxsize;
        pkt->hdr.aux = 0;

        switch(c->state) {
        case ESTABLISHED:
        case FIN_WAIT_1:
        case CLOSE_WAIT:
        case CLOSING:
        case LAST_ACK:
                // Send unacked data again.
                pkt->hdr.seq = c->snd.una;
                pkt->hdr.ack = c->rcv.nxt;
                pkt->hdr.ctl = ACK;
                uint32_t len = min(seqdiff(c->snd.last, c->snd.una), utcp->mss);

                // The FIN occupies a sequence number but no payload byte
                if(fin_wanted(c, c->snd.una + len)) {
                        len--;
                        pkt->hdr.ctl |= FIN;
                }

                // Unacknowledged data starts at the front of the send buffer
                buffer_copy(&c->sndbuf, pkt->data, 0, len);
                print_packet(c, "rtrx", pkt, sizeof(pkt->hdr) + len);
                utcp->send(utcp, pkt, sizeof(pkt->hdr) + len);
                break;

        default:
                // Nothing is resent in any other state
                break;
        }
}
891
// Handle a retransmission timeout.
//
// Depending on the state, the SYN, the SYN+ACK or the first unacknowledged
// data segment is sent again. Afterwards the retransmission timer is
// restarted, the RTO is doubled (capped at MAX_RTO), and any ongoing RTT
// measurement and duplicate ACK count are reset.
//
// If there is nothing to retransmit, the timer is stopped instead.
static void retransmit(struct utcp_connection *c) {
        if(c->state == CLOSED || c->snd.last == c->snd.una) {
                debug(c, "retransmit() called but nothing to retransmit!\n");
                stop_retransmit_timer(c);
                return;
        }

        struct utcp *utcp = c->utcp;

        // Notify the application through the optional retransmit callback
        if(utcp->retransmit) {
                utcp->retransmit(c);
        }

        // The packet is assembled in the utcp instance's shared packet buffer
        struct {
                struct hdr hdr;
                uint8_t data[];
        } *pkt = c->utcp->pkt;

        pkt->hdr.src = c->src;
        pkt->hdr.dst = c->dst;
        pkt->hdr.wnd = c->rcvbuf.maxsize;
        pkt->hdr.aux = 0;

        switch(c->state) {
        case SYN_SENT:
                // Send our SYN again
                pkt->hdr.seq = c->snd.iss;
                pkt->hdr.ack = 0;
                pkt->hdr.ctl = SYN;
                pkt->hdr.aux = 0x0101;
                // Same four extra bytes as in the initial SYN sent by utcp_connect_ex()
                pkt->data[0] = 1;
                pkt->data[1] = 0;
                pkt->data[2] = 0;
                pkt->data[3] = c->flags & 0x7;
                print_packet(c, "rtrx", pkt, sizeof(pkt->hdr) + 4);
                utcp->send(utcp, pkt, sizeof(pkt->hdr) + 4);
                break;

        case SYN_RECEIVED:
                // Send SYNACK again
                pkt->hdr.seq = c->snd.nxt;
                pkt->hdr.ack = c->rcv.nxt;
                pkt->hdr.ctl = SYN | ACK;
                print_packet(c, "rtrx", pkt, sizeof(pkt->hdr));
                utcp->send(utcp, pkt, sizeof(pkt->hdr));
                break;

        case ESTABLISHED:
        case FIN_WAIT_1:
        case CLOSE_WAIT:
        case CLOSING:
        case LAST_ACK:
                // Send unacked data again.
                pkt->hdr.seq = c->snd.una;
                pkt->hdr.ack = c->rcv.nxt;
                pkt->hdr.ctl = ACK;
                uint32_t len = min(seqdiff(c->snd.last, c->snd.una), utcp->mss);

                // The FIN occupies a sequence number but no payload byte
                if(fin_wanted(c, c->snd.una + len)) {
                        len--;
                        pkt->hdr.ctl |= FIN;
                }

                // RFC 5681 slow start after timeout
                uint32_t flightsize = seqdiff(c->snd.nxt, c->snd.una);
                c->snd.ssthresh = max(flightsize / 2, utcp->mss * 2); // eq. 4
                c->snd.cwnd = utcp->mss;
                debug_cwnd(c);

                // Unacknowledged data starts at the front of the send buffer
                buffer_copy(&c->sndbuf, pkt->data, 0, len);
                print_packet(c, "rtrx", pkt, sizeof(pkt->hdr) + len);
                utcp->send(utcp, pkt, sizeof(pkt->hdr) + len);

                // Everything after the resent segment counts as not yet sent again
                c->snd.nxt = c->snd.una + len;
                break;

        case CLOSED:
        case LISTEN:
        case TIME_WAIT:
        case FIN_WAIT_2:
                // We shouldn't need to retransmit anything in this state.
#ifdef UTCP_DEBUG
                abort();
#endif
                stop_retransmit_timer(c);
                goto cleanup;
        }

        // The timer is restarted with the current RTO; the doubled value only
        // takes effect the next time the timer is started.
        start_retransmit_timer(c);
        c->rto *= 2;

        if(c->rto > MAX_RTO) {
                c->rto = MAX_RTO;
        }

        c->rtt_start.tv_sec = 0; // invalidate RTT timer
        c->dupack = 0; // cancel any ongoing fast recovery

cleanup:
        return;
}
993
994 /* Update receive buffer and SACK entries after consuming data.
995  *
996  * Situation:
997  *
998  * |.....0000..1111111111.....22222......3333|
999  * |---------------^
1000  *
1001  * 0..3 represent the SACK entries. The ^ indicates up to which point we want
1002  * to remove data from the receive buffer. The idea is to substract "len"
1003  * from the offset of all the SACK entries, and then remove/cut down entries
1004  * that are shifted to before the start of the receive buffer.
1005  *
1006  * There are three cases:
1007  * - the SACK entry is after ^, in that case just change the offset.
1008  * - the SACK entry starts before and ends after ^, so we have to
1009  *   change both its offset and size.
1010  * - the SACK entry is completely before ^, in that case delete it.
1011  */
1012 static void sack_consume(struct utcp_connection *c, size_t len) {
1013         debug(c, "sack_consume %lu\n", (unsigned long)len);
1014
1015         if(len > c->rcvbuf.used) {
1016                 debug(c, "all SACK entries consumed\n");
1017                 c->sacks[0].len = 0;
1018                 return;
1019         }
1020
1021         buffer_discard(&c->rcvbuf, len);
1022
1023         for(int i = 0; i < NSACKS && c->sacks[i].len;) {
1024                 if(len < c->sacks[i].offset) {
1025                         c->sacks[i].offset -= len;
1026                         i++;
1027                 } else if(len < c->sacks[i].offset + c->sacks[i].len) {
1028                         c->sacks[i].len -= len - c->sacks[i].offset;
1029                         c->sacks[i].offset = 0;
1030                         i++;
1031                 } else {
1032                         if(i < NSACKS - 1) {
1033                                 memmove(&c->sacks[i], &c->sacks[i + 1], (NSACKS - 1 - i) * sizeof(c->sacks)[i]);
1034                                 c->sacks[NSACKS - 1].len = 0;
1035                         } else {
1036                                 c->sacks[i].len = 0;
1037                                 break;
1038                         }
1039                 }
1040         }
1041
1042         for(int i = 0; i < NSACKS && c->sacks[i].len; i++) {
1043                 debug(c, "SACK[%d] offset %u len %u\n", i, c->sacks[i].offset, c->sacks[i].len);
1044         }
1045 }
1046
1047 static void handle_out_of_order(struct utcp_connection *c, uint32_t offset, const void *data, size_t len) {
1048         debug(c, "out of order packet, offset %u\n", offset);
1049         // Packet loss or reordering occured. Store the data in the buffer.
1050         ssize_t rxd = buffer_put_at(&c->rcvbuf, offset, data, len);
1051
1052         if(rxd <= 0) {
1053                 debug(c, "packet outside receive buffer, dropping\n");
1054                 return;
1055         }
1056
1057         if((size_t)rxd < len) {
1058                 debug(c, "packet partially outside receive buffer\n");
1059                 len = rxd;
1060         }
1061
1062         // Make note of where we put it.
1063         for(int i = 0; i < NSACKS; i++) {
1064                 if(!c->sacks[i].len) { // nothing to merge, add new entry
1065                         debug(c, "new SACK entry %d\n", i);
1066                         c->sacks[i].offset = offset;
1067                         c->sacks[i].len = rxd;
1068                         break;
1069                 } else if(offset < c->sacks[i].offset) {
1070                         if(offset + rxd < c->sacks[i].offset) { // insert before
1071                                 if(!c->sacks[NSACKS - 1].len) { // only if room left
1072                                         debug(c, "insert SACK entry at %d\n", i);
1073                                         memmove(&c->sacks[i + 1], &c->sacks[i], (NSACKS - i - 1) * sizeof(c->sacks)[i]);
1074                                         c->sacks[i].offset = offset;
1075                                         c->sacks[i].len = rxd;
1076                                 } else {
1077                                         debug(c, "SACK entries full, dropping packet\n");
1078                                 }
1079
1080                                 break;
1081                         } else { // merge
1082                                 debug(c, "merge with start of SACK entry at %d\n", i);
1083                                 c->sacks[i].offset = offset;
1084                                 break;
1085                         }
1086                 } else if(offset <= c->sacks[i].offset + c->sacks[i].len) {
1087                         if(offset + rxd > c->sacks[i].offset + c->sacks[i].len) { // merge
1088                                 debug(c, "merge with end of SACK entry at %d\n", i);
1089                                 c->sacks[i].len = offset + rxd - c->sacks[i].offset;
1090                                 // TODO: handle potential merge with next entry
1091                         }
1092
1093                         break;
1094                 }
1095         }
1096
1097         for(int i = 0; i < NSACKS && c->sacks[i].len; i++) {
1098                 debug(c, "SACK[%d] offset %u len %u\n", i, c->sacks[i].offset, c->sacks[i].len);
1099         }
1100 }
1101
1102 static void handle_in_order(struct utcp_connection *c, const void *data, size_t len) {
1103         if(c->recv) {
1104                 ssize_t rxd = c->recv(c, data, len);
1105
1106                 if(rxd != (ssize_t)len) {
1107                         // TODO: handle the application not accepting all data.
1108                         abort();
1109                 }
1110         }
1111
1112         // Check if we can process out-of-order data now.
1113         if(c->sacks[0].len && len >= c->sacks[0].offset) {
1114                 debug(c, "incoming packet len %lu connected with SACK at %u\n", (unsigned long)len, c->sacks[0].offset);
1115
1116                 if(len < c->sacks[0].offset + c->sacks[0].len) {
1117                         size_t offset = len;
1118                         len = c->sacks[0].offset + c->sacks[0].len;
1119                         size_t remainder = len - offset;
1120
1121                         ssize_t rxd = buffer_call(c, &c->rcvbuf, offset, remainder);
1122
1123                         if(rxd != (ssize_t)remainder) {
1124                                 // TODO: handle the application not accepting all data.
1125                                 abort();
1126                         }
1127                 }
1128         }
1129
1130         if(c->rcvbuf.used) {
1131                 sack_consume(c, len);
1132         }
1133
1134         c->rcv.nxt += len;
1135 }
1136
1137 static void handle_unreliable(struct utcp_connection *c, const struct hdr *hdr, const void *data, size_t len) {
1138         // Fast path for unfragmented packets
1139         if(!hdr->wnd && !(hdr->ctl & MF)) {
1140                 if(c->recv) {
1141                         c->recv(c, data, len);
1142                 }
1143
1144                 c->rcv.nxt = hdr->seq + len;
1145                 return;
1146         }
1147
1148         // Ensure reassembled packet are not larger than 64 kiB
1149         if(hdr->wnd >= MAX_UNRELIABLE_SIZE || hdr->wnd + len > MAX_UNRELIABLE_SIZE) {
1150                 return;
1151         }
1152
1153         // Don't accept out of order fragments
1154         if(hdr->wnd && hdr->seq != c->rcv.nxt) {
1155                 return;
1156         }
1157
1158         // Reset the receive buffer for the first fragment
1159         if(!hdr->wnd) {
1160                 buffer_clear(&c->rcvbuf);
1161         }
1162
1163         ssize_t rxd = buffer_put_at(&c->rcvbuf, hdr->wnd, data, len);
1164
1165         if(rxd != (ssize_t)len) {
1166                 return;
1167         }
1168
1169         // Send the packet if it's the final fragment
1170         if(!(hdr->ctl & MF)) {
1171                 buffer_call(c, &c->rcvbuf, 0, hdr->wnd + len);
1172         }
1173
1174         c->rcv.nxt = hdr->seq + len;
1175 }
1176
1177 static void handle_incoming_data(struct utcp_connection *c, const struct hdr *hdr, const void *data, size_t len) {
1178         if(!is_reliable(c)) {
1179                 handle_unreliable(c, hdr, data, len);
1180                 return;
1181         }
1182
1183         uint32_t offset = seqdiff(hdr->seq, c->rcv.nxt);
1184
1185         if(offset) {
1186                 handle_out_of_order(c, offset, data, len);
1187         } else {
1188                 handle_in_order(c, data, len);
1189         }
1190 }
1191
1192
1193 ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) {
1194         const uint8_t *ptr = data;
1195
1196         if(!utcp) {
1197                 errno = EFAULT;
1198                 return -1;
1199         }
1200
1201         if(!len) {
1202                 return 0;
1203         }
1204
1205         if(!data) {
1206                 errno = EFAULT;
1207                 return -1;
1208         }
1209
1210         // Drop packets smaller than the header
1211
1212         struct hdr hdr;
1213
1214         if(len < sizeof(hdr)) {
1215                 print_packet(NULL, "recv", data, len);
1216                 errno = EBADMSG;
1217                 return -1;
1218         }
1219
1220         // Make a copy from the potentially unaligned data to a struct hdr
1221
1222         memcpy(&hdr, ptr, sizeof(hdr));
1223
1224         // Try to match the packet to an existing connection
1225
1226         struct utcp_connection *c = find_connection(utcp, hdr.dst, hdr.src);
1227         print_packet(c, "recv", data, len);
1228
1229         // Process the header
1230
1231         ptr += sizeof(hdr);
1232         len -= sizeof(hdr);
1233
1234         // Drop packets with an unknown CTL flag
1235
1236         if(hdr.ctl & ~(SYN | ACK | RST | FIN | MF)) {
1237                 print_packet(NULL, "recv", data, len);
1238                 errno = EBADMSG;
1239                 return -1;
1240         }
1241
1242         // Check for auxiliary headers
1243
1244         const uint8_t *init = NULL;
1245
1246         uint16_t aux = hdr.aux;
1247
1248         while(aux) {
1249                 size_t auxlen = 4 * (aux >> 8) & 0xf;
1250                 uint8_t auxtype = aux & 0xff;
1251
1252                 if(len < auxlen) {
1253                         errno = EBADMSG;
1254                         return -1;
1255                 }
1256
1257                 switch(auxtype) {
1258                 case AUX_INIT:
1259                         if(!(hdr.ctl & SYN) || auxlen != 4) {
1260                                 errno = EBADMSG;
1261                                 return -1;
1262                         }
1263
1264                         init = ptr;
1265                         break;
1266
1267                 default:
1268                         errno = EBADMSG;
1269                         return -1;
1270                 }
1271
1272                 len -= auxlen;
1273                 ptr += auxlen;
1274
1275                 if(!(aux & 0x800)) {
1276                         break;
1277                 }
1278
1279                 if(len < 2) {
1280                         errno = EBADMSG;
1281                         return -1;
1282                 }
1283
1284                 memcpy(&aux, ptr, 2);
1285                 len -= 2;
1286                 ptr += 2;
1287         }
1288
1289         bool has_data = len || (hdr.ctl & (SYN | FIN));
1290
1291         // Is it for a new connection?
1292
1293         if(!c) {
1294                 // Ignore RST packets
1295
1296                 if(hdr.ctl & RST) {
1297                         return 0;
1298                 }
1299
1300                 // Is it a SYN packet and are we LISTENing?
1301
1302                 if(hdr.ctl & SYN && !(hdr.ctl & ACK) && utcp->accept) {
1303                         // If we don't want to accept it, send a RST back
1304                         if((utcp->pre_accept && !utcp->pre_accept(utcp, hdr.dst))) {
1305                                 len = 1;
1306                                 goto reset;
1307                         }
1308
1309                         // Try to allocate memory, otherwise send a RST back
1310                         c = allocate_connection(utcp, hdr.dst, hdr.src);
1311
1312                         if(!c) {
1313                                 len = 1;
1314                                 goto reset;
1315                         }
1316
1317                         // Parse auxilliary information
1318                         if(init) {
1319                                 if(init[0] < 1) {
1320                                         len = 1;
1321                                         goto reset;
1322                                 }
1323
1324                                 c->flags = init[3] & 0x7;
1325                         } else {
1326                                 c->flags = UTCP_TCP;
1327                         }
1328
1329 synack:
1330                         // Return SYN+ACK, go to SYN_RECEIVED state
1331                         c->snd.wnd = hdr.wnd;
1332                         c->rcv.irs = hdr.seq;
1333                         c->rcv.nxt = c->rcv.irs + 1;
1334                         set_state(c, SYN_RECEIVED);
1335
1336                         struct {
1337                                 struct hdr hdr;
1338                                 uint8_t data[4];
1339                         } pkt;
1340
1341                         pkt.hdr.src = c->src;
1342                         pkt.hdr.dst = c->dst;
1343                         pkt.hdr.ack = c->rcv.irs + 1;
1344                         pkt.hdr.seq = c->snd.iss;
1345                         pkt.hdr.wnd = c->rcvbuf.maxsize;
1346                         pkt.hdr.ctl = SYN | ACK;
1347
1348                         if(init) {
1349                                 pkt.hdr.aux = 0x0101;
1350                                 pkt.data[0] = 1;
1351                                 pkt.data[1] = 0;
1352                                 pkt.data[2] = 0;
1353                                 pkt.data[3] = c->flags & 0x7;
1354                                 print_packet(c, "send", &pkt, sizeof(hdr) + 4);
1355                                 utcp->send(utcp, &pkt, sizeof(hdr) + 4);
1356                         } else {
1357                                 pkt.hdr.aux = 0;
1358                                 print_packet(c, "send", &pkt, sizeof(hdr));
1359                                 utcp->send(utcp, &pkt, sizeof(hdr));
1360                         }
1361
1362                         start_retransmit_timer(c);
1363                 } else {
1364                         // No, we don't want your packets, send a RST back
1365                         len = 1;
1366                         goto reset;
1367                 }
1368
1369                 return 0;
1370         }
1371
1372         debug(c, "state %s\n", strstate[c->state]);
1373
1374         // In case this is for a CLOSED connection, ignore the packet.
1375         // TODO: make it so incoming packets can never match a CLOSED connection.
1376
1377         if(c->state == CLOSED) {
1378                 debug(c, "got packet for closed connection\n");
1379                 return 0;
1380         }
1381
1382         // It is for an existing connection.
1383
1384         // 1. Drop invalid packets.
1385
1386         // 1a. Drop packets that should not happen in our current state.
1387
1388         switch(c->state) {
1389         case SYN_SENT:
1390         case SYN_RECEIVED:
1391         case ESTABLISHED:
1392         case FIN_WAIT_1:
1393         case FIN_WAIT_2:
1394         case CLOSE_WAIT:
1395         case CLOSING:
1396         case LAST_ACK:
1397         case TIME_WAIT:
1398                 break;
1399
1400         default:
1401 #ifdef UTCP_DEBUG
1402                 abort();
1403 #endif
1404                 break;
1405         }
1406
1407         // 1b. Discard data that is not in our receive window.
1408
1409         if(is_reliable(c)) {
1410                 bool acceptable;
1411
1412                 if(c->state == SYN_SENT) {
1413                         acceptable = true;
1414                 } else if(len == 0) {
1415                         acceptable = seqdiff(hdr.seq, c->rcv.nxt) >= 0;
1416                 } else {
1417                         int32_t rcv_offset = seqdiff(hdr.seq, c->rcv.nxt);
1418
1419                         // cut already accepted front overlapping
1420                         if(rcv_offset < 0) {
1421                                 acceptable = len > (size_t) - rcv_offset;
1422
1423                                 if(acceptable) {
1424                                         ptr -= rcv_offset;
1425                                         len += rcv_offset;
1426                                         hdr.seq -= rcv_offset;
1427                                 }
1428                         } else {
1429                                 acceptable = seqdiff(hdr.seq, c->rcv.nxt) >= 0 && seqdiff(hdr.seq, c->rcv.nxt) + len <= c->rcvbuf.maxsize;
1430                         }
1431                 }
1432
1433                 if(!acceptable) {
1434                         debug(c, "packet not acceptable, %u <= %u + %lu < %u\n", c->rcv.nxt, hdr.seq, (unsigned long)len, c->rcv.nxt + c->rcvbuf.maxsize);
1435
1436                         // Ignore unacceptable RST packets.
1437                         if(hdr.ctl & RST) {
1438                                 return 0;
1439                         }
1440
1441                         // Otherwise, continue processing.
1442                         len = 0;
1443                 }
1444         } else {
1445 #if UTCP_DEBUG
1446                 int32_t rcv_offset = seqdiff(hdr.seq, c->rcv.nxt);
1447
1448                 if(rcv_offset) {
1449                         debug(c, "packet out of order, offset %u bytes", rcv_offset);
1450                 }
1451
1452 #endif
1453         }
1454
1455         c->snd.wnd = hdr.wnd; // TODO: move below
1456
1457         // 1c. Drop packets with an invalid ACK.
1458         // ackno should not roll back, and it should also not be bigger than what we ever could have sent
1459         // (= snd.una + c->sndbuf.used).
1460
1461         if(!is_reliable(c)) {
1462                 if(hdr.ack != c->snd.last && c->state >= ESTABLISHED) {
1463                         hdr.ack = c->snd.una;
1464                 }
1465         }
1466
1467         if(hdr.ctl & ACK && (seqdiff(hdr.ack, c->snd.last) > 0 || seqdiff(hdr.ack, c->snd.una) < 0)) {
1468                 debug(c, "packet ack seqno out of range, %u <= %u < %u\n", c->snd.una, hdr.ack, c->snd.una + c->sndbuf.used);
1469
1470                 // Ignore unacceptable RST packets.
1471                 if(hdr.ctl & RST) {
1472                         return 0;
1473                 }
1474
1475                 goto reset;
1476         }
1477
1478         // 2. Handle RST packets
1479
1480         if(hdr.ctl & RST) {
1481                 switch(c->state) {
1482                 case SYN_SENT:
1483                         if(!(hdr.ctl & ACK)) {
1484                                 return 0;
1485                         }
1486
1487                         // The peer has refused our connection.
1488                         set_state(c, CLOSED);
1489                         errno = ECONNREFUSED;
1490
1491                         if(c->recv) {
1492                                 c->recv(c, NULL, 0);
1493                         }
1494
1495                         if(c->poll && !c->reapable) {
1496                                 c->poll(c, 0);
1497                         }
1498
1499                         return 0;
1500
1501                 case SYN_RECEIVED:
1502                         if(hdr.ctl & ACK) {
1503                                 return 0;
1504                         }
1505
1506                         // We haven't told the application about this connection yet. Silently delete.
1507                         free_connection(c);
1508                         return 0;
1509
1510                 case ESTABLISHED:
1511                 case FIN_WAIT_1:
1512                 case FIN_WAIT_2:
1513                 case CLOSE_WAIT:
1514                         if(hdr.ctl & ACK) {
1515                                 return 0;
1516                         }
1517
1518                         // The peer has aborted our connection.
1519                         set_state(c, CLOSED);
1520                         errno = ECONNRESET;
1521
1522                         if(c->recv) {
1523                                 c->recv(c, NULL, 0);
1524                         }
1525
1526                         if(c->poll && !c->reapable) {
1527                                 c->poll(c, 0);
1528                         }
1529
1530                         return 0;
1531
1532                 case CLOSING:
1533                 case LAST_ACK:
1534                 case TIME_WAIT:
1535                         if(hdr.ctl & ACK) {
1536                                 return 0;
1537                         }
1538
1539                         // As far as the application is concerned, the connection has already been closed.
1540                         // If it has called utcp_close() already, we can immediately free this connection.
1541                         if(c->reapable) {
1542                                 free_connection(c);
1543                                 return 0;
1544                         }
1545
1546                         // Otherwise, immediately move to the CLOSED state.
1547                         set_state(c, CLOSED);
1548                         return 0;
1549
1550                 default:
1551 #ifdef UTCP_DEBUG
1552                         abort();
1553 #endif
1554                         break;
1555                 }
1556         }
1557
1558         uint32_t advanced;
1559
1560         if(!(hdr.ctl & ACK)) {
1561                 advanced = 0;
1562                 goto skip_ack;
1563         }
1564
1565         // 3. Advance snd.una
1566
1567         advanced = seqdiff(hdr.ack, c->snd.una);
1568
1569         if(advanced) {
1570                 // RTT measurement
1571                 if(c->rtt_start.tv_sec) {
1572                         if(c->rtt_seq == hdr.ack) {
1573                                 struct timespec now;
1574                                 clock_gettime(UTCP_CLOCK, &now);
1575                                 int32_t diff = timespec_diff_usec(&now, &c->rtt_start);
1576                                 update_rtt(c, diff);
1577                                 c->rtt_start.tv_sec = 0;
1578                         } else if(c->rtt_seq < hdr.ack) {
1579                                 debug(c, "cancelling RTT measurement: %u < %u\n", c->rtt_seq, hdr.ack);
1580                                 c->rtt_start.tv_sec = 0;
1581                         }
1582                 }
1583
1584                 int32_t data_acked = advanced;
1585
1586                 switch(c->state) {
1587                 case SYN_SENT:
1588                 case SYN_RECEIVED:
1589                         data_acked--;
1590                         break;
1591
1592                 // TODO: handle FIN as well.
1593                 default:
1594                         break;
1595                 }
1596
1597                 assert(data_acked >= 0);
1598
1599 #ifndef NDEBUG
1600                 int32_t bufused = seqdiff(c->snd.last, c->snd.una);
1601                 assert(data_acked <= bufused);
1602 #endif
1603
1604                 if(data_acked) {
1605                         buffer_discard(&c->sndbuf, data_acked);
1606
1607                         if(is_reliable(c)) {
1608                                 c->do_poll = true;
1609                         }
1610                 }
1611
1612                 // Also advance snd.nxt if possible
1613                 if(seqdiff(c->snd.nxt, hdr.ack) < 0) {
1614                         c->snd.nxt = hdr.ack;
1615                 }
1616
1617                 c->snd.una = hdr.ack;
1618
1619                 if(c->dupack) {
1620                         if(c->dupack >= 3) {
1621                                 debug(c, "fast recovery ended\n");
1622                                 c->snd.cwnd = c->snd.ssthresh;
1623                         }
1624
1625                         c->dupack = 0;
1626                 }
1627
1628                 // Increase the congestion window according to RFC 5681
1629                 if(c->snd.cwnd < c->snd.ssthresh) {
1630                         c->snd.cwnd += min(advanced, utcp->mss); // eq. 2
1631                 } else {
1632                         c->snd.cwnd += max(1, (utcp->mss * utcp->mss) / c->snd.cwnd); // eq. 3
1633                 }
1634
1635                 if(c->snd.cwnd > c->sndbuf.maxsize) {
1636                         c->snd.cwnd = c->sndbuf.maxsize;
1637                 }
1638
1639                 debug_cwnd(c);
1640
1641                 // Check if we have sent a FIN that is now ACKed.
1642                 switch(c->state) {
1643                 case FIN_WAIT_1:
1644                         if(c->snd.una == c->snd.last) {
1645                                 set_state(c, FIN_WAIT_2);
1646                         }
1647
1648                         break;
1649
1650                 case CLOSING:
1651                         if(c->snd.una == c->snd.last) {
1652                                 clock_gettime(UTCP_CLOCK, &c->conn_timeout);
1653                                 c->conn_timeout.tv_sec += utcp->timeout;
1654                                 set_state(c, TIME_WAIT);
1655                         }
1656
1657                         break;
1658
1659                 default:
1660                         break;
1661                 }
1662         } else {
1663                 if(!len && is_reliable(c) && c->snd.una != c->snd.last) {
1664                         c->dupack++;
1665                         debug(c, "duplicate ACK %d\n", c->dupack);
1666
1667                         if(c->dupack == 3) {
1668                                 // RFC 5681 fast recovery
1669                                 debug(c, "fast recovery started\n", c->dupack);
1670                                 uint32_t flightsize = seqdiff(c->snd.nxt, c->snd.una);
1671                                 c->snd.ssthresh = max(flightsize / 2, utcp->mss * 2); // eq. 4
1672                                 c->snd.cwnd = min(c->snd.ssthresh + 3 * utcp->mss, c->sndbuf.maxsize);
1673
1674                                 if(c->snd.cwnd > c->sndbuf.maxsize) {
1675                                         c->snd.cwnd = c->sndbuf.maxsize;
1676                                 }
1677
1678                                 debug_cwnd(c);
1679
1680                                 fast_retransmit(c);
1681                         } else if(c->dupack > 3) {
1682                                 c->snd.cwnd += utcp->mss;
1683
1684                                 if(c->snd.cwnd > c->sndbuf.maxsize) {
1685                                         c->snd.cwnd = c->sndbuf.maxsize;
1686                                 }
1687
1688                                 debug_cwnd(c);
1689                         }
1690
1691                         // We got an ACK which indicates the other side did get one of our packets.
1692                         // Reset the retransmission timer to avoid going to slow start,
1693                         // but don't touch the connection timeout.
1694                         start_retransmit_timer(c);
1695                 }
1696         }
1697
1698         // 4. Update timers
1699
1700         if(advanced) {
1701                 if(c->snd.una == c->snd.last) {
1702                         stop_retransmit_timer(c);
1703                         timespec_clear(&c->conn_timeout);
1704                 } else if(is_reliable(c)) {
1705                         start_retransmit_timer(c);
1706                         clock_gettime(UTCP_CLOCK, &c->conn_timeout);
1707                         c->conn_timeout.tv_sec += utcp->timeout;
1708                 }
1709         }
1710
1711 skip_ack:
1712         // 5. Process SYN stuff
1713
1714         if(hdr.ctl & SYN) {
1715                 switch(c->state) {
1716                 case SYN_SENT:
1717
1718                         // This is a SYNACK. It should always have ACKed the SYN.
1719                         if(!advanced) {
1720                                 goto reset;
1721                         }
1722
1723                         c->rcv.irs = hdr.seq;
1724                         c->rcv.nxt = hdr.seq + 1;
1725
1726                         if(c->shut_wr) {
1727                                 c->snd.last++;
1728                                 set_state(c, FIN_WAIT_1);
1729                         } else {
1730                                 set_state(c, ESTABLISHED);
1731                         }
1732
1733                         break;
1734
1735                 case SYN_RECEIVED:
1736                         // This is a retransmit of a SYN, send back the SYNACK.
1737                         goto synack;
1738
1739                 case ESTABLISHED:
1740                 case FIN_WAIT_1:
1741                 case FIN_WAIT_2:
1742                 case CLOSE_WAIT:
1743                 case CLOSING:
1744                 case LAST_ACK:
1745                 case TIME_WAIT:
1746                         // This could be a retransmission. Ignore the SYN flag, but send an ACK back.
1747                         break;
1748
1749                 default:
1750 #ifdef UTCP_DEBUG
1751                         abort();
1752 #endif
1753                         return 0;
1754                 }
1755         }
1756
1757         // 6. Process new data
1758
1759         if(c->state == SYN_RECEIVED) {
1760                 // This is the ACK after the SYNACK. It should always have ACKed the SYNACK.
1761                 if(!advanced) {
1762                         goto reset;
1763                 }
1764
1765                 // Are we still LISTENing?
1766                 if(utcp->accept) {
1767                         utcp->accept(c, c->src);
1768                 }
1769
1770                 if(c->state != ESTABLISHED) {
1771                         set_state(c, CLOSED);
1772                         c->reapable = true;
1773                         goto reset;
1774                 }
1775         }
1776
1777         if(len) {
1778                 switch(c->state) {
1779                 case SYN_SENT:
1780                 case SYN_RECEIVED:
1781                         // This should never happen.
1782 #ifdef UTCP_DEBUG
1783                         abort();
1784 #endif
1785                         return 0;
1786
1787                 case ESTABLISHED:
1788                 case FIN_WAIT_1:
1789                 case FIN_WAIT_2:
1790                         break;
1791
1792                 case CLOSE_WAIT:
1793                 case CLOSING:
1794                 case LAST_ACK:
1795                 case TIME_WAIT:
1796                         // Ehm no, We should never receive more data after a FIN.
1797                         goto reset;
1798
1799                 default:
1800 #ifdef UTCP_DEBUG
1801                         abort();
1802 #endif
1803                         return 0;
1804                 }
1805
1806                 handle_incoming_data(c, &hdr, ptr, len);
1807         }
1808
1809         // 7. Process FIN stuff
1810
1811         if((hdr.ctl & FIN) && (!is_reliable(c) || hdr.seq + len == c->rcv.nxt)) {
1812                 switch(c->state) {
1813                 case SYN_SENT:
1814                 case SYN_RECEIVED:
1815                         // This should never happen.
1816 #ifdef UTCP_DEBUG
1817                         abort();
1818 #endif
1819                         break;
1820
1821                 case ESTABLISHED:
1822                         set_state(c, CLOSE_WAIT);
1823                         break;
1824
1825                 case FIN_WAIT_1:
1826                         set_state(c, CLOSING);
1827                         break;
1828
1829                 case FIN_WAIT_2:
1830                         clock_gettime(UTCP_CLOCK, &c->conn_timeout);
1831                         c->conn_timeout.tv_sec += utcp->timeout;
1832                         set_state(c, TIME_WAIT);
1833                         break;
1834
1835                 case CLOSE_WAIT:
1836                 case CLOSING:
1837                 case LAST_ACK:
1838                 case TIME_WAIT:
1839                         // Ehm, no. We should never receive a second FIN.
1840                         goto reset;
1841
1842                 default:
1843 #ifdef UTCP_DEBUG
1844                         abort();
1845 #endif
1846                         break;
1847                 }
1848
1849                 // FIN counts as one sequence number
1850                 c->rcv.nxt++;
1851                 len++;
1852
1853                 // Inform the application that the peer closed its end of the connection.
1854                 if(c->recv) {
1855                         errno = 0;
1856                         c->recv(c, NULL, 0);
1857                 }
1858         }
1859
1860         // Now we send something back if:
1861         // - we received data, so we have to send back an ACK
1862         //   -> sendatleastone = true
1863         // - or we got an ack, so we should maybe send a bit more data
1864         //   -> sendatleastone = false
1865
1866         if(is_reliable(c) || hdr.ctl & SYN || hdr.ctl & FIN) {
1867                 ack(c, has_data);
1868         }
1869
1870         return 0;
1871
1872 reset:
1873         swap_ports(&hdr);
1874         hdr.wnd = 0;
1875         hdr.aux = 0;
1876
1877         if(hdr.ctl & ACK) {
1878                 hdr.seq = hdr.ack;
1879                 hdr.ctl = RST;
1880         } else {
1881                 hdr.ack = hdr.seq + len;
1882                 hdr.seq = 0;
1883                 hdr.ctl = RST | ACK;
1884         }
1885
1886         print_packet(c, "send", &hdr, sizeof(hdr));
1887         utcp->send(utcp, &hdr, sizeof(hdr));
1888         return 0;
1889
1890 }
1891
1892 int utcp_shutdown(struct utcp_connection *c, int dir) {
1893         debug(c, "shutdown %d at %u\n", dir, c ? c->snd.last : 0);
1894
1895         if(!c) {
1896                 errno = EFAULT;
1897                 return -1;
1898         }
1899
1900         if(c->reapable) {
1901                 debug(c, "shutdown() called on closed connection\n");
1902                 errno = EBADF;
1903                 return -1;
1904         }
1905
1906         if(!(dir == UTCP_SHUT_RD || dir == UTCP_SHUT_WR || dir == UTCP_SHUT_RDWR)) {
1907                 errno = EINVAL;
1908                 return -1;
1909         }
1910
1911         // TCP does not have a provision for stopping incoming packets.
1912         // The best we can do is to just ignore them.
1913         if(dir == UTCP_SHUT_RD || dir == UTCP_SHUT_RDWR) {
1914                 c->recv = NULL;
1915         }
1916
1917         // The rest of the code deals with shutting down writes.
1918         if(dir == UTCP_SHUT_RD) {
1919                 return 0;
1920         }
1921
1922         // Only process shutting down writes once.
1923         if(c->shut_wr) {
1924                 return 0;
1925         }
1926
1927         c->shut_wr = true;
1928
1929         switch(c->state) {
1930         case CLOSED:
1931         case LISTEN:
1932                 errno = ENOTCONN;
1933                 return -1;
1934
1935         case SYN_SENT:
1936                 return 0;
1937
1938         case SYN_RECEIVED:
1939         case ESTABLISHED:
1940                 set_state(c, FIN_WAIT_1);
1941                 break;
1942
1943         case FIN_WAIT_1:
1944         case FIN_WAIT_2:
1945                 return 0;
1946
1947         case CLOSE_WAIT:
1948                 set_state(c, CLOSING);
1949                 break;
1950
1951         case CLOSING:
1952         case LAST_ACK:
1953         case TIME_WAIT:
1954                 return 0;
1955         }
1956
1957         c->snd.last++;
1958
1959         ack(c, false);
1960
1961         if(!timespec_isset(&c->rtrx_timeout)) {
1962                 start_retransmit_timer(c);
1963         }
1964
1965         return 0;
1966 }
1967
1968 static bool reset_connection(struct utcp_connection *c) {
1969         if(!c) {
1970                 errno = EFAULT;
1971                 return false;
1972         }
1973
1974         if(c->reapable) {
1975                 debug(c, "abort() called on closed connection\n");
1976                 errno = EBADF;
1977                 return false;
1978         }
1979
1980         c->recv = NULL;
1981         c->poll = NULL;
1982
1983         switch(c->state) {
1984         case CLOSED:
1985                 return true;
1986
1987         case LISTEN:
1988         case SYN_SENT:
1989         case CLOSING:
1990         case LAST_ACK:
1991         case TIME_WAIT:
1992                 set_state(c, CLOSED);
1993                 return true;
1994
1995         case SYN_RECEIVED:
1996         case ESTABLISHED:
1997         case FIN_WAIT_1:
1998         case FIN_WAIT_2:
1999         case CLOSE_WAIT:
2000                 set_state(c, CLOSED);
2001                 break;
2002         }
2003
2004         // Send RST
2005
2006         struct hdr hdr;
2007
2008         hdr.src = c->src;
2009         hdr.dst = c->dst;
2010         hdr.seq = c->snd.nxt;
2011         hdr.ack = 0;
2012         hdr.wnd = 0;
2013         hdr.ctl = RST;
2014
2015         print_packet(c, "send", &hdr, sizeof(hdr));
2016         c->utcp->send(c->utcp, &hdr, sizeof(hdr));
2017         return true;
2018 }
2019
2020 // Closes all the opened connections
2021 void utcp_abort_all_connections(struct utcp *utcp) {
2022         if(!utcp) {
2023                 errno = EINVAL;
2024                 return;
2025         }
2026
2027         for(int i = 0; i < utcp->nconnections; i++) {
2028                 struct utcp_connection *c = utcp->connections[i];
2029
2030                 if(c->reapable || c->state == CLOSED) {
2031                         continue;
2032                 }
2033
2034                 utcp_recv_t old_recv = c->recv;
2035                 utcp_poll_t old_poll = c->poll;
2036
2037                 reset_connection(c);
2038
2039                 if(old_recv) {
2040                         errno = 0;
2041                         old_recv(c, NULL, 0);
2042                 }
2043
2044                 if(old_poll && !c->reapable) {
2045                         errno = 0;
2046                         old_poll(c, 0);
2047                 }
2048         }
2049
2050         return;
2051 }
2052
2053 int utcp_close(struct utcp_connection *c) {
2054         if(c->rcvbuf.used) {
2055                 fprintf(stderr, "UTCP channel closed with stuff in receive buffer\n");
2056                 return reset_connection(c) ? 0 : -1;
2057         }
2058
2059         if(utcp_shutdown(c, SHUT_RDWR) && errno != ENOTCONN) {
2060                 return -1;
2061         }
2062
2063         c->recv = NULL;
2064         c->poll = NULL;
2065         c->reapable = true;
2066         return 0;
2067 }
2068
2069 int utcp_abort(struct utcp_connection *c) {
2070         if(!reset_connection(c)) {
2071                 return -1;
2072         }
2073
2074         c->reapable = true;
2075         return 0;
2076 }
2077
/* Handle timeouts.
 * One call to this function will loop through all connections,
 * freeing the ones that are closed and reapable, expiring connections
 * whose connection timer has run out, retransmitting where the
 * retransmission timer has run out, and calling pending poll callbacks.
 * The return value is a struct timespec holding the interval between now
 * and the earliest timer still pending; with no timer pending it is one
 * hour (the search starts at now + 3600 seconds).
 * NOTE(review): the interval may come out negative if an expired timer
 * is still set after processing -- confirm against retransmit().
 */
struct timespec utcp_timeout(struct utcp *utcp) {
        struct timespec now;
        clock_gettime(UTCP_CLOCK, &now);
        // Upper bound for the next wakeup: one hour from now.
        struct timespec next = {now.tv_sec + 3600, now.tv_nsec};

        for(int i = 0; i < utcp->nconnections; i++) {
                struct utcp_connection *c = utcp->connections[i];

                if(!c) {
                        continue;
                }

                // delete connections that have been utcp_close()d.
                if(c->state == CLOSED) {
                        if(c->reapable) {
                                debug(c, "reaping\n");
                                free_connection(c);
                                // Revisit this index; presumably free_connection()
                                // removes the entry from utcp->connections -- TODO confirm.
                                i--;
                        }

                        continue;
                }

                // The connection timer expired: close the connection and report
                // ETIMEDOUT to the application through its callbacks.
                if(timespec_isset(&c->conn_timeout) && timespec_lt(&c->conn_timeout, &now)) {
                        errno = ETIMEDOUT;
                        c->state = CLOSED;

                        if(c->recv) {
                                c->recv(c, NULL, 0);
                        }

                        // The recv callback may have marked the connection reapable.
                        if(c->poll && !c->reapable) {
                                c->poll(c, 0);
                        }

                        continue;
                }

                if(timespec_isset(&c->rtrx_timeout) && timespec_lt(&c->rtrx_timeout, &now)) {
                        debug(c, "retransmitting after timeout\n");
                        retransmit(c);
                }

                if(c->poll) {
                        if((c->state == ESTABLISHED || c->state == CLOSE_WAIT) && c->do_poll) {
                                // Report the free send buffer space once per do_poll request.
                                c->do_poll = false;
                                uint32_t len = buffer_free(&c->sndbuf);

                                if(len) {
                                        c->poll(c, len);
                                }
                        } else if(c->state == CLOSED) {
                                // Only reachable if the state became CLOSED after the
                                // check at the top of this iteration.
                                c->poll(c, 0);
                        }
                }

                // Keep track of the earliest timer that is still pending.
                if(timespec_isset(&c->conn_timeout) && timespec_lt(&c->conn_timeout, &next)) {
                        next = c->conn_timeout;
                }

                if(timespec_isset(&c->rtrx_timeout) && timespec_lt(&c->rtrx_timeout, &next)) {
                        next = c->rtrx_timeout;
                }
        }

        struct timespec diff;

        // Convert the absolute deadline into an interval relative to now.
        timespec_sub(&next, &now, &diff);

        return diff;
}
2155
2156 bool utcp_is_active(struct utcp *utcp) {
2157         if(!utcp) {
2158                 return false;
2159         }
2160
2161         for(int i = 0; i < utcp->nconnections; i++)
2162                 if(utcp->connections[i]->state != CLOSED && utcp->connections[i]->state != TIME_WAIT) {
2163                         return true;
2164                 }
2165
2166         return false;
2167 }
2168
2169 struct utcp *utcp_init(utcp_accept_t accept, utcp_pre_accept_t pre_accept, utcp_send_t send, void *priv) {
2170         if(!send) {
2171                 errno = EFAULT;
2172                 return NULL;
2173         }
2174
2175         struct utcp *utcp = calloc(1, sizeof(*utcp));
2176
2177         if(!utcp) {
2178                 return NULL;
2179         }
2180
2181         utcp_set_mtu(utcp, DEFAULT_MTU);
2182
2183         if(!utcp->pkt) {
2184                 free(utcp);
2185                 return NULL;
2186         }
2187
2188         if(!CLOCK_GRANULARITY) {
2189                 struct timespec res;
2190                 clock_getres(UTCP_CLOCK, &res);
2191                 CLOCK_GRANULARITY = res.tv_sec * USEC_PER_SEC + res.tv_nsec / 1000;
2192         }
2193
2194         utcp->accept = accept;
2195         utcp->pre_accept = pre_accept;
2196         utcp->send = send;
2197         utcp->priv = priv;
2198         utcp->timeout = DEFAULT_USER_TIMEOUT; // sec
2199
2200         return utcp;
2201 }
2202
2203 void utcp_exit(struct utcp *utcp) {
2204         if(!utcp) {
2205                 return;
2206         }
2207
2208         for(int i = 0; i < utcp->nconnections; i++) {
2209                 struct utcp_connection *c = utcp->connections[i];
2210
2211                 if(!c->reapable) {
2212                         if(c->recv) {
2213                                 c->recv(c, NULL, 0);
2214                         }
2215
2216                         if(c->poll && !c->reapable) {
2217                                 c->poll(c, 0);
2218                         }
2219                 }
2220
2221                 buffer_exit(&c->rcvbuf);
2222                 buffer_exit(&c->sndbuf);
2223                 free(c);
2224         }
2225
2226         free(utcp->connections);
2227         free(utcp->pkt);
2228         free(utcp);
2229 }
2230
2231 uint16_t utcp_get_mtu(struct utcp *utcp) {
2232         return utcp ? utcp->mtu : 0;
2233 }
2234
2235 uint16_t utcp_get_mss(struct utcp *utcp) {
2236         return utcp ? utcp->mss : 0;
2237 }
2238
2239 void utcp_set_mtu(struct utcp *utcp, uint16_t mtu) {
2240         if(!utcp) {
2241                 return;
2242         }
2243
2244         if(mtu <= sizeof(struct hdr)) {
2245                 return;
2246         }
2247
2248         if(mtu > utcp->mtu) {
2249                 char *new = realloc(utcp->pkt, mtu + sizeof(struct hdr));
2250
2251                 if(!new) {
2252                         return;
2253                 }
2254
2255                 utcp->pkt = new;
2256         }
2257
2258         utcp->mtu = mtu;
2259         utcp->mss = mtu - sizeof(struct hdr);
2260 }
2261
2262 void utcp_reset_timers(struct utcp *utcp) {
2263         if(!utcp) {
2264                 return;
2265         }
2266
2267         struct timespec now, then;
2268
2269         clock_gettime(UTCP_CLOCK, &now);
2270
2271         then = now;
2272
2273         then.tv_sec += utcp->timeout;
2274
2275         for(int i = 0; i < utcp->nconnections; i++) {
2276                 struct utcp_connection *c = utcp->connections[i];
2277
2278                 if(c->reapable) {
2279                         continue;
2280                 }
2281
2282                 if(timespec_isset(&c->rtrx_timeout)) {
2283                         c->rtrx_timeout = now;
2284                 }
2285
2286                 if(timespec_isset(&c->conn_timeout)) {
2287                         c->conn_timeout = then;
2288                 }
2289
2290                 c->rtt_start.tv_sec = 0;
2291
2292                 if(c->rto > START_RTO) {
2293                         c->rto = START_RTO;
2294                 }
2295         }
2296 }
2297
2298 int utcp_get_user_timeout(struct utcp *u) {
2299         return u ? u->timeout : 0;
2300 }
2301
2302 void utcp_set_user_timeout(struct utcp *u, int timeout) {
2303         if(u) {
2304                 u->timeout = timeout;
2305         }
2306 }
2307
2308 size_t utcp_get_sndbuf(struct utcp_connection *c) {
2309         return c ? c->sndbuf.maxsize : 0;
2310 }
2311
2312 size_t utcp_get_sndbuf_free(struct utcp_connection *c) {
2313         if(!c) {
2314                 return 0;
2315         }
2316
2317         switch(c->state) {
2318         case SYN_SENT:
2319         case SYN_RECEIVED:
2320         case ESTABLISHED:
2321         case CLOSE_WAIT:
2322                 return buffer_free(&c->sndbuf);
2323
2324         default:
2325                 return 0;
2326         }
2327 }
2328
2329 void utcp_set_sndbuf(struct utcp_connection *c, size_t size) {
2330         if(!c) {
2331                 return;
2332         }
2333
2334         c->sndbuf.maxsize = size;
2335
2336         if(c->sndbuf.maxsize != size) {
2337                 c->sndbuf.maxsize = -1;
2338         }
2339
2340         c->do_poll = is_reliable(c) && buffer_free(&c->sndbuf);
2341 }
2342
2343 size_t utcp_get_rcvbuf(struct utcp_connection *c) {
2344         return c ? c->rcvbuf.maxsize : 0;
2345 }
2346
2347 size_t utcp_get_rcvbuf_free(struct utcp_connection *c) {
2348         if(c && (c->state == ESTABLISHED || c->state == CLOSE_WAIT)) {
2349                 return buffer_free(&c->rcvbuf);
2350         } else {
2351                 return 0;
2352         }
2353 }
2354
2355 void utcp_set_rcvbuf(struct utcp_connection *c, size_t size) {
2356         if(!c) {
2357                 return;
2358         }
2359
2360         c->rcvbuf.maxsize = size;
2361
2362         if(c->rcvbuf.maxsize != size) {
2363                 c->rcvbuf.maxsize = -1;
2364         }
2365 }
2366
2367 size_t utcp_get_sendq(struct utcp_connection *c) {
2368         return c->sndbuf.used;
2369 }
2370
2371 size_t utcp_get_recvq(struct utcp_connection *c) {
2372         return c->rcvbuf.used;
2373 }
2374
2375 bool utcp_get_nodelay(struct utcp_connection *c) {
2376         return c ? c->nodelay : false;
2377 }
2378
2379 void utcp_set_nodelay(struct utcp_connection *c, bool nodelay) {
2380         if(c) {
2381                 c->nodelay = nodelay;
2382         }
2383 }
2384
2385 bool utcp_get_keepalive(struct utcp_connection *c) {
2386         return c ? c->keepalive : false;
2387 }
2388
2389 void utcp_set_keepalive(struct utcp_connection *c, bool keepalive) {
2390         if(c) {
2391                 c->keepalive = keepalive;
2392         }
2393 }
2394
2395 size_t utcp_get_outq(struct utcp_connection *c) {
2396         return c ? seqdiff(c->snd.nxt, c->snd.una) : 0;
2397 }
2398
2399 void utcp_set_recv_cb(struct utcp_connection *c, utcp_recv_t recv) {
2400         if(c) {
2401                 c->recv = recv;
2402         }
2403 }
2404
2405 void utcp_set_poll_cb(struct utcp_connection *c, utcp_poll_t poll) {
2406         if(c) {
2407                 c->poll = poll;
2408                 c->do_poll = is_reliable(c) && buffer_free(&c->sndbuf);
2409         }
2410 }
2411
2412 void utcp_set_accept_cb(struct utcp *utcp, utcp_accept_t accept, utcp_pre_accept_t pre_accept) {
2413         if(utcp) {
2414                 utcp->accept = accept;
2415                 utcp->pre_accept = pre_accept;
2416         }
2417 }
2418
2419 void utcp_expect_data(struct utcp_connection *c, bool expect) {
2420         if(!c || c->reapable) {
2421                 return;
2422         }
2423
2424         if(!(c->state == ESTABLISHED || c->state == FIN_WAIT_1 || c->state == FIN_WAIT_2)) {
2425                 return;
2426         }
2427
2428         if(expect) {
2429                 // If we expect data, start the connection timer.
2430                 if(!timespec_isset(&c->conn_timeout)) {
2431                         clock_gettime(UTCP_CLOCK, &c->conn_timeout);
2432                         c->conn_timeout.tv_sec += c->utcp->timeout;
2433                 }
2434         } else {
2435                 // If we want to cancel expecting data, only clear the timer when there is no unACKed data.
2436                 if(c->snd.una == c->snd.last) {
2437                         timespec_clear(&c->conn_timeout);
2438                 }
2439         }
2440 }
2441
2442 void utcp_offline(struct utcp *utcp, bool offline) {
2443         struct timespec now;
2444         clock_gettime(UTCP_CLOCK, &now);
2445
2446         for(int i = 0; i < utcp->nconnections; i++) {
2447                 struct utcp_connection *c = utcp->connections[i];
2448
2449                 if(c->reapable) {
2450                         continue;
2451                 }
2452
2453                 utcp_expect_data(c, offline);
2454
2455                 if(!offline) {
2456                         if(timespec_isset(&c->rtrx_timeout)) {
2457                                 c->rtrx_timeout = now;
2458                         }
2459
2460                         utcp->connections[i]->rtt_start.tv_sec = 0;
2461
2462                         if(c->rto > START_RTO) {
2463                                 c->rto = START_RTO;
2464                         }
2465                 }
2466         }
2467 }
2468
2469 void utcp_set_retransmit_cb(struct utcp *utcp, utcp_retransmit_t cb) {
2470         utcp->retransmit = cb;
2471 }
2472
2473 void utcp_set_clock_granularity(long granularity) {
2474         CLOCK_GRANULARITY = granularity;
2475 }