]> git.meshlink.io Git - utcp/blob - utcp.c
Fix RTT measurement.
[utcp] / utcp.c
1 /*
2     utcp.c -- Userspace TCP
3     Copyright (C) 2014-2017 Guus Sliepen <guus@tinc-vpn.org>
4
5     This program is free software; you can redistribute it and/or modify
6     it under the terms of the GNU General Public License as published by
7     the Free Software Foundation; either version 2 of the License, or
8     (at your option) any later version.
9
10     This program is distributed in the hope that it will be useful,
11     but WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13     GNU General Public License for more details.
14
15     You should have received a copy of the GNU General Public License along
16     with this program; if not, write to the Free Software Foundation, Inc.,
17     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20 #define _GNU_SOURCE
21
22 #include <assert.h>
23 #include <errno.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <stdint.h>
27 #include <stdbool.h>
28 #include <string.h>
29 #include <unistd.h>
30 #include <time.h>
31
32 #include "utcp_priv.h"
33
34 #ifndef EBADMSG
35 #define EBADMSG         104
36 #endif
37
38 #ifndef SHUT_RDWR
39 #define SHUT_RDWR 2
40 #endif
41
42 #ifdef poll
43 #undef poll
44 #endif
45
46 #ifndef UTCP_CLOCK
47 #if defined(CLOCK_MONOTONIC_RAW) && defined(__x86_64__)
48 #define UTCP_CLOCK CLOCK_MONOTONIC_RAW
49 #else
50 #define UTCP_CLOCK CLOCK_MONOTONIC
51 #endif
52 #endif
53
54 static void timespec_sub(const struct timespec *a, const struct timespec *b, struct timespec *r) {
55         r->tv_sec = a->tv_sec - b->tv_sec;
56         r->tv_nsec = a->tv_nsec - b->tv_nsec;
57
58         if(r->tv_nsec < 0) {
59                 r->tv_sec--, r->tv_nsec += NSEC_PER_SEC;
60         }
61 }
62
// Return a - b expressed in microseconds (sub-microsecond remainder is
// truncated by the integer division).
static int32_t timespec_diff_usec(const struct timespec *a, const struct timespec *b) {
	const struct timespec delta = {
		.tv_sec = a->tv_sec - b->tv_sec,
		.tv_nsec = a->tv_nsec - b->tv_nsec,
	};

	return delta.tv_sec * 1000000 + delta.tv_nsec / 1000;
}
66
// Return true when a is strictly earlier than b.
// Seconds decide; nanoseconds only break a tie.
static bool timespec_lt(const struct timespec *a, const struct timespec *b) {
	if(a->tv_sec != b->tv_sec) {
		return a->tv_sec < b->tv_sec;
	}

	return a->tv_nsec < b->tv_nsec;
}
74
// Mark a timestamp as "not set".
// Only tv_sec is reset; timespec_isset() looks at tv_sec alone, so tv_nsec
// is left untouched.
static void timespec_clear(struct timespec *a) {
	a->tv_sec = 0;
}
78
// Return true when the timestamp is set, i.e. its tv_sec field is non-zero.
static bool timespec_isset(const struct timespec *a) {
	return a->tv_sec != 0;
}
82
83 static long CLOCK_GRANULARITY; // usec
84
// Return the smaller of two sizes.
static inline size_t min(size_t a, size_t b) {
	if(a < b) {
		return a;
	}

	return b;
}
88
// Return the larger of two sizes.
static inline size_t max(size_t a, size_t b) {
	if(a > b) {
		return a;
	}

	return b;
}
92
93 #ifdef UTCP_DEBUG
94 #include <stdarg.h>
95
96 #ifndef UTCP_DEBUG_DATALEN
97 #define UTCP_DEBUG_DATALEN 20
98 #endif
99
100 static void debug(struct utcp_connection *c, const char *format, ...) {
101         struct timespec tv;
102         char buf[1024];
103         int len;
104
105         clock_gettime(CLOCK_REALTIME, &tv);
106         len = snprintf(buf, sizeof(buf), "%ld.%06lu %u:%u ", (long)tv.tv_sec, tv.tv_nsec / 1000, c ? c->src : 0, c ? c->dst : 0);
107         va_list ap;
108         va_start(ap, format);
109         len += vsnprintf(buf + len, sizeof(buf) - len, format, ap);
110         va_end(ap);
111
112         if(len > 0 && (size_t)len < sizeof(buf)) {
113                 fwrite(buf, len, 1, stderr);
114         }
115 }
116
117 static void print_packet(struct utcp_connection *c, const char *dir, const void *pkt, size_t len) {
118         struct hdr hdr;
119
120         if(len < sizeof(hdr)) {
121                 debug(c, "%s: short packet (%lu bytes)\n", dir, (unsigned long)len);
122                 return;
123         }
124
125         memcpy(&hdr, pkt, sizeof(hdr));
126
127         uint32_t datalen;
128
129         if(len > sizeof(hdr)) {
130                 datalen = min(len - sizeof(hdr), UTCP_DEBUG_DATALEN);
131         } else {
132                 datalen = 0;
133         }
134
135
136         const uint8_t *data = (uint8_t *)pkt + sizeof(hdr);
137         char str[datalen * 2 + 1];
138         char *p = str;
139
140         for(uint32_t i = 0; i < datalen; i++) {
141                 *p++ = "0123456789ABCDEF"[data[i] >> 4];
142                 *p++ = "0123456789ABCDEF"[data[i] & 15];
143         }
144
145         *p = 0;
146
147         debug(c, "%s: len %lu src %u dst %u seq %u ack %u wnd %u aux %x ctl %s%s%s%s%s data %s\n",
148               dir, (unsigned long)len, hdr.src, hdr.dst, hdr.seq, hdr.ack, hdr.wnd, hdr.aux,
149               hdr.ctl & SYN ? "SYN" : "",
150               hdr.ctl & RST ? "RST" : "",
151               hdr.ctl & FIN ? "FIN" : "",
152               hdr.ctl & ACK ? "ACK" : "",
153               hdr.ctl & MF ? "MF" : "",
154               str
155              );
156 }
157
158 static void debug_cwnd(struct utcp_connection *c) {
159         debug(c, "snd.cwnd %u snd.ssthresh %u\n", c->snd.cwnd, ~c->snd.ssthresh ? c->snd.ssthresh : 0);
160 }
161 #else
162 #define debug(...) do {} while(0)
163 #define print_packet(...) do {} while(0)
164 #define debug_cwnd(...) do {} while(0)
165 #endif
166
167 static void set_state(struct utcp_connection *c, enum state state) {
168         c->state = state;
169
170         if(state == ESTABLISHED) {
171                 timespec_clear(&c->conn_timeout);
172         }
173
174         debug(c, "state %s\n", strstate[state]);
175 }
176
177 static bool fin_wanted(struct utcp_connection *c, uint32_t seq) {
178         if(seq != c->snd.last) {
179                 return false;
180         }
181
182         switch(c->state) {
183         case FIN_WAIT_1:
184         case CLOSING:
185         case LAST_ACK:
186                 return true;
187
188         default:
189                 return false;
190         }
191 }
192
193 static bool is_reliable(struct utcp_connection *c) {
194         return c->flags & UTCP_RELIABLE;
195 }
196
// Signed distance from sequence number b to sequence number a.
// The subtraction is done modulo 2^32 and then reinterpreted as signed, so the
// result stays meaningful across sequence number wrap-around.
static int32_t seqdiff(uint32_t a, uint32_t b) {
	uint32_t delta = a - b;
	return delta;
}
200
201 // Buffer functions
202 static bool buffer_wraps(struct buffer *buf) {
203         return buf->size - buf->offset < buf->used;
204 }
205
206 static bool buffer_resize(struct buffer *buf, uint32_t newsize) {
207         char *newdata = realloc(buf->data, newsize);
208
209         if(!newdata) {
210                 return false;
211         }
212
213         buf->data = newdata;
214
215         if(buffer_wraps(buf)) {
216                 // Shift the right part of the buffer until it hits the end of the new buffer.
217                 // Old situation:
218                 // [345......012]
219                 // New situation:
220                 // [345.........|........012]
221                 uint32_t tailsize = buf->size - buf->offset;
222                 uint32_t newoffset = newsize - tailsize;
223                 memmove(buf->data + newoffset, buf->data + buf->offset, tailsize);
224                 buf->offset = newoffset;
225         }
226
227         buf->size = newsize;
228         return true;
229 }
230
231 // Store data into the buffer
232 static ssize_t buffer_put_at(struct buffer *buf, size_t offset, const void *data, size_t len) {
233         debug(NULL, "buffer_put_at %lu %lu %lu\n", (unsigned long)buf->used, (unsigned long)offset, (unsigned long)len);
234
235         // Ensure we don't store more than maxsize bytes in total
236         size_t required = offset + len;
237
238         if(required > buf->maxsize) {
239                 if(offset >= buf->maxsize) {
240                         return 0;
241                 }
242
243                 len = buf->maxsize - offset;
244                 required = buf->maxsize;
245         }
246
247         // Check if we need to resize the buffer
248         if(required > buf->size) {
249                 size_t newsize = buf->size;
250
251                 if(!newsize) {
252                         newsize = 4096;
253                 }
254
255                 do {
256                         newsize *= 2;
257                 } while(newsize < required);
258
259                 if(newsize > buf->maxsize) {
260                         newsize = buf->maxsize;
261                 }
262
263                 if(!buffer_resize(buf, newsize)) {
264                         return -1;
265                 }
266         }
267
268         uint32_t realoffset = buf->offset + offset;
269
270         if(buf->size - buf->offset < offset) {
271                 // The offset wrapped
272                 realoffset -= buf->size;
273         }
274
275         if(buf->size - realoffset < len) {
276                 // The new chunk of data must be wrapped
277                 memcpy(buf->data + realoffset, data, buf->size - realoffset);
278                 memcpy(buf->data, (char *)data + buf->size - realoffset, len - (buf->size - realoffset));
279         } else {
280                 memcpy(buf->data + realoffset, data, len);
281         }
282
283         if(required > buf->used) {
284                 buf->used = required;
285         }
286
287         return len;
288 }
289
290 static ssize_t buffer_put(struct buffer *buf, const void *data, size_t len) {
291         return buffer_put_at(buf, buf->used, data, len);
292 }
293
294 // Copy data from the buffer without removing it.
295 static ssize_t buffer_copy(struct buffer *buf, void *data, size_t offset, size_t len) {
296         // Ensure we don't copy more than is actually stored in the buffer
297         if(offset >= buf->used) {
298                 return 0;
299         }
300
301         if(buf->used - offset < len) {
302                 len = buf->used - offset;
303         }
304
305         uint32_t realoffset = buf->offset + offset;
306
307         if(buf->size - buf->offset < offset) {
308                 // The offset wrapped
309                 realoffset -= buf->size;
310         }
311
312         if(buf->size - realoffset < len) {
313                 // The data is wrapped
314                 memcpy(data, buf->data + realoffset, buf->size - realoffset);
315                 memcpy((char *)data + buf->size - realoffset, buf->data, len - (buf->size - realoffset));
316         } else {
317                 memcpy(data, buf->data + realoffset, len);
318         }
319
320         return len;
321 }
322
323 // Copy data from the buffer without removing it.
324 static ssize_t buffer_call(struct buffer *buf, utcp_recv_t cb, void *arg, size_t offset, size_t len) {
325         // Ensure we don't copy more than is actually stored in the buffer
326         if(offset >= buf->used) {
327                 return 0;
328         }
329
330         if(buf->used - offset < len) {
331                 len = buf->used - offset;
332         }
333
334         uint32_t realoffset = buf->offset + offset;
335
336         if(buf->size - buf->offset < offset) {
337                 // The offset wrapped
338                 realoffset -= buf->size;
339         }
340
341         if(buf->size - realoffset < len) {
342                 // The data is wrapped
343                 ssize_t rx1 = cb(arg, buf->data + realoffset, buf->size - realoffset);
344
345                 if(rx1 < buf->size - realoffset) {
346                         return rx1;
347                 }
348
349                 ssize_t rx2 = cb(arg, buf->data, len - (buf->size - realoffset));
350
351                 if(rx2 < 0) {
352                         return rx2;
353                 } else {
354                         return rx1 + rx2;
355                 }
356         } else {
357                 return cb(arg, buf->data + realoffset, len);
358         }
359 }
360
361 // Discard data from the buffer.
362 static ssize_t buffer_discard(struct buffer *buf, size_t len) {
363         if(buf->used < len) {
364                 len = buf->used;
365         }
366
367         if(buf->size - buf->offset < len) {
368                 buf->offset -= buf->size;
369         }
370
371         if(buf->used == len) {
372                 buf->offset = 0;
373         } else {
374                 buf->offset += len;
375         }
376
377         buf->used -= len;
378
379         return len;
380 }
381
382 static void buffer_clear(struct buffer *buf) {
383         buf->used = 0;
384         buf->offset = 0;
385 }
386
387 static bool buffer_set_size(struct buffer *buf, uint32_t minsize, uint32_t maxsize) {
388         if(maxsize < minsize) {
389                 maxsize = minsize;
390         }
391
392         buf->maxsize = maxsize;
393
394         return buf->size >= minsize || buffer_resize(buf, minsize);
395 }
396
397 static void buffer_exit(struct buffer *buf) {
398         free(buf->data);
399         memset(buf, 0, sizeof(*buf));
400 }
401
402 static uint32_t buffer_free(const struct buffer *buf) {
403         return buf->maxsize - buf->used;
404 }
405
406 // Connections are stored in a sorted list.
407 // This gives O(log(N)) lookup time, O(N log(N)) insertion time and O(N) deletion time.
408
409 static int compare(const void *va, const void *vb) {
410         assert(va && vb);
411
412         const struct utcp_connection *a = *(struct utcp_connection **)va;
413         const struct utcp_connection *b = *(struct utcp_connection **)vb;
414
415         assert(a && b);
416         assert(a->src && b->src);
417
418         int c = (int)a->src - (int)b->src;
419
420         if(c) {
421                 return c;
422         }
423
424         c = (int)a->dst - (int)b->dst;
425         return c;
426 }
427
428 static struct utcp_connection *find_connection(const struct utcp *utcp, uint16_t src, uint16_t dst) {
429         if(!utcp->nconnections) {
430                 return NULL;
431         }
432
433         struct utcp_connection key = {
434                 .src = src,
435                 .dst = dst,
436         }, *keyp = &key;
437         struct utcp_connection **match = bsearch(&keyp, utcp->connections, utcp->nconnections, sizeof(*utcp->connections), compare);
438         return match ? *match : NULL;
439 }
440
441 static void free_connection(struct utcp_connection *c) {
442         struct utcp *utcp = c->utcp;
443         struct utcp_connection **cp = bsearch(&c, utcp->connections, utcp->nconnections, sizeof(*utcp->connections), compare);
444
445         assert(cp);
446
447         int i = cp - utcp->connections;
448         memmove(cp, cp + 1, (utcp->nconnections - i - 1) * sizeof(*cp));
449         utcp->nconnections--;
450
451         buffer_exit(&c->rcvbuf);
452         buffer_exit(&c->sndbuf);
453         free(c);
454 }
455
456 static struct utcp_connection *allocate_connection(struct utcp *utcp, uint16_t src, uint16_t dst) {
457         // Check whether this combination of src and dst is free
458
459         if(src) {
460                 if(find_connection(utcp, src, dst)) {
461                         errno = EADDRINUSE;
462                         return NULL;
463                 }
464         } else { // If src == 0, generate a random port number with the high bit set
465                 if(utcp->nconnections >= 32767) {
466                         errno = ENOMEM;
467                         return NULL;
468                 }
469
470                 src = rand() | 0x8000;
471
472                 while(find_connection(utcp, src, dst)) {
473                         src++;
474                 }
475         }
476
477         // Allocate memory for the new connection
478
479         if(utcp->nconnections >= utcp->nallocated) {
480                 if(!utcp->nallocated) {
481                         utcp->nallocated = 4;
482                 } else {
483                         utcp->nallocated *= 2;
484                 }
485
486                 struct utcp_connection **new_array = realloc(utcp->connections, utcp->nallocated * sizeof(*utcp->connections));
487
488                 if(!new_array) {
489                         return NULL;
490                 }
491
492                 utcp->connections = new_array;
493         }
494
495         struct utcp_connection *c = calloc(1, sizeof(*c));
496
497         if(!c) {
498                 return NULL;
499         }
500
501         if(!buffer_set_size(&c->sndbuf, DEFAULT_SNDBUFSIZE, DEFAULT_MAXSNDBUFSIZE)) {
502                 free(c);
503                 return NULL;
504         }
505
506         if(!buffer_set_size(&c->rcvbuf, DEFAULT_RCVBUFSIZE, DEFAULT_MAXRCVBUFSIZE)) {
507                 buffer_exit(&c->sndbuf);
508                 free(c);
509                 return NULL;
510         }
511
512         // Fill in the details
513
514         c->src = src;
515         c->dst = dst;
516 #ifdef UTCP_DEBUG
517         c->snd.iss = 0;
518 #else
519         c->snd.iss = rand();
520 #endif
521         c->snd.una = c->snd.iss;
522         c->snd.nxt = c->snd.iss + 1;
523         c->snd.last = c->snd.nxt;
524         c->snd.cwnd = (utcp->mss > 2190 ? 2 : utcp->mss > 1095 ? 3 : 4) * utcp->mss;
525         c->snd.ssthresh = ~0;
526         debug_cwnd(c);
527         c->utcp = utcp;
528
529         // Add it to the sorted list of connections
530
531         utcp->connections[utcp->nconnections++] = c;
532         qsort(utcp->connections, utcp->nconnections, sizeof(*utcp->connections), compare);
533
534         return c;
535 }
536
// Return the absolute difference |a - b| of two unsigned values.
static inline uint32_t absdiff(uint32_t a, uint32_t b) {
	return a > b ? a - b : b - a;
}
544
545 // Update RTT variables. See RFC 6298.
546 static void update_rtt(struct utcp_connection *c, uint32_t rtt) {
547         if(!rtt) {
548                 debug(c, "invalid rtt\n");
549                 return;
550         }
551
552         struct utcp *utcp = c->utcp;
553
554         if(!utcp->srtt) {
555                 utcp->srtt = rtt;
556                 utcp->rttvar = rtt / 2;
557         } else {
558                 utcp->rttvar = (utcp->rttvar * 3 + absdiff(utcp->srtt, rtt)) / 4;
559                 utcp->srtt = (utcp->srtt * 7 + rtt) / 8;
560         }
561
562         utcp->rto = utcp->srtt + max(4 * utcp->rttvar, CLOCK_GRANULARITY);
563
564         if(utcp->rto > MAX_RTO) {
565                 utcp->rto = MAX_RTO;
566         }
567
568         debug(c, "rtt %u srtt %u rttvar %u rto %u\n", rtt, utcp->srtt, utcp->rttvar, utcp->rto);
569 }
570
571 static void start_retransmit_timer(struct utcp_connection *c) {
572         clock_gettime(UTCP_CLOCK, &c->rtrx_timeout);
573
574         uint32_t rto = c->utcp->rto;
575
576         while(rto > USEC_PER_SEC) {
577                 c->rtrx_timeout.tv_sec++;
578                 rto -= USEC_PER_SEC;
579         }
580
581         c->rtrx_timeout.tv_nsec += c->utcp->rto * 1000;
582
583         if(c->rtrx_timeout.tv_nsec >= NSEC_PER_SEC) {
584                 c->rtrx_timeout.tv_nsec -= NSEC_PER_SEC;
585                 c->rtrx_timeout.tv_sec++;
586         }
587
588         debug(c, "rtrx_timeout %ld.%06lu\n", c->rtrx_timeout.tv_sec, c->rtrx_timeout.tv_nsec);
589 }
590
591 static void stop_retransmit_timer(struct utcp_connection *c) {
592         timespec_clear(&c->rtrx_timeout);
593         debug(c, "rtrx_timeout cleared\n");
594 }
595
596 struct utcp_connection *utcp_connect_ex(struct utcp *utcp, uint16_t dst, utcp_recv_t recv, void *priv, uint32_t flags) {
597         struct utcp_connection *c = allocate_connection(utcp, 0, dst);
598
599         if(!c) {
600                 return NULL;
601         }
602
603         assert((flags & ~0x1f) == 0);
604
605         c->flags = flags;
606         c->recv = recv;
607         c->priv = priv;
608
609         struct {
610                 struct hdr hdr;
611                 uint8_t init[4];
612         } pkt;
613
614         pkt.hdr.src = c->src;
615         pkt.hdr.dst = c->dst;
616         pkt.hdr.seq = c->snd.iss;
617         pkt.hdr.ack = 0;
618         pkt.hdr.wnd = c->rcvbuf.maxsize;
619         pkt.hdr.ctl = SYN;
620         pkt.hdr.aux = 0x0101;
621         pkt.init[0] = 1;
622         pkt.init[1] = 0;
623         pkt.init[2] = 0;
624         pkt.init[3] = flags & 0x7;
625
626         set_state(c, SYN_SENT);
627
628         print_packet(c, "send", &pkt, sizeof(pkt));
629         utcp->send(utcp, &pkt, sizeof(pkt));
630
631         clock_gettime(UTCP_CLOCK, &c->conn_timeout);
632         c->conn_timeout.tv_sec += utcp->timeout;
633
634         start_retransmit_timer(c);
635
636         return c;
637 }
638
639 struct utcp_connection *utcp_connect(struct utcp *utcp, uint16_t dst, utcp_recv_t recv, void *priv) {
640         return utcp_connect_ex(utcp, dst, recv, priv, UTCP_TCP);
641 }
642
643 void utcp_accept(struct utcp_connection *c, utcp_recv_t recv, void *priv) {
644         if(c->reapable || c->state != SYN_RECEIVED) {
645                 debug(c, "accept() called on invalid connection in state %s\n", c, strstate[c->state]);
646                 return;
647         }
648
649         debug(c, "accepted %p %p\n", c, recv, priv);
650         c->recv = recv;
651         c->priv = priv;
652         set_state(c, ESTABLISHED);
653 }
654
655 static void ack(struct utcp_connection *c, bool sendatleastone) {
656         int32_t left = seqdiff(c->snd.last, c->snd.nxt);
657         int32_t cwndleft = is_reliable(c) ? min(c->snd.cwnd, c->snd.wnd) - seqdiff(c->snd.nxt, c->snd.una) : MAX_UNRELIABLE_SIZE;
658
659         assert(left >= 0);
660
661         if(cwndleft <= 0) {
662                 left = 0;
663         } else if(cwndleft < left) {
664                 left = cwndleft;
665
666                 if(!sendatleastone || cwndleft > c->utcp->mss) {
667                         left -= left % c->utcp->mss;
668                 }
669         }
670
671         debug(c, "cwndleft %d left %d\n", cwndleft, left);
672
673         if(!left && !sendatleastone) {
674                 return;
675         }
676
677         struct {
678                 struct hdr hdr;
679                 uint8_t data[];
680         } *pkt = c->utcp->pkt;
681
682         pkt->hdr.src = c->src;
683         pkt->hdr.dst = c->dst;
684         pkt->hdr.ack = c->rcv.nxt;
685         pkt->hdr.wnd = is_reliable(c) ? c->rcvbuf.maxsize : 0;
686         pkt->hdr.ctl = ACK;
687         pkt->hdr.aux = 0;
688
689         do {
690                 uint32_t seglen = left > c->utcp->mss ? c->utcp->mss : left;
691                 pkt->hdr.seq = c->snd.nxt;
692
693                 buffer_copy(&c->sndbuf, pkt->data, seqdiff(c->snd.nxt, c->snd.una), seglen);
694
695                 c->snd.nxt += seglen;
696                 left -= seglen;
697
698                 if(!is_reliable(c)) {
699                         if(left) {
700                                 pkt->hdr.ctl |= MF;
701                         } else {
702                                 pkt->hdr.ctl &= ~MF;
703                         }
704                 }
705
706                 if(seglen && fin_wanted(c, c->snd.nxt)) {
707                         seglen--;
708                         pkt->hdr.ctl |= FIN;
709                 }
710
711                 if(!c->rtt_start.tv_sec) {
712                         // Start RTT measurement
713                         clock_gettime(UTCP_CLOCK, &c->rtt_start);
714                         c->rtt_seq = pkt->hdr.seq + seglen;
715                         debug(c, "starting RTT measurement, expecting ack %u\n", c->rtt_seq);
716                 }
717
718                 print_packet(c, "send", pkt, sizeof(pkt->hdr) + seglen);
719                 c->utcp->send(c->utcp, pkt, sizeof(pkt->hdr) + seglen);
720
721                 if(left && !is_reliable(c)) {
722                         pkt->hdr.wnd += seglen;
723                 }
724         } while(left);
725 }
726
727 ssize_t utcp_send(struct utcp_connection *c, const void *data, size_t len) {
728         if(c->reapable) {
729                 debug(c, "send() called on closed connection\n");
730                 errno = EBADF;
731                 return -1;
732         }
733
734         switch(c->state) {
735         case CLOSED:
736         case LISTEN:
737                 debug(c, "send() called on unconnected connection\n");
738                 errno = ENOTCONN;
739                 return -1;
740
741         case SYN_SENT:
742         case SYN_RECEIVED:
743         case ESTABLISHED:
744         case CLOSE_WAIT:
745                 break;
746
747         case FIN_WAIT_1:
748         case FIN_WAIT_2:
749         case CLOSING:
750         case LAST_ACK:
751         case TIME_WAIT:
752                 debug(c, "send() called on closed connection\n");
753                 errno = EPIPE;
754                 return -1;
755         }
756
757         // Exit early if we have nothing to send.
758
759         if(!len) {
760                 return 0;
761         }
762
763         if(!data) {
764                 errno = EFAULT;
765                 return -1;
766         }
767
768         // Check if we need to be able to buffer all data
769
770         if(c->flags & UTCP_NO_PARTIAL) {
771                 if(len > buffer_free(&c->sndbuf)) {
772                         if(len > c->sndbuf.maxsize) {
773                                 errno = EMSGSIZE;
774                                 return -1;
775                         } else {
776                                 errno = EWOULDBLOCK;
777                                 return 0;
778                         }
779                 }
780         }
781
782         // Add data to send buffer.
783
784         if(is_reliable(c)) {
785                 len = buffer_put(&c->sndbuf, data, len);
786         } else if(c->state != SYN_SENT && c->state != SYN_RECEIVED) {
787                 if(len > MAX_UNRELIABLE_SIZE || buffer_put(&c->sndbuf, data, len) != (ssize_t)len) {
788                         errno = EMSGSIZE;
789                         return -1;
790                 }
791         } else {
792                 return 0;
793         }
794
795         if(len <= 0) {
796                 if(is_reliable(c)) {
797                         errno = EWOULDBLOCK;
798                         return 0;
799                 } else {
800                         return len;
801                 }
802         }
803
804         c->snd.last += len;
805
806         // Don't send anything yet if the connection has not fully established yet
807
808         if(c->state == SYN_SENT || c->state == SYN_RECEIVED) {
809                 return len;
810         }
811
812         ack(c, false);
813
814         if(!is_reliable(c)) {
815                 c->snd.una = c->snd.nxt = c->snd.last;
816                 buffer_discard(&c->sndbuf, c->sndbuf.used);
817                 c->do_poll = true;
818         }
819
820         if(is_reliable(c) && !timespec_isset(&c->rtrx_timeout)) {
821                 start_retransmit_timer(c);
822         }
823
824         if(is_reliable(c) && !timespec_isset(&c->conn_timeout)) {
825                 clock_gettime(UTCP_CLOCK, &c->conn_timeout);
826                 c->conn_timeout.tv_sec += c->utcp->timeout;
827         }
828
829         return len;
830 }
831
832 static void swap_ports(struct hdr *hdr) {
833         uint16_t tmp = hdr->src;
834         hdr->src = hdr->dst;
835         hdr->dst = tmp;
836 }
837
838 static void fast_retransmit(struct utcp_connection *c) {
839         if(c->state == CLOSED || c->snd.last == c->snd.una) {
840                 debug(c, "fast_retransmit() called but nothing to retransmit!\n");
841                 return;
842         }
843
844         struct utcp *utcp = c->utcp;
845
846         struct {
847                 struct hdr hdr;
848                 uint8_t data[];
849         } *pkt;
850
851         pkt = malloc(c->utcp->mtu);
852
853         if(!pkt) {
854                 return;
855         }
856
857         pkt->hdr.src = c->src;
858         pkt->hdr.dst = c->dst;
859         pkt->hdr.wnd = c->rcvbuf.maxsize;
860         pkt->hdr.aux = 0;
861
862         switch(c->state) {
863         case ESTABLISHED:
864         case FIN_WAIT_1:
865         case CLOSE_WAIT:
866         case CLOSING:
867         case LAST_ACK:
868                 // Send unacked data again.
869                 pkt->hdr.seq = c->snd.una;
870                 pkt->hdr.ack = c->rcv.nxt;
871                 pkt->hdr.ctl = ACK;
872                 uint32_t len = min(seqdiff(c->snd.last, c->snd.una), utcp->mss);
873
874                 if(fin_wanted(c, c->snd.una + len)) {
875                         len--;
876                         pkt->hdr.ctl |= FIN;
877                 }
878
879                 buffer_copy(&c->sndbuf, pkt->data, 0, len);
880                 print_packet(c, "rtrx", pkt, sizeof(pkt->hdr) + len);
881                 utcp->send(utcp, pkt, sizeof(pkt->hdr) + len);
882                 break;
883
884         default:
885                 break;
886         }
887
888         free(pkt);
889 }
890
891 static void retransmit(struct utcp_connection *c) {
892         if(c->state == CLOSED || c->snd.last == c->snd.una) {
893                 debug(c, "retransmit() called but nothing to retransmit!\n");
894                 stop_retransmit_timer(c);
895                 return;
896         }
897
898         struct utcp *utcp = c->utcp;
899
900         struct {
901                 struct hdr hdr;
902                 uint8_t data[];
903         } *pkt = c->utcp->pkt;
904
905         pkt->hdr.src = c->src;
906         pkt->hdr.dst = c->dst;
907         pkt->hdr.wnd = c->rcvbuf.maxsize;
908         pkt->hdr.aux = 0;
909
910         switch(c->state) {
911         case SYN_SENT:
912                 // Send our SYN again
913                 pkt->hdr.seq = c->snd.iss;
914                 pkt->hdr.ack = 0;
915                 pkt->hdr.ctl = SYN;
916                 pkt->hdr.aux = 0x0101;
917                 pkt->data[0] = 1;
918                 pkt->data[1] = 0;
919                 pkt->data[2] = 0;
920                 pkt->data[3] = c->flags & 0x7;
921                 print_packet(c, "rtrx", pkt, sizeof(pkt->hdr) + 4);
922                 utcp->send(utcp, pkt, sizeof(pkt->hdr) + 4);
923                 break;
924
925         case SYN_RECEIVED:
926                 // Send SYNACK again
927                 pkt->hdr.seq = c->snd.nxt;
928                 pkt->hdr.ack = c->rcv.nxt;
929                 pkt->hdr.ctl = SYN | ACK;
930                 print_packet(c, "rtrx", pkt, sizeof(pkt->hdr));
931                 utcp->send(utcp, pkt, sizeof(pkt->hdr));
932                 break;
933
934         case ESTABLISHED:
935         case FIN_WAIT_1:
936         case CLOSE_WAIT:
937         case CLOSING:
938         case LAST_ACK:
939                 // Send unacked data again.
940                 pkt->hdr.seq = c->snd.una;
941                 pkt->hdr.ack = c->rcv.nxt;
942                 pkt->hdr.ctl = ACK;
943                 uint32_t len = min(seqdiff(c->snd.last, c->snd.una), utcp->mss);
944
945                 if(fin_wanted(c, c->snd.una + len)) {
946                         len--;
947                         pkt->hdr.ctl |= FIN;
948                 }
949
950                 // RFC 5681 slow start after timeout
951                 uint32_t flightsize = seqdiff(c->snd.nxt, c->snd.una);
952                 c->snd.ssthresh = max(flightsize / 2, utcp->mss * 2); // eq. 4
953                 c->snd.cwnd = utcp->mss;
954                 debug_cwnd(c);
955
956                 buffer_copy(&c->sndbuf, pkt->data, 0, len);
957                 print_packet(c, "rtrx", pkt, sizeof(pkt->hdr) + len);
958                 utcp->send(utcp, pkt, sizeof(pkt->hdr) + len);
959
960                 c->snd.nxt = c->snd.una + len;
961                 break;
962
963         case CLOSED:
964         case LISTEN:
965         case TIME_WAIT:
966         case FIN_WAIT_2:
967                 // We shouldn't need to retransmit anything in this state.
968 #ifdef UTCP_DEBUG
969                 abort();
970 #endif
971                 stop_retransmit_timer(c);
972                 goto cleanup;
973         }
974
975         start_retransmit_timer(c);
976         utcp->rto *= 2;
977
978         if(utcp->rto > MAX_RTO) {
979                 utcp->rto = MAX_RTO;
980         }
981
982         c->rtt_start.tv_sec = 0; // invalidate RTT timer
983         c->dupack = 0; // cancel any ongoing fast recovery
984
985 cleanup:
986         return;
987 }
988
989 /* Update receive buffer and SACK entries after consuming data.
990  *
991  * Situation:
992  *
993  * |.....0000..1111111111.....22222......3333|
994  * |---------------^
995  *
996  * 0..3 represent the SACK entries. The ^ indicates up to which point we want
997  * to remove data from the receive buffer. The idea is to substract "len"
998  * from the offset of all the SACK entries, and then remove/cut down entries
999  * that are shifted to before the start of the receive buffer.
1000  *
1001  * There are three cases:
1002  * - the SACK entry is after ^, in that case just change the offset.
1003  * - the SACK entry starts before and ends after ^, so we have to
1004  *   change both its offset and size.
1005  * - the SACK entry is completely before ^, in that case delete it.
1006  */
1007 static void sack_consume(struct utcp_connection *c, size_t len) {
1008         debug(c, "sack_consume %lu\n", (unsigned long)len);
1009
1010         if(len > c->rcvbuf.used) {
1011                 debug(c, "all SACK entries consumed\n");
1012                 c->sacks[0].len = 0;
1013                 return;
1014         }
1015
1016         buffer_discard(&c->rcvbuf, len);
1017
1018         for(int i = 0; i < NSACKS && c->sacks[i].len;) {
1019                 if(len < c->sacks[i].offset) {
1020                         c->sacks[i].offset -= len;
1021                         i++;
1022                 } else if(len < c->sacks[i].offset + c->sacks[i].len) {
1023                         c->sacks[i].len -= len - c->sacks[i].offset;
1024                         c->sacks[i].offset = 0;
1025                         i++;
1026                 } else {
1027                         if(i < NSACKS - 1) {
1028                                 memmove(&c->sacks[i], &c->sacks[i + 1], (NSACKS - 1 - i) * sizeof(c->sacks)[i]);
1029                                 c->sacks[NSACKS - 1].len = 0;
1030                         } else {
1031                                 c->sacks[i].len = 0;
1032                                 break;
1033                         }
1034                 }
1035         }
1036
1037         for(int i = 0; i < NSACKS && c->sacks[i].len; i++) {
1038                 debug(c, "SACK[%d] offset %u len %u\n", i, c->sacks[i].offset, c->sacks[i].len);
1039         }
1040 }
1041
1042 static void handle_out_of_order(struct utcp_connection *c, uint32_t offset, const void *data, size_t len) {
1043         debug(c, "out of order packet, offset %u\n", offset);
1044         // Packet loss or reordering occured. Store the data in the buffer.
1045         ssize_t rxd = buffer_put_at(&c->rcvbuf, offset, data, len);
1046
1047         if(rxd < 0 || (size_t)rxd < len) {
1048                 abort();
1049         }
1050
1051         // Make note of where we put it.
1052         for(int i = 0; i < NSACKS; i++) {
1053                 if(!c->sacks[i].len) { // nothing to merge, add new entry
1054                         debug(c, "new SACK entry %d\n", i);
1055                         c->sacks[i].offset = offset;
1056                         c->sacks[i].len = rxd;
1057                         break;
1058                 } else if(offset < c->sacks[i].offset) {
1059                         if(offset + rxd < c->sacks[i].offset) { // insert before
1060                                 if(!c->sacks[NSACKS - 1].len) { // only if room left
1061                                         debug(c, "insert SACK entry at %d\n", i);
1062                                         memmove(&c->sacks[i + 1], &c->sacks[i], (NSACKS - i - 1) * sizeof(c->sacks)[i]);
1063                                         c->sacks[i].offset = offset;
1064                                         c->sacks[i].len = rxd;
1065                                 } else {
1066                                         debug(c, "SACK entries full, dropping packet\n");
1067                                 }
1068
1069                                 break;
1070                         } else { // merge
1071                                 debug(c, "merge with start of SACK entry at %d\n", i);
1072                                 c->sacks[i].offset = offset;
1073                                 break;
1074                         }
1075                 } else if(offset <= c->sacks[i].offset + c->sacks[i].len) {
1076                         if(offset + rxd > c->sacks[i].offset + c->sacks[i].len) { // merge
1077                                 debug(c, "merge with end of SACK entry at %d\n", i);
1078                                 c->sacks[i].len = offset + rxd - c->sacks[i].offset;
1079                                 // TODO: handle potential merge with next entry
1080                         }
1081
1082                         break;
1083                 }
1084         }
1085
1086         for(int i = 0; i < NSACKS && c->sacks[i].len; i++) {
1087                 debug(c, "SACK[%d] offset %u len %u\n", i, c->sacks[i].offset, c->sacks[i].len);
1088         }
1089 }
1090
1091 static void handle_in_order(struct utcp_connection *c, const void *data, size_t len) {
1092         if(c->recv) {
1093                 ssize_t rxd = c->recv(c, data, len);
1094
1095                 if(rxd != (ssize_t)len) {
1096                         // TODO: handle the application not accepting all data.
1097                         abort();
1098                 }
1099         }
1100
1101         // Check if we can process out-of-order data now.
1102         if(c->sacks[0].len && len >= c->sacks[0].offset) {
1103                 debug(c, "incoming packet len %lu connected with SACK at %u\n", (unsigned long)len, c->sacks[0].offset);
1104
1105                 if(len < c->sacks[0].offset + c->sacks[0].len) {
1106                         size_t offset = len;
1107                         len = c->sacks[0].offset + c->sacks[0].len;
1108                         size_t remainder = len - offset;
1109                         ssize_t rxd = buffer_call(&c->rcvbuf, c->recv, c, offset, remainder);
1110
1111                         if(rxd != (ssize_t)remainder) {
1112                                 // TODO: handle the application not accepting all data.
1113                                 abort();
1114                         }
1115                 }
1116         }
1117
1118         if(c->rcvbuf.used) {
1119                 sack_consume(c, len);
1120         }
1121
1122         c->rcv.nxt += len;
1123 }
1124
1125 static void handle_unreliable(struct utcp_connection *c, const struct hdr *hdr, const void *data, size_t len) {
1126         // Fast path for unfragmented packets
1127         if(!hdr->wnd && !(hdr->ctl & MF)) {
1128                 c->recv(c, data, len);
1129                 c->rcv.nxt = hdr->seq + len;
1130                 return;
1131         }
1132
1133         // Ensure reassembled packet are not larger than 64 kiB
1134         if(hdr->wnd >= MAX_UNRELIABLE_SIZE || hdr->wnd + len > MAX_UNRELIABLE_SIZE) {
1135                 return;
1136         }
1137
1138         // Don't accept out of order fragments
1139         if(hdr->wnd && hdr->seq != c->rcv.nxt) {
1140                 return;
1141         }
1142
1143         // Reset the receive buffer for the first fragment
1144         if(!hdr->wnd) {
1145                 buffer_clear(&c->rcvbuf);
1146         }
1147
1148         ssize_t rxd = buffer_put_at(&c->rcvbuf, hdr->wnd, data, len);
1149
1150         if(rxd != (ssize_t)len) {
1151                 return;
1152         }
1153
1154         // Send the packet if it's the final fragment
1155         if(!(hdr->ctl & MF)) {
1156                 buffer_call(&c->rcvbuf, c->recv, c, 0, hdr->wnd + len);
1157         }
1158
1159         c->rcv.nxt = hdr->seq + len;
1160 }
1161
1162 static void handle_incoming_data(struct utcp_connection *c, const struct hdr *hdr, const void *data, size_t len) {
1163         if(!is_reliable(c)) {
1164                 handle_unreliable(c, hdr, data, len);
1165                 return;
1166         }
1167
1168         uint32_t offset = seqdiff(hdr->seq, c->rcv.nxt);
1169
1170         if(offset + len > c->rcvbuf.maxsize) {
1171                 abort();
1172         }
1173
1174         if(offset) {
1175                 handle_out_of_order(c, offset, data, len);
1176         } else {
1177                 handle_in_order(c, data, len);
1178         }
1179 }
1180
1181
1182 ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) {
1183         const uint8_t *ptr = data;
1184
1185         if(!utcp) {
1186                 errno = EFAULT;
1187                 return -1;
1188         }
1189
1190         if(!len) {
1191                 return 0;
1192         }
1193
1194         if(!data) {
1195                 errno = EFAULT;
1196                 return -1;
1197         }
1198
1199         // Drop packets smaller than the header
1200
1201         struct hdr hdr;
1202
1203         if(len < sizeof(hdr)) {
1204                 print_packet(NULL, "recv", data, len);
1205                 errno = EBADMSG;
1206                 return -1;
1207         }
1208
1209         // Make a copy from the potentially unaligned data to a struct hdr
1210
1211         memcpy(&hdr, ptr, sizeof(hdr));
1212
1213         // Try to match the packet to an existing connection
1214
1215         struct utcp_connection *c = find_connection(utcp, hdr.dst, hdr.src);
1216         print_packet(c, "recv", data, len);
1217
1218         // Process the header
1219
1220         ptr += sizeof(hdr);
1221         len -= sizeof(hdr);
1222
1223         // Drop packets with an unknown CTL flag
1224
1225         if(hdr.ctl & ~(SYN | ACK | RST | FIN | MF)) {
1226                 print_packet(NULL, "recv", data, len);
1227                 errno = EBADMSG;
1228                 return -1;
1229         }
1230
1231         // Check for auxiliary headers
1232
1233         const uint8_t *init = NULL;
1234
1235         uint16_t aux = hdr.aux;
1236
1237         while(aux) {
1238                 size_t auxlen = 4 * (aux >> 8) & 0xf;
1239                 uint8_t auxtype = aux & 0xff;
1240
1241                 if(len < auxlen) {
1242                         errno = EBADMSG;
1243                         return -1;
1244                 }
1245
1246                 switch(auxtype) {
1247                 case AUX_INIT:
1248                         if(!(hdr.ctl & SYN) || auxlen != 4) {
1249                                 errno = EBADMSG;
1250                                 return -1;
1251                         }
1252
1253                         init = ptr;
1254                         break;
1255
1256                 default:
1257                         errno = EBADMSG;
1258                         return -1;
1259                 }
1260
1261                 len -= auxlen;
1262                 ptr += auxlen;
1263
1264                 if(!(aux & 0x800)) {
1265                         break;
1266                 }
1267
1268                 if(len < 2) {
1269                         errno = EBADMSG;
1270                         return -1;
1271                 }
1272
1273                 memcpy(&aux, ptr, 2);
1274                 len -= 2;
1275                 ptr += 2;
1276         }
1277
1278         bool has_data = len || (hdr.ctl & (SYN | FIN));
1279
1280         // Is it for a new connection?
1281
1282         if(!c) {
1283                 // Ignore RST packets
1284
1285                 if(hdr.ctl & RST) {
1286                         return 0;
1287                 }
1288
1289                 // Is it a SYN packet and are we LISTENing?
1290
1291                 if(hdr.ctl & SYN && !(hdr.ctl & ACK) && utcp->accept) {
1292                         // If we don't want to accept it, send a RST back
1293                         if((utcp->pre_accept && !utcp->pre_accept(utcp, hdr.dst))) {
1294                                 len = 1;
1295                                 goto reset;
1296                         }
1297
1298                         // Try to allocate memory, otherwise send a RST back
1299                         c = allocate_connection(utcp, hdr.dst, hdr.src);
1300
1301                         if(!c) {
1302                                 len = 1;
1303                                 goto reset;
1304                         }
1305
1306                         // Parse auxilliary information
1307                         if(init) {
1308                                 if(init[0] < 1) {
1309                                         len = 1;
1310                                         goto reset;
1311                                 }
1312
1313                                 c->flags = init[3] & 0x7;
1314                         } else {
1315                                 c->flags = UTCP_TCP;
1316                         }
1317
1318 synack:
1319                         // Return SYN+ACK, go to SYN_RECEIVED state
1320                         c->snd.wnd = hdr.wnd;
1321                         c->rcv.irs = hdr.seq;
1322                         c->rcv.nxt = c->rcv.irs + 1;
1323                         set_state(c, SYN_RECEIVED);
1324
1325                         struct {
1326                                 struct hdr hdr;
1327                                 uint8_t data[4];
1328                         } pkt;
1329
1330                         pkt.hdr.src = c->src;
1331                         pkt.hdr.dst = c->dst;
1332                         pkt.hdr.ack = c->rcv.irs + 1;
1333                         pkt.hdr.seq = c->snd.iss;
1334                         pkt.hdr.wnd = c->rcvbuf.maxsize;
1335                         pkt.hdr.ctl = SYN | ACK;
1336
1337                         if(init) {
1338                                 pkt.hdr.aux = 0x0101;
1339                                 pkt.data[0] = 1;
1340                                 pkt.data[1] = 0;
1341                                 pkt.data[2] = 0;
1342                                 pkt.data[3] = c->flags & 0x7;
1343                                 print_packet(c, "send", &pkt, sizeof(hdr) + 4);
1344                                 utcp->send(utcp, &pkt, sizeof(hdr) + 4);
1345                         } else {
1346                                 pkt.hdr.aux = 0;
1347                                 print_packet(c, "send", &pkt, sizeof(hdr));
1348                                 utcp->send(utcp, &pkt, sizeof(hdr));
1349                         }
1350                 } else {
1351                         // No, we don't want your packets, send a RST back
1352                         len = 1;
1353                         goto reset;
1354                 }
1355
1356                 return 0;
1357         }
1358
1359         debug(c, "state %s\n", strstate[c->state]);
1360
1361         // In case this is for a CLOSED connection, ignore the packet.
1362         // TODO: make it so incoming packets can never match a CLOSED connection.
1363
1364         if(c->state == CLOSED) {
1365                 debug(c, "got packet for closed connection\n");
1366                 return 0;
1367         }
1368
1369         // It is for an existing connection.
1370
1371         // 1. Drop invalid packets.
1372
1373         // 1a. Drop packets that should not happen in our current state.
1374
1375         switch(c->state) {
1376         case SYN_SENT:
1377         case SYN_RECEIVED:
1378         case ESTABLISHED:
1379         case FIN_WAIT_1:
1380         case FIN_WAIT_2:
1381         case CLOSE_WAIT:
1382         case CLOSING:
1383         case LAST_ACK:
1384         case TIME_WAIT:
1385                 break;
1386
1387         default:
1388 #ifdef UTCP_DEBUG
1389                 abort();
1390 #endif
1391                 break;
1392         }
1393
1394         // 1b. Discard data that is not in our receive window.
1395
1396         if(is_reliable(c)) {
1397                 bool acceptable;
1398
1399                 if(c->state == SYN_SENT) {
1400                         acceptable = true;
1401                 } else if(len == 0) {
1402                         acceptable = seqdiff(hdr.seq, c->rcv.nxt) >= 0;
1403                 } else {
1404                         int32_t rcv_offset = seqdiff(hdr.seq, c->rcv.nxt);
1405
1406                         // cut already accepted front overlapping
1407                         if(rcv_offset < 0) {
1408                                 acceptable = len > (size_t) - rcv_offset;
1409
1410                                 if(acceptable) {
1411                                         ptr -= rcv_offset;
1412                                         len += rcv_offset;
1413                                         hdr.seq -= rcv_offset;
1414                                 }
1415                         } else {
1416                                 acceptable = seqdiff(hdr.seq, c->rcv.nxt) >= 0 && seqdiff(hdr.seq, c->rcv.nxt) + len <= c->rcvbuf.maxsize;
1417                         }
1418                 }
1419
1420                 if(!acceptable) {
1421                         debug(c, "packet not acceptable, %u <= %u + %lu < %u\n", c->rcv.nxt, hdr.seq, (unsigned long)len, c->rcv.nxt + c->rcvbuf.maxsize);
1422
1423                         // Ignore unacceptable RST packets.
1424                         if(hdr.ctl & RST) {
1425                                 return 0;
1426                         }
1427
1428                         // Otherwise, continue processing.
1429                         len = 0;
1430                 }
1431         } else {
1432 #if UTCP_DEBUG
1433                 int32_t rcv_offset = seqdiff(hdr.seq, c->rcv.nxt);
1434
1435                 if(rcv_offset) {
1436                         debug(c, "packet out of order, offset %u bytes", rcv_offset);
1437                 }
1438
1439 #endif
1440         }
1441
1442         c->snd.wnd = hdr.wnd; // TODO: move below
1443
1444         // 1c. Drop packets with an invalid ACK.
1445         // ackno should not roll back, and it should also not be bigger than what we ever could have sent
1446         // (= snd.una + c->sndbuf.used).
1447
1448         if(!is_reliable(c)) {
1449                 if(hdr.ack != c->snd.last && c->state >= ESTABLISHED) {
1450                         hdr.ack = c->snd.una;
1451                 }
1452         }
1453
1454         if(hdr.ctl & ACK && (seqdiff(hdr.ack, c->snd.last) > 0 || seqdiff(hdr.ack, c->snd.una) < 0)) {
1455                 debug(c, "packet ack seqno out of range, %u <= %u < %u\n", c->snd.una, hdr.ack, c->snd.una + c->sndbuf.used);
1456
1457                 // Ignore unacceptable RST packets.
1458                 if(hdr.ctl & RST) {
1459                         return 0;
1460                 }
1461
1462                 goto reset;
1463         }
1464
1465         // 2. Handle RST packets
1466
1467         if(hdr.ctl & RST) {
1468                 switch(c->state) {
1469                 case SYN_SENT:
1470                         if(!(hdr.ctl & ACK)) {
1471                                 return 0;
1472                         }
1473
1474                         // The peer has refused our connection.
1475                         set_state(c, CLOSED);
1476                         errno = ECONNREFUSED;
1477
1478                         if(c->recv) {
1479                                 c->recv(c, NULL, 0);
1480                         }
1481
1482                         if(c->poll && !c->reapable) {
1483                                 c->poll(c, 0);
1484                         }
1485
1486                         return 0;
1487
1488                 case SYN_RECEIVED:
1489                         if(hdr.ctl & ACK) {
1490                                 return 0;
1491                         }
1492
1493                         // We haven't told the application about this connection yet. Silently delete.
1494                         free_connection(c);
1495                         return 0;
1496
1497                 case ESTABLISHED:
1498                 case FIN_WAIT_1:
1499                 case FIN_WAIT_2:
1500                 case CLOSE_WAIT:
1501                         if(hdr.ctl & ACK) {
1502                                 return 0;
1503                         }
1504
1505                         // The peer has aborted our connection.
1506                         set_state(c, CLOSED);
1507                         errno = ECONNRESET;
1508
1509                         if(c->recv) {
1510                                 c->recv(c, NULL, 0);
1511                         }
1512
1513                         if(c->poll && !c->reapable) {
1514                                 c->poll(c, 0);
1515                         }
1516
1517                         return 0;
1518
1519                 case CLOSING:
1520                 case LAST_ACK:
1521                 case TIME_WAIT:
1522                         if(hdr.ctl & ACK) {
1523                                 return 0;
1524                         }
1525
1526                         // As far as the application is concerned, the connection has already been closed.
1527                         // If it has called utcp_close() already, we can immediately free this connection.
1528                         if(c->reapable) {
1529                                 free_connection(c);
1530                                 return 0;
1531                         }
1532
1533                         // Otherwise, immediately move to the CLOSED state.
1534                         set_state(c, CLOSED);
1535                         return 0;
1536
1537                 default:
1538 #ifdef UTCP_DEBUG
1539                         abort();
1540 #endif
1541                         break;
1542                 }
1543         }
1544
1545         uint32_t advanced;
1546
1547         if(!(hdr.ctl & ACK)) {
1548                 advanced = 0;
1549                 goto skip_ack;
1550         }
1551
1552         // 3. Advance snd.una
1553
1554         advanced = seqdiff(hdr.ack, c->snd.una);
1555
1556         if(advanced) {
1557                 // RTT measurement
1558                 if(c->rtt_start.tv_sec) {
1559                         if(c->rtt_seq == hdr.ack) {
1560                                 struct timespec now;
1561                                 clock_gettime(UTCP_CLOCK, &now);
1562                                 int32_t diff = timespec_diff_usec(&now, &c->rtt_start);
1563                                 update_rtt(c, diff);
1564                                 c->rtt_start.tv_sec = 0;
1565                         } else if(c->rtt_seq < hdr.ack) {
1566                                 debug(c, "cancelling RTT measurement: %u < %u\n", c->rtt_seq, hdr.ack);
1567                                 c->rtt_start.tv_sec = 0;
1568                         }
1569                 }
1570
1571                 int32_t data_acked = advanced;
1572
1573                 switch(c->state) {
1574                 case SYN_SENT:
1575                 case SYN_RECEIVED:
1576                         data_acked--;
1577                         break;
1578
1579                 // TODO: handle FIN as well.
1580                 default:
1581                         break;
1582                 }
1583
1584                 assert(data_acked >= 0);
1585
1586 #ifndef NDEBUG
1587                 int32_t bufused = seqdiff(c->snd.last, c->snd.una);
1588                 assert(data_acked <= bufused);
1589 #endif
1590
1591                 if(data_acked) {
1592                         buffer_discard(&c->sndbuf, data_acked);
1593                         c->do_poll = true;
1594                 }
1595
1596                 // Also advance snd.nxt if possible
1597                 if(seqdiff(c->snd.nxt, hdr.ack) < 0) {
1598                         c->snd.nxt = hdr.ack;
1599                 }
1600
1601                 c->snd.una = hdr.ack;
1602
1603                 if(c->dupack) {
1604                         if(c->dupack >= 3) {
1605                                 debug(c, "fast recovery ended\n");
1606                                 c->snd.cwnd = c->snd.ssthresh;
1607                         }
1608
1609                         c->dupack = 0;
1610                 }
1611
1612                 // Increase the congestion window according to RFC 5681
1613                 if(c->snd.cwnd < c->snd.ssthresh) {
1614                         c->snd.cwnd += min(advanced, utcp->mss); // eq. 2
1615                 } else {
1616                         c->snd.cwnd += max(1, (utcp->mss * utcp->mss) / c->snd.cwnd); // eq. 3
1617                 }
1618
1619                 if(c->snd.cwnd > c->sndbuf.maxsize) {
1620                         c->snd.cwnd = c->sndbuf.maxsize;
1621                 }
1622
1623                 debug_cwnd(c);
1624
1625                 // Check if we have sent a FIN that is now ACKed.
1626                 switch(c->state) {
1627                 case FIN_WAIT_1:
1628                         if(c->snd.una == c->snd.last) {
1629                                 set_state(c, FIN_WAIT_2);
1630                         }
1631
1632                         break;
1633
1634                 case CLOSING:
1635                         if(c->snd.una == c->snd.last) {
1636                                 clock_gettime(UTCP_CLOCK, &c->conn_timeout);
1637                                 c->conn_timeout.tv_sec += utcp->timeout;
1638                                 set_state(c, TIME_WAIT);
1639                         }
1640
1641                         break;
1642
1643                 default:
1644                         break;
1645                 }
1646         } else {
1647                 if(!len && is_reliable(c) && c->snd.una != c->snd.last) {
1648                         c->dupack++;
1649                         debug(c, "duplicate ACK %d\n", c->dupack);
1650
1651                         if(c->dupack == 3) {
1652                                 // RFC 5681 fast recovery
1653                                 debug(c, "fast recovery started\n", c->dupack);
1654                                 uint32_t flightsize = seqdiff(c->snd.nxt, c->snd.una);
1655                                 c->snd.ssthresh = max(flightsize / 2, utcp->mss * 2); // eq. 4
1656                                 c->snd.cwnd = min(c->snd.ssthresh + 3 * utcp->mss, c->sndbuf.maxsize);
1657
1658                                 if(c->snd.cwnd > c->sndbuf.maxsize) {
1659                                         c->snd.cwnd = c->sndbuf.maxsize;
1660                                 }
1661
1662                                 debug_cwnd(c);
1663
1664                                 fast_retransmit(c);
1665                         } else if(c->dupack > 3) {
1666                                 c->snd.cwnd += utcp->mss;
1667
1668                                 if(c->snd.cwnd > c->sndbuf.maxsize) {
1669                                         c->snd.cwnd = c->sndbuf.maxsize;
1670                                 }
1671
1672                                 debug_cwnd(c);
1673                         }
1674
1675                         // We got an ACK which indicates the other side did get one of our packets.
1676                         // Reset the retransmission timer to avoid going to slow start,
1677                         // but don't touch the connection timeout.
1678                         start_retransmit_timer(c);
1679                 }
1680         }
1681
1682         // 4. Update timers
1683
1684         if(advanced) {
1685                 if(c->snd.una == c->snd.last) {
1686                         stop_retransmit_timer(c);
1687                         timespec_clear(&c->conn_timeout);
1688                 } else if(is_reliable(c)) {
1689                         start_retransmit_timer(c);
1690                         clock_gettime(UTCP_CLOCK, &c->conn_timeout);
1691                         c->conn_timeout.tv_sec += utcp->timeout;
1692                 }
1693         }
1694
1695 skip_ack:
1696         // 5. Process SYN stuff
1697
1698         if(hdr.ctl & SYN) {
1699                 switch(c->state) {
1700                 case SYN_SENT:
1701
1702                         // This is a SYNACK. It should always have ACKed the SYN.
1703                         if(!advanced) {
1704                                 goto reset;
1705                         }
1706
1707                         c->rcv.irs = hdr.seq;
1708                         c->rcv.nxt = hdr.seq;
1709
1710                         if(c->shut_wr) {
1711                                 c->snd.last++;
1712                                 set_state(c, FIN_WAIT_1);
1713                         } else {
1714                                 set_state(c, ESTABLISHED);
1715                         }
1716
1717                         // TODO: notify application of this somehow.
1718                         break;
1719
1720                 case SYN_RECEIVED:
1721                         // This is a retransmit of a SYN, send back the SYNACK.
1722                         goto synack;
1723
1724                 case ESTABLISHED:
1725                 case FIN_WAIT_1:
1726                 case FIN_WAIT_2:
1727                 case CLOSE_WAIT:
1728                 case CLOSING:
1729                 case LAST_ACK:
1730                 case TIME_WAIT:
1731                         // Ehm, no. We should never receive a second SYN.
1732                         return 0;
1733
1734                 default:
1735 #ifdef UTCP_DEBUG
1736                         abort();
1737 #endif
1738                         return 0;
1739                 }
1740
1741                 // SYN counts as one sequence number
1742                 c->rcv.nxt++;
1743         }
1744
1745         // 6. Process new data
1746
1747         if(c->state == SYN_RECEIVED) {
1748                 // This is the ACK after the SYNACK. It should always have ACKed the SYNACK.
1749                 if(!advanced) {
1750                         goto reset;
1751                 }
1752
1753                 // Are we still LISTENing?
1754                 if(utcp->accept) {
1755                         utcp->accept(c, c->src);
1756                 }
1757
1758                 if(c->state != ESTABLISHED) {
1759                         set_state(c, CLOSED);
1760                         c->reapable = true;
1761                         goto reset;
1762                 }
1763         }
1764
1765         if(len) {
1766                 switch(c->state) {
1767                 case SYN_SENT:
1768                 case SYN_RECEIVED:
1769                         // This should never happen.
1770 #ifdef UTCP_DEBUG
1771                         abort();
1772 #endif
1773                         return 0;
1774
1775                 case ESTABLISHED:
1776                 case FIN_WAIT_1:
1777                 case FIN_WAIT_2:
1778                         break;
1779
1780                 case CLOSE_WAIT:
1781                 case CLOSING:
1782                 case LAST_ACK:
1783                 case TIME_WAIT:
1784                         // Ehm no, We should never receive more data after a FIN.
1785                         goto reset;
1786
1787                 default:
1788 #ifdef UTCP_DEBUG
1789                         abort();
1790 #endif
1791                         return 0;
1792                 }
1793
1794                 handle_incoming_data(c, &hdr, ptr, len);
1795         }
1796
1797         // 7. Process FIN stuff
1798
1799         if((hdr.ctl & FIN) && (!is_reliable(c) || hdr.seq + len == c->rcv.nxt)) {
1800                 switch(c->state) {
1801                 case SYN_SENT:
1802                 case SYN_RECEIVED:
1803                         // This should never happen.
1804 #ifdef UTCP_DEBUG
1805                         abort();
1806 #endif
1807                         break;
1808
1809                 case ESTABLISHED:
1810                         set_state(c, CLOSE_WAIT);
1811                         break;
1812
1813                 case FIN_WAIT_1:
1814                         set_state(c, CLOSING);
1815                         break;
1816
1817                 case FIN_WAIT_2:
1818                         clock_gettime(UTCP_CLOCK, &c->conn_timeout);
1819                         c->conn_timeout.tv_sec += utcp->timeout;
1820                         set_state(c, TIME_WAIT);
1821                         break;
1822
1823                 case CLOSE_WAIT:
1824                 case CLOSING:
1825                 case LAST_ACK:
1826                 case TIME_WAIT:
1827                         // Ehm, no. We should never receive a second FIN.
1828                         goto reset;
1829
1830                 default:
1831 #ifdef UTCP_DEBUG
1832                         abort();
1833 #endif
1834                         break;
1835                 }
1836
1837                 // FIN counts as one sequence number
1838                 c->rcv.nxt++;
1839                 len++;
1840
1841                 // Inform the application that the peer closed its end of the connection.
1842                 if(c->recv) {
1843                         errno = 0;
1844                         c->recv(c, NULL, 0);
1845                 }
1846         }
1847
1848         // Now we send something back if:
1849         // - we received data, so we have to send back an ACK
1850         //   -> sendatleastone = true
1851         // - or we got an ack, so we should maybe send a bit more data
1852         //   -> sendatleastone = false
1853
1854         if(is_reliable(c) || hdr.ctl & SYN || hdr.ctl & FIN) {
1855                 ack(c, has_data);
1856         }
1857
1858         return 0;
1859
1860 reset:
1861         swap_ports(&hdr);
1862         hdr.wnd = 0;
1863         hdr.aux = 0;
1864
1865         if(hdr.ctl & ACK) {
1866                 hdr.seq = hdr.ack;
1867                 hdr.ctl = RST;
1868         } else {
1869                 hdr.ack = hdr.seq + len;
1870                 hdr.seq = 0;
1871                 hdr.ctl = RST | ACK;
1872         }
1873
1874         print_packet(c, "send", &hdr, sizeof(hdr));
1875         utcp->send(utcp, &hdr, sizeof(hdr));
1876         return 0;
1877
1878 }
1879
1880 int utcp_shutdown(struct utcp_connection *c, int dir) {
1881         debug(c, "shutdown %d at %u\n", dir, c ? c->snd.last : 0);
1882
1883         if(!c) {
1884                 errno = EFAULT;
1885                 return -1;
1886         }
1887
1888         if(c->reapable) {
1889                 debug(c, "shutdown() called on closed connection\n");
1890                 errno = EBADF;
1891                 return -1;
1892         }
1893
1894         if(!(dir == UTCP_SHUT_RD || dir == UTCP_SHUT_WR || dir == UTCP_SHUT_RDWR)) {
1895                 errno = EINVAL;
1896                 return -1;
1897         }
1898
1899         // TCP does not have a provision for stopping incoming packets.
1900         // The best we can do is to just ignore them.
1901         if(dir == UTCP_SHUT_RD || dir == UTCP_SHUT_RDWR) {
1902                 c->recv = NULL;
1903         }
1904
1905         // The rest of the code deals with shutting down writes.
1906         if(dir == UTCP_SHUT_RD) {
1907                 return 0;
1908         }
1909
1910         // Only process shutting down writes once.
1911         if(c->shut_wr) {
1912                 return 0;
1913         }
1914
1915         c->shut_wr = true;
1916
1917         switch(c->state) {
1918         case CLOSED:
1919         case LISTEN:
1920                 errno = ENOTCONN;
1921                 return -1;
1922
1923         case SYN_SENT:
1924                 return 0;
1925
1926         case SYN_RECEIVED:
1927         case ESTABLISHED:
1928                 set_state(c, FIN_WAIT_1);
1929                 break;
1930
1931         case FIN_WAIT_1:
1932         case FIN_WAIT_2:
1933                 return 0;
1934
1935         case CLOSE_WAIT:
1936                 set_state(c, CLOSING);
1937                 break;
1938
1939         case CLOSING:
1940         case LAST_ACK:
1941         case TIME_WAIT:
1942                 return 0;
1943         }
1944
1945         c->snd.last++;
1946
1947         ack(c, false);
1948
1949         if(!timespec_isset(&c->rtrx_timeout)) {
1950                 start_retransmit_timer(c);
1951         }
1952
1953         return 0;
1954 }
1955
1956 static bool reset_connection(struct utcp_connection *c) {
1957         if(!c) {
1958                 errno = EFAULT;
1959                 return false;
1960         }
1961
1962         if(c->reapable) {
1963                 debug(c, "abort() called on closed connection\n");
1964                 errno = EBADF;
1965                 return false;
1966         }
1967
1968         c->recv = NULL;
1969         c->poll = NULL;
1970
1971         switch(c->state) {
1972         case CLOSED:
1973                 return true;
1974
1975         case LISTEN:
1976         case SYN_SENT:
1977         case CLOSING:
1978         case LAST_ACK:
1979         case TIME_WAIT:
1980                 set_state(c, CLOSED);
1981                 return true;
1982
1983         case SYN_RECEIVED:
1984         case ESTABLISHED:
1985         case FIN_WAIT_1:
1986         case FIN_WAIT_2:
1987         case CLOSE_WAIT:
1988                 set_state(c, CLOSED);
1989                 break;
1990         }
1991
1992         // Send RST
1993
1994         struct hdr hdr;
1995
1996         hdr.src = c->src;
1997         hdr.dst = c->dst;
1998         hdr.seq = c->snd.nxt;
1999         hdr.ack = 0;
2000         hdr.wnd = 0;
2001         hdr.ctl = RST;
2002
2003         print_packet(c, "send", &hdr, sizeof(hdr));
2004         c->utcp->send(c->utcp, &hdr, sizeof(hdr));
2005         return true;
2006 }
2007
2008 // Closes all the opened connections
2009 void utcp_abort_all_connections(struct utcp *utcp) {
2010         if(!utcp) {
2011                 errno = EINVAL;
2012                 return;
2013         }
2014
2015         for(int i = 0; i < utcp->nconnections; i++) {
2016                 struct utcp_connection *c = utcp->connections[i];
2017
2018                 if(c->reapable || c->state == CLOSED) {
2019                         continue;
2020                 }
2021
2022                 utcp_recv_t old_recv = c->recv;
2023                 utcp_poll_t old_poll = c->poll;
2024
2025                 reset_connection(c);
2026
2027                 if(old_recv) {
2028                         errno = 0;
2029                         old_recv(c, NULL, 0);
2030                 }
2031
2032                 if(old_poll && !c->reapable) {
2033                         errno = 0;
2034                         old_poll(c, 0);
2035                 }
2036         }
2037
2038         return;
2039 }
2040
2041 int utcp_close(struct utcp_connection *c) {
2042         if(utcp_shutdown(c, SHUT_RDWR) && errno != ENOTCONN) {
2043                 return -1;
2044         }
2045
2046         c->recv = NULL;
2047         c->poll = NULL;
2048         c->reapable = true;
2049         return 0;
2050 }
2051
2052 int utcp_abort(struct utcp_connection *c) {
2053         if(!reset_connection(c)) {
2054                 return -1;
2055         }
2056
2057         c->reapable = true;
2058         return 0;
2059 }
2060
2061 /* Handle timeouts.
2062  * One call to this function will loop through all connections,
2063  * checking if something needs to be resent or not.
2064  * The return value is the time to the next timeout in milliseconds,
2065  * or maybe a negative value if the timeout is infinite.
2066  */
2067 struct timespec utcp_timeout(struct utcp *utcp) {
2068         struct timespec now;
2069         clock_gettime(UTCP_CLOCK, &now);
2070         struct timespec next = {now.tv_sec + 3600, now.tv_nsec};
2071
2072         for(int i = 0; i < utcp->nconnections; i++) {
2073                 struct utcp_connection *c = utcp->connections[i];
2074
2075                 if(!c) {
2076                         continue;
2077                 }
2078
2079                 // delete connections that have been utcp_close()d.
2080                 if(c->state == CLOSED) {
2081                         if(c->reapable) {
2082                                 debug(c, "reaping\n");
2083                                 free_connection(c);
2084                                 i--;
2085                         }
2086
2087                         continue;
2088                 }
2089
2090                 if(timespec_isset(&c->conn_timeout) && timespec_lt(&c->conn_timeout, &now)) {
2091                         errno = ETIMEDOUT;
2092                         c->state = CLOSED;
2093
2094                         if(c->recv) {
2095                                 c->recv(c, NULL, 0);
2096                         }
2097
2098                         if(c->poll && !c->reapable) {
2099                                 c->poll(c, 0);
2100                         }
2101
2102                         continue;
2103                 }
2104
2105                 if(timespec_isset(&c->rtrx_timeout) && timespec_lt(&c->rtrx_timeout, &now)) {
2106                         debug(c, "retransmitting after timeout\n");
2107                         retransmit(c);
2108                 }
2109
2110                 if(c->poll) {
2111                         if((c->state == ESTABLISHED || c->state == CLOSE_WAIT) && c->do_poll) {
2112                                 c->do_poll = false;
2113                                 uint32_t len = buffer_free(&c->sndbuf);
2114
2115                                 if(len) {
2116                                         c->poll(c, len);
2117                                 }
2118                         } else if(c->state == CLOSED) {
2119                                 c->poll(c, 0);
2120                         }
2121                 }
2122
2123                 if(timespec_isset(&c->conn_timeout) && timespec_lt(&c->conn_timeout, &next)) {
2124                         next = c->conn_timeout;
2125                 }
2126
2127                 if(timespec_isset(&c->rtrx_timeout) && timespec_lt(&c->rtrx_timeout, &next)) {
2128                         next = c->rtrx_timeout;
2129                 }
2130         }
2131
2132         struct timespec diff;
2133
2134         timespec_sub(&next, &now, &diff);
2135
2136         return diff;
2137 }
2138
2139 bool utcp_is_active(struct utcp *utcp) {
2140         if(!utcp) {
2141                 return false;
2142         }
2143
2144         for(int i = 0; i < utcp->nconnections; i++)
2145                 if(utcp->connections[i]->state != CLOSED && utcp->connections[i]->state != TIME_WAIT) {
2146                         return true;
2147                 }
2148
2149         return false;
2150 }
2151
2152 struct utcp *utcp_init(utcp_accept_t accept, utcp_pre_accept_t pre_accept, utcp_send_t send, void *priv) {
2153         if(!send) {
2154                 errno = EFAULT;
2155                 return NULL;
2156         }
2157
2158         struct utcp *utcp = calloc(1, sizeof(*utcp));
2159
2160         if(!utcp) {
2161                 return NULL;
2162         }
2163
2164         if(!CLOCK_GRANULARITY) {
2165                 struct timespec res;
2166                 clock_getres(UTCP_CLOCK, &res);
2167                 CLOCK_GRANULARITY = res.tv_sec * USEC_PER_SEC + res.tv_nsec / 1000;
2168         }
2169
2170         utcp->accept = accept;
2171         utcp->pre_accept = pre_accept;
2172         utcp->send = send;
2173         utcp->priv = priv;
2174         utcp_set_mtu(utcp, DEFAULT_MTU);
2175         utcp->timeout = DEFAULT_USER_TIMEOUT; // sec
2176         utcp->rto = START_RTO; // usec
2177
2178         return utcp;
2179 }
2180
2181 void utcp_exit(struct utcp *utcp) {
2182         if(!utcp) {
2183                 return;
2184         }
2185
2186         for(int i = 0; i < utcp->nconnections; i++) {
2187                 struct utcp_connection *c = utcp->connections[i];
2188
2189                 if(!c->reapable) {
2190                         if(c->recv) {
2191                                 c->recv(c, NULL, 0);
2192                         }
2193
2194                         if(c->poll && !c->reapable) {
2195                                 c->poll(c, 0);
2196                         }
2197                 }
2198
2199                 buffer_exit(&c->rcvbuf);
2200                 buffer_exit(&c->sndbuf);
2201                 free(c);
2202         }
2203
2204         free(utcp->connections);
2205         free(utcp->pkt);
2206         free(utcp);
2207 }
2208
2209 uint16_t utcp_get_mtu(struct utcp *utcp) {
2210         return utcp ? utcp->mtu : 0;
2211 }
2212
2213 uint16_t utcp_get_mss(struct utcp *utcp) {
2214         return utcp ? utcp->mss : 0;
2215 }
2216
2217 void utcp_set_mtu(struct utcp *utcp, uint16_t mtu) {
2218         if(!utcp) {
2219                 return;
2220         }
2221
2222         if(mtu <= sizeof(struct hdr)) {
2223                 return;
2224         }
2225
2226         if(mtu > utcp->mtu) {
2227                 char *new = realloc(utcp->pkt, mtu + sizeof(struct hdr));
2228
2229                 if(!new) {
2230                         return;
2231                 }
2232
2233                 utcp->pkt = new;
2234         }
2235
2236         utcp->mtu = mtu;
2237         utcp->mss = mtu - sizeof(struct hdr);
2238 }
2239
2240 void utcp_reset_timers(struct utcp *utcp) {
2241         if(!utcp) {
2242                 return;
2243         }
2244
2245         struct timespec now, then;
2246
2247         clock_gettime(UTCP_CLOCK, &now);
2248
2249         then = now;
2250
2251         then.tv_sec += utcp->timeout;
2252
2253         for(int i = 0; i < utcp->nconnections; i++) {
2254                 struct utcp_connection *c = utcp->connections[i];
2255
2256                 if(c->reapable) {
2257                         continue;
2258                 }
2259
2260                 if(timespec_isset(&c->rtrx_timeout)) {
2261                         c->rtrx_timeout = now;
2262                 }
2263
2264                 if(timespec_isset(&c->conn_timeout)) {
2265                         c->conn_timeout = then;
2266                 }
2267
2268                 c->rtt_start.tv_sec = 0;
2269         }
2270
2271         if(utcp->rto > START_RTO) {
2272                 utcp->rto = START_RTO;
2273         }
2274 }
2275
2276 int utcp_get_user_timeout(struct utcp *u) {
2277         return u ? u->timeout : 0;
2278 }
2279
2280 void utcp_set_user_timeout(struct utcp *u, int timeout) {
2281         if(u) {
2282                 u->timeout = timeout;
2283         }
2284 }
2285
2286 size_t utcp_get_sndbuf(struct utcp_connection *c) {
2287         return c ? c->sndbuf.maxsize : 0;
2288 }
2289
2290 size_t utcp_get_sndbuf_free(struct utcp_connection *c) {
2291         if(!c) {
2292                 return 0;
2293         }
2294
2295         switch(c->state) {
2296         case SYN_SENT:
2297         case SYN_RECEIVED:
2298         case ESTABLISHED:
2299         case CLOSE_WAIT:
2300                 return buffer_free(&c->sndbuf);
2301
2302         default:
2303                 return 0;
2304         }
2305 }
2306
2307 void utcp_set_sndbuf(struct utcp_connection *c, size_t size) {
2308         if(!c) {
2309                 return;
2310         }
2311
2312         c->sndbuf.maxsize = size;
2313
2314         if(c->sndbuf.maxsize != size) {
2315                 c->sndbuf.maxsize = -1;
2316         }
2317
2318         c->do_poll = buffer_free(&c->sndbuf);
2319 }
2320
2321 size_t utcp_get_rcvbuf(struct utcp_connection *c) {
2322         return c ? c->rcvbuf.maxsize : 0;
2323 }
2324
2325 size_t utcp_get_rcvbuf_free(struct utcp_connection *c) {
2326         if(c && (c->state == ESTABLISHED || c->state == CLOSE_WAIT)) {
2327                 return buffer_free(&c->rcvbuf);
2328         } else {
2329                 return 0;
2330         }
2331 }
2332
2333 void utcp_set_rcvbuf(struct utcp_connection *c, size_t size) {
2334         if(!c) {
2335                 return;
2336         }
2337
2338         c->rcvbuf.maxsize = size;
2339
2340         if(c->rcvbuf.maxsize != size) {
2341                 c->rcvbuf.maxsize = -1;
2342         }
2343 }
2344
2345 size_t utcp_get_sendq(struct utcp_connection *c) {
2346         return c->sndbuf.used;
2347 }
2348
2349 size_t utcp_get_recvq(struct utcp_connection *c) {
2350         return c->rcvbuf.used;
2351 }
2352
2353 bool utcp_get_nodelay(struct utcp_connection *c) {
2354         return c ? c->nodelay : false;
2355 }
2356
2357 void utcp_set_nodelay(struct utcp_connection *c, bool nodelay) {
2358         if(c) {
2359                 c->nodelay = nodelay;
2360         }
2361 }
2362
2363 bool utcp_get_keepalive(struct utcp_connection *c) {
2364         return c ? c->keepalive : false;
2365 }
2366
2367 void utcp_set_keepalive(struct utcp_connection *c, bool keepalive) {
2368         if(c) {
2369                 c->keepalive = keepalive;
2370         }
2371 }
2372
2373 size_t utcp_get_outq(struct utcp_connection *c) {
2374         return c ? seqdiff(c->snd.nxt, c->snd.una) : 0;
2375 }
2376
2377 void utcp_set_recv_cb(struct utcp_connection *c, utcp_recv_t recv) {
2378         if(c) {
2379                 c->recv = recv;
2380         }
2381 }
2382
2383 void utcp_set_poll_cb(struct utcp_connection *c, utcp_poll_t poll) {
2384         if(c) {
2385                 c->poll = poll;
2386                 c->do_poll = buffer_free(&c->sndbuf);
2387         }
2388 }
2389
2390 void utcp_set_accept_cb(struct utcp *utcp, utcp_accept_t accept, utcp_pre_accept_t pre_accept) {
2391         if(utcp) {
2392                 utcp->accept = accept;
2393                 utcp->pre_accept = pre_accept;
2394         }
2395 }
2396
2397 void utcp_expect_data(struct utcp_connection *c, bool expect) {
2398         if(!c || c->reapable) {
2399                 return;
2400         }
2401
2402         if(!(c->state == ESTABLISHED || c->state == FIN_WAIT_1 || c->state == FIN_WAIT_2)) {
2403                 return;
2404         }
2405
2406         if(expect) {
2407                 // If we expect data, start the connection timer.
2408                 if(!timespec_isset(&c->conn_timeout)) {
2409                         clock_gettime(UTCP_CLOCK, &c->conn_timeout);
2410                         c->conn_timeout.tv_sec += c->utcp->timeout;
2411                 }
2412         } else {
2413                 // If we want to cancel expecting data, only clear the timer when there is no unACKed data.
2414                 if(c->snd.una == c->snd.last) {
2415                         timespec_clear(&c->conn_timeout);
2416                 }
2417         }
2418 }
2419
2420 void utcp_offline(struct utcp *utcp, bool offline) {
2421         struct timespec now;
2422         clock_gettime(UTCP_CLOCK, &now);
2423
2424         for(int i = 0; i < utcp->nconnections; i++) {
2425                 struct utcp_connection *c = utcp->connections[i];
2426
2427                 if(c->reapable) {
2428                         continue;
2429                 }
2430
2431                 utcp_expect_data(c, offline);
2432
2433                 if(!offline) {
2434                         if(timespec_isset(&c->rtrx_timeout)) {
2435                                 c->rtrx_timeout = now;
2436                         }
2437
2438                         utcp->connections[i]->rtt_start.tv_sec = 0;
2439                 }
2440         }
2441
2442         if(!offline && utcp->rto > START_RTO) {
2443                 utcp->rto = START_RTO;
2444         }
2445 }
2446
2447 void utcp_set_clock_granularity(long granularity) {
2448         CLOCK_GRANULARITY = granularity;
2449 }