]> git.meshlink.io Git - utcp/blob - utcp.c
Fix handling packets partially overlapping the start of the receive buffer.
[utcp] / utcp.c
1 /*
2     utcp.c -- Userspace TCP
3     Copyright (C) 2014 Guus Sliepen <guus@tinc-vpn.org>
4
5     This program is free software; you can redistribute it and/or modify
6     it under the terms of the GNU General Public License as published by
7     the Free Software Foundation; either version 2 of the License, or
8     (at your option) any later version.
9
10     This program is distributed in the hope that it will be useful,
11     but WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13     GNU General Public License for more details.
14
15     You should have received a copy of the GNU General Public License along
16     with this program; if not, write to the Free Software Foundation, Inc.,
17     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20 #define _GNU_SOURCE
21
22 #include <assert.h>
23 #include <errno.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <stdint.h>
27 #include <stdbool.h>
28 #include <string.h>
29 #include <unistd.h>
30 #include <sys/time.h>
31 #include <sys/socket.h>
32
33 #include "utcp_priv.h"
34
35 #ifndef EBADMSG
36 #define EBADMSG         104
37 #endif
38
39 #ifndef SHUT_RDWR
40 #define SHUT_RDWR 2
41 #endif
42
43 #ifdef poll
44 #undef poll
45 #endif
46
47 #ifndef timersub
// Fallback for platforms without timersub(): store a - b in r, borrowing one
// second whenever the microsecond difference comes out negative.
#define timersub(a, b, r) do {\
	(r)->tv_sec = (a)->tv_sec - (b)->tv_sec;\
	(r)->tv_usec = (a)->tv_usec - (b)->tv_usec;\
	if((r)->tv_usec < 0) {\
		(r)->tv_sec--;\
		(r)->tv_usec += USEC_PER_SEC;\
	}\
} while (0)
54 #endif
55
// Return the larger of two sizes.
static inline size_t max(size_t a, size_t b) {
	if(a > b)
		return a;
	return b;
}
59
#ifdef UTCP_DEBUG
#include <stdarg.h>

// Print a printf-style formatted debug message to stderr.
static void debug(const char *format, ...) {
	va_list ap;
	va_start(ap, format);
	vfprintf(stderr, format, ap);
	va_end(ap);
}

// Log a one-line summary of a packet: its header fields, the control flags
// that are set, and the payload (if any) as uppercase hexadecimal.
//
// utcp: only printed as a pointer, to tell instances apart.
// dir:  free-form label for the direction (callers pass "send", "recv", "rtrx").
// pkt:  start of the packet, header first; may be unaligned.
// len:  total size of the packet in bytes.
static void print_packet(struct utcp *utcp, const char *dir, const void *pkt, size_t len) {
	struct hdr hdr;
	if(len < sizeof hdr) {
		debug("%p %s: short packet (%lu bytes)\n", utcp, dir, (unsigned long)len);
		return;
	}

	// Copy the header out, since pkt is not guaranteed to be aligned.
	memcpy(&hdr, pkt, sizeof hdr);
	debug("%p %s: len=%lu, src=%u dst=%u seq=%u ack=%u wnd=%u ctl=", utcp, dir, (unsigned long)len, hdr.src, hdr.dst, hdr.seq, hdr.ack, hdr.wnd);
	if(hdr.ctl & SYN)
		debug("SYN");
	if(hdr.ctl & RST)
		debug("RST");
	if(hdr.ctl & FIN)
		debug("FIN");
	if(hdr.ctl & ACK)
		debug("ACK");

	if(len > sizeof hdr) {
		static const char hexdigits[] = "0123456789ABCDEF";
		const uint8_t *data = (const uint8_t *)pkt + sizeof hdr;
		size_t datalen = len - sizeof hdr;

		// Convert the payload in fixed-size chunks, so that the stack usage
		// does not depend on the size of the packet.
		enum { CHUNK = 32 };
		char str[2 * CHUNK + 1];

		debug(" data=");

		while(datalen) {
			size_t n = datalen < CHUNK ? datalen : CHUNK;
			char *p = str;

			for(size_t i = 0; i < n; i++) {
				*p++ = hexdigits[data[i] >> 4];
				*p++ = hexdigits[data[i] & 15];
			}
			*p = 0;

			debug("%s", str);

			data += n;
			datalen -= n;
		}
	}

	debug("\n");
}
#else
// Without UTCP_DEBUG all debug output compiles away to nothing.
#define debug(...)
#define print_packet(...)
#endif
109
110 static void set_state(struct utcp_connection *c, enum state state) {
111         c->state = state;
112         if(state == ESTABLISHED)
113                 timerclear(&c->conn_timeout);
114         debug("%p new state: %s\n", c->utcp, strstate[state]);
115 }
116
117 static bool fin_wanted(struct utcp_connection *c, uint32_t seq) {
118         if(seq != c->snd.last)
119                 return false;
120         switch(c->state) {
121         case FIN_WAIT_1:
122         case CLOSING:
123         case LAST_ACK:
124                 return true;
125         default:
126                 return false;
127         }
128 }
129
130 static inline void list_connections(struct utcp *utcp) {
131         debug("%p has %d connections:\n", utcp, utcp->nconnections);
132         for(int i = 0; i < utcp->nconnections; i++)
133                 debug("  %u -> %u state %s\n", utcp->connections[i]->src, utcp->connections[i]->dst, strstate[utcp->connections[i]->state]);
134 }
135
// Signed distance from sequence number b to sequence number a. The
// subtraction is done modulo 2^32, so it stays meaningful when the sequence
// numbers wrap around.
static int32_t seqdiff(uint32_t a, uint32_t b) {
	uint32_t distance = a - b;
	return distance;
}
139
140 // Buffer functions
141 // TODO: convert to ringbuffers to avoid memmove() operations.
142
143 // Store data into the buffer
144 static ssize_t buffer_put_at(struct buffer *buf, size_t offset, const void *data, size_t len) {
145         debug("buffer_put_at %lu %lu %lu\n", (unsigned long)buf->used, (unsigned long)offset, (unsigned long)len);
146
147         size_t required = offset + len;
148         if(required > buf->maxsize) {
149                 if(offset >= buf->maxsize)
150                         return 0;
151                 len = buf->maxsize - offset;
152                 required = buf->maxsize;
153         }
154
155         if(required > buf->size) {
156                 size_t newsize = buf->size;
157                 if(!newsize) {
158                         newsize = required;
159                 } else {
160                         do {
161                                 newsize *= 2;
162                         } while(newsize < required);
163                 }
164                 if(newsize > buf->maxsize)
165                         newsize = buf->maxsize;
166                 char *newdata = realloc(buf->data, newsize);
167                 if(!newdata)
168                         return -1;
169                 buf->data = newdata;
170                 buf->size = newsize;
171         }
172
173         memcpy(buf->data + offset, data, len);
174         if(required > buf->used)
175                 buf->used = required;
176         return len;
177 }
178
179 static ssize_t buffer_put(struct buffer *buf, const void *data, size_t len) {
180         return buffer_put_at(buf, buf->used, data, len);
181 }
182
183 // Get data from the buffer. data can be NULL.
184 static ssize_t buffer_get(struct buffer *buf, void *data, size_t len) {
185         if(len > buf->used)
186                 len = buf->used;
187         if(data)
188                 memcpy(data, buf->data, len);
189         if(len < buf->used)
190                 memmove(buf->data, buf->data + len, buf->used - len);
191         buf->used -= len;
192         return len;
193 }
194
195 // Copy data from the buffer without removing it.
196 static ssize_t buffer_copy(struct buffer *buf, void *data, size_t offset, size_t len) {
197         if(offset >= buf->used)
198                 return 0;
199         if(offset + len > buf->used)
200                 len = buf->used - offset;
201         memcpy(data, buf->data + offset, len);
202         return len;
203 }
204
205 static bool buffer_init(struct buffer *buf, uint32_t len, uint32_t maxlen) {
206         memset(buf, 0, sizeof *buf);
207         if(len) {
208                 buf->data = malloc(len);
209                 if(!buf->data)
210                         return false;
211         }
212         buf->size = len;
213         buf->maxsize = maxlen;
214         return true;
215 }
216
217 static void buffer_exit(struct buffer *buf) {
218         free(buf->data);
219         memset(buf, 0, sizeof *buf);
220 }
221
222 static uint32_t buffer_free(const struct buffer *buf) {
223         return buf->maxsize - buf->used;
224 }
225
226 // Connections are stored in a sorted list.
227 // This gives O(log(N)) lookup time, O(N log(N)) insertion time and O(N) deletion time.
228
229 static int compare(const void *va, const void *vb) {
230         assert(va && vb);
231
232         const struct utcp_connection *a = *(struct utcp_connection **)va;
233         const struct utcp_connection *b = *(struct utcp_connection **)vb;
234
235         assert(a && b);
236         assert(a->src && b->src);
237
238         int c = (int)a->src - (int)b->src;
239         if(c)
240                 return c;
241         c = (int)a->dst - (int)b->dst;
242         return c;
243 }
244
245 static struct utcp_connection *find_connection(const struct utcp *utcp, uint16_t src, uint16_t dst) {
246         if(!utcp->nconnections)
247                 return NULL;
248         struct utcp_connection key = {
249                 .src = src,
250                 .dst = dst,
251         }, *keyp = &key;
252         struct utcp_connection **match = bsearch(&keyp, utcp->connections, utcp->nconnections, sizeof *utcp->connections, compare);
253         return match ? *match : NULL;
254 }
255
256 static void free_connection(struct utcp_connection *c) {
257         struct utcp *utcp = c->utcp;
258         struct utcp_connection **cp = bsearch(&c, utcp->connections, utcp->nconnections, sizeof *utcp->connections, compare);
259
260         assert(cp);
261
262         int i = cp - utcp->connections;
263         memmove(cp, cp + 1, (utcp->nconnections - i - 1) * sizeof *cp);
264         utcp->nconnections--;
265
266         buffer_exit(&c->rcvbuf);
267         buffer_exit(&c->sndbuf);
268         free(c);
269 }
270
271 static struct utcp_connection *allocate_connection(struct utcp *utcp, uint16_t src, uint16_t dst) {
272         // Check whether this combination of src and dst is free
273
274         if(src) {
275                 if(find_connection(utcp, src, dst)) {
276                         errno = EADDRINUSE;
277                         return NULL;
278                 }
279         } else { // If src == 0, generate a random port number with the high bit set
280                 if(utcp->nconnections >= 32767) {
281                         errno = ENOMEM;
282                         return NULL;
283                 }
284                 src = rand() | 0x8000;
285                 while(find_connection(utcp, src, dst))
286                         src++;
287         }
288
289         // Allocate memory for the new connection
290
291         if(utcp->nconnections >= utcp->nallocated) {
292                 if(!utcp->nallocated)
293                         utcp->nallocated = 4;
294                 else
295                         utcp->nallocated *= 2;
296                 struct utcp_connection **new_array = realloc(utcp->connections, utcp->nallocated * sizeof *utcp->connections);
297                 if(!new_array)
298                         return NULL;
299                 utcp->connections = new_array;
300         }
301
302         struct utcp_connection *c = calloc(1, sizeof *c);
303         if(!c)
304                 return NULL;
305
306         if(!buffer_init(&c->sndbuf, DEFAULT_SNDBUFSIZE, DEFAULT_MAXSNDBUFSIZE)) {
307                 free(c);
308                 return NULL;
309         }
310
311         if(!buffer_init(&c->rcvbuf, DEFAULT_RCVBUFSIZE, DEFAULT_MAXRCVBUFSIZE)) {
312                 buffer_exit(&c->sndbuf);
313                 free(c);
314                 return NULL;
315         }
316
317         // Fill in the details
318
319         c->src = src;
320         c->dst = dst;
321 #ifdef UTCP_DEBUG
322         c->snd.iss = 0;
323 #else
324         c->snd.iss = rand();
325 #endif
326         c->snd.una = c->snd.iss;
327         c->snd.nxt = c->snd.iss + 1;
328         c->rcv.wnd = utcp->mtu;
329         c->snd.last = c->snd.nxt;
330         c->snd.cwnd = utcp->mtu;
331         c->utcp = utcp;
332
333         // Add it to the sorted list of connections
334
335         utcp->connections[utcp->nconnections++] = c;
336         qsort(utcp->connections, utcp->nconnections, sizeof *utcp->connections, compare);
337
338         return c;
339 }
340
341 // Update RTT variables. See RFC 6298.
342 static void update_rtt(struct utcp_connection *c, uint32_t rtt) {
343         if(!rtt) {
344                 debug("invalid rtt\n");
345                 return;
346         }
347
348         struct utcp *utcp = c->utcp;
349
350         if(!utcp->srtt) {
351                 utcp->srtt = rtt;
352                 utcp->rttvar = rtt / 2;
353                 utcp->rto = rtt + max(2 * rtt, CLOCK_GRANULARITY);
354         } else {
355                 utcp->rttvar = (utcp->rttvar * 3 + abs(utcp->srtt - rtt)) / 4;
356                 utcp->srtt = (utcp->srtt * 7 + rtt) / 8;
357                 utcp->rto = utcp->srtt + max(utcp->rttvar, CLOCK_GRANULARITY);
358         }
359
360         if(utcp->rto > MAX_RTO)
361                 utcp->rto = MAX_RTO;
362
363         debug("rtt %u srtt %u rttvar %u rto %u\n", rtt, utcp->srtt, utcp->rttvar, utcp->rto);
364 }
365
366 static void start_retransmit_timer(struct utcp_connection *c) {
367         gettimeofday(&c->rtrx_timeout, NULL);
368         c->rtrx_timeout.tv_usec += c->utcp->rto;
369         while(c->rtrx_timeout.tv_usec >= 1000000) {
370                 c->rtrx_timeout.tv_usec -= 1000000;
371                 c->rtrx_timeout.tv_sec++;
372         }
373         debug("timeout set to %lu.%06lu (%u)\n", c->rtrx_timeout.tv_sec, c->rtrx_timeout.tv_usec, c->utcp->rto);
374 }
375
376 static void stop_retransmit_timer(struct utcp_connection *c) {
377         timerclear(&c->rtrx_timeout);
378         debug("timeout cleared\n");
379 }
380
381 struct utcp_connection *utcp_connect(struct utcp *utcp, uint16_t dst, utcp_recv_t recv, void *priv) {
382         struct utcp_connection *c = allocate_connection(utcp, 0, dst);
383         if(!c)
384                 return NULL;
385
386         c->recv = recv;
387         c->priv = priv;
388
389         struct hdr hdr;
390
391         hdr.src = c->src;
392         hdr.dst = c->dst;
393         hdr.seq = c->snd.iss;
394         hdr.ack = 0;
395         hdr.wnd = c->rcv.wnd;
396         hdr.ctl = SYN;
397         hdr.aux = 0;
398
399         set_state(c, SYN_SENT);
400
401         print_packet(utcp, "send", &hdr, sizeof hdr);
402         utcp->send(utcp, &hdr, sizeof hdr);
403
404         gettimeofday(&c->conn_timeout, NULL);
405         c->conn_timeout.tv_sec += utcp->timeout;
406
407         return c;
408 }
409
410 void utcp_accept(struct utcp_connection *c, utcp_recv_t recv, void *priv) {
411         if(c->reapable || c->state != SYN_RECEIVED) {
412                 debug("Error: accept() called on invalid connection %p in state %s\n", c, strstate[c->state]);
413                 return;
414         }
415
416         debug("%p accepted, %p %p\n", c, recv, priv);
417         c->recv = recv;
418         c->priv = priv;
419         set_state(c, ESTABLISHED);
420 }
421
422 static void ack(struct utcp_connection *c, bool sendatleastone) {
423         int32_t left = seqdiff(c->snd.last, c->snd.nxt);
424         int32_t cwndleft = c->snd.cwnd - seqdiff(c->snd.nxt, c->snd.una);
425         debug("cwndleft = %d\n", cwndleft);
426
427         assert(left >= 0);
428
429         if(cwndleft <= 0)
430                 cwndleft = 0;
431
432         if(cwndleft < left)
433                 left = cwndleft;
434
435         if(!left && !sendatleastone)
436                 return;
437
438         struct {
439                 struct hdr hdr;
440                 char data[];
441         } *pkt;
442
443         pkt = malloc(sizeof pkt->hdr + c->utcp->mtu);
444         if(!pkt)
445                 return;
446
447         pkt->hdr.src = c->src;
448         pkt->hdr.dst = c->dst;
449         pkt->hdr.ack = c->rcv.nxt;
450         pkt->hdr.wnd = c->snd.wnd;
451         pkt->hdr.ctl = ACK;
452         pkt->hdr.aux = 0;
453
454         do {
455                 uint32_t seglen = left > c->utcp->mtu ? c->utcp->mtu : left;
456                 pkt->hdr.seq = c->snd.nxt;
457
458                 buffer_copy(&c->sndbuf, pkt->data, seqdiff(c->snd.nxt, c->snd.una), seglen);
459
460                 c->snd.nxt += seglen;
461                 left -= seglen;
462
463                 if(seglen && fin_wanted(c, c->snd.nxt)) {
464                         seglen--;
465                         pkt->hdr.ctl |= FIN;
466                 }
467
468                 if(!c->rtt_start.tv_sec) {
469                         // Start RTT measurement
470                         gettimeofday(&c->rtt_start, NULL);
471                         c->rtt_seq = pkt->hdr.seq + seglen;
472                         debug("Starting RTT measurement, expecting ack %u\n", c->rtt_seq);
473                 }
474
475                 print_packet(c->utcp, "send", pkt, sizeof pkt->hdr + seglen);
476                 c->utcp->send(c->utcp, pkt, sizeof pkt->hdr + seglen);
477         } while(left);
478
479         free(pkt);
480 }
481
482 ssize_t utcp_send(struct utcp_connection *c, const void *data, size_t len) {
483         if(c->reapable) {
484                 debug("Error: send() called on closed connection %p\n", c);
485                 errno = EBADF;
486                 return -1;
487         }
488
489         switch(c->state) {
490         case CLOSED:
491         case LISTEN:
492         case SYN_SENT:
493         case SYN_RECEIVED:
494                 debug("Error: send() called on unconnected connection %p\n", c);
495                 errno = ENOTCONN;
496                 return -1;
497         case ESTABLISHED:
498         case CLOSE_WAIT:
499                 break;
500         case FIN_WAIT_1:
501         case FIN_WAIT_2:
502         case CLOSING:
503         case LAST_ACK:
504         case TIME_WAIT:
505                 debug("Error: send() called on closing connection %p\n", c);
506                 errno = EPIPE;
507                 return -1;
508         }
509
510         // Add data to send buffer
511
512         if(!len)
513                 return 0;
514
515         if(!data) {
516                 errno = EFAULT;
517                 return -1;
518         }
519
520         len = buffer_put(&c->sndbuf, data, len);
521         if(len <= 0) {
522                 errno = EWOULDBLOCK;
523                 return 0;
524         }
525
526         c->snd.last += len;
527         ack(c, false);
528         if(!timerisset(&c->rtrx_timeout))
529                 start_retransmit_timer(c);
530         return len;
531 }
532
533 static void swap_ports(struct hdr *hdr) {
534         uint16_t tmp = hdr->src;
535         hdr->src = hdr->dst;
536         hdr->dst = tmp;
537 }
538
539 static void retransmit(struct utcp_connection *c) {
540         if(c->state == CLOSED || c->snd.last == c->snd.una) {
541                 debug("Retransmit() called but nothing to retransmit!\n");
542                 stop_retransmit_timer(c);
543                 return;
544         }
545
546         struct utcp *utcp = c->utcp;
547
548         struct {
549                 struct hdr hdr;
550                 char data[];
551         } *pkt;
552
553         pkt = malloc(sizeof pkt->hdr + c->utcp->mtu);
554         if(!pkt)
555                 return;
556
557         pkt->hdr.src = c->src;
558         pkt->hdr.dst = c->dst;
559         pkt->hdr.wnd = c->rcv.wnd;
560         pkt->hdr.aux = 0;
561
562         switch(c->state) {
563                 case SYN_SENT:
564                         // Send our SYN again
565                         pkt->hdr.seq = c->snd.iss;
566                         pkt->hdr.ack = 0;
567                         pkt->hdr.ctl = SYN;
568                         print_packet(c->utcp, "rtrx", pkt, sizeof pkt->hdr);
569                         utcp->send(utcp, pkt, sizeof pkt->hdr);
570                         break;
571
572                 case SYN_RECEIVED:
573                         // Send SYNACK again
574                         pkt->hdr.seq = c->snd.nxt;
575                         pkt->hdr.ack = c->rcv.nxt;
576                         pkt->hdr.ctl = SYN | ACK;
577                         print_packet(c->utcp, "rtrx", pkt, sizeof pkt->hdr);
578                         utcp->send(utcp, pkt, sizeof pkt->hdr);
579                         break;
580
581                 case ESTABLISHED:
582                 case FIN_WAIT_1:
583                 case CLOSE_WAIT:
584                 case CLOSING:
585                 case LAST_ACK:
586                         // Send unacked data again.
587                         pkt->hdr.seq = c->snd.una;
588                         pkt->hdr.ack = c->rcv.nxt;
589                         pkt->hdr.ctl = ACK;
590                         uint32_t len = seqdiff(c->snd.last, c->snd.una);
591                         if(len > utcp->mtu)
592                                 len = utcp->mtu;
593                         if(fin_wanted(c, c->snd.una + len)) {
594                                 len--;
595                                 pkt->hdr.ctl |= FIN;
596                         }
597                         c->snd.nxt = c->snd.una + len;
598                         c->snd.cwnd = utcp->mtu; // reduce cwnd on retransmit
599                         buffer_copy(&c->sndbuf, pkt->data, 0, len);
600                         print_packet(c->utcp, "rtrx", pkt, sizeof pkt->hdr + len);
601                         utcp->send(utcp, pkt, sizeof pkt->hdr + len);
602                         break;
603
604                 case CLOSED:
605                 case LISTEN:
606                 case TIME_WAIT:
607                 case FIN_WAIT_2:
608                         // We shouldn't need to retransmit anything in this state.
609 #ifdef UTCP_DEBUG
610                         abort();
611 #endif
612                         stop_retransmit_timer(c);
613                         goto cleanup;
614         }
615
616         start_retransmit_timer(c);
617         utcp->rto *= 2;
618         if(utcp->rto > MAX_RTO)
619                 utcp->rto = MAX_RTO;
620         c->rtt_start.tv_sec = 0; // invalidate RTT timer
621
622 cleanup:
623         free(pkt);
624 }
625
626 /* Update receive buffer and SACK entries after consuming data.
627  *
628  * Situation:
629  *
630  * |.....0000..1111111111.....22222......3333|
631  * |---------------^
632  *
633  * 0..3 represent the SACK entries. The ^ indicates up to which point we want
634  * to remove data from the receive buffer. The idea is to substract "len"
635  * from the offset of all the SACK entries, and then remove/cut down entries
636  * that are shifted to before the start of the receive buffer.
637  *
638  * There are three cases:
639  * - the SACK entry is after ^, in that case just change the offset.
640  * - the SACK entry starts before and ends after ^, so we have to
641  *   change both its offset and size.
642  * - the SACK entry is completely before ^, in that case delete it.
643  */
644 static void sack_consume(struct utcp_connection *c, size_t len) {
645         debug("sack_consume %lu\n", (unsigned long)len);
646         if(len > c->rcvbuf.used) {
647                 debug("All SACK entries consumed");
648                 c->sacks[0].len = 0;
649                 return;
650         }
651
652         buffer_get(&c->rcvbuf, NULL, len);
653
654         for(int i = 0; i < NSACKS && c->sacks[i].len; ) {
655                 if(len < c->sacks[i].offset) {
656                         c->sacks[i].offset -= len;
657                         i++;
658                 } else if(len < c->sacks[i].offset + c->sacks[i].len) {
659                         c->sacks[i].len -= len - c->sacks[i].offset;
660                         c->sacks[i].offset = 0;
661                         i++;
662                 } else {
663                         if(i < NSACKS - 1) {
664                                 memmove(&c->sacks[i], &c->sacks[i + 1], (NSACKS - 1 - i) * sizeof c->sacks[i]);
665                                 c->sacks[NSACKS - 1].len = 0;
666                         } else {
667                                 c->sacks[i].len = 0;
668                                 break;
669                         }
670                 }
671         }
672
673         for(int i = 0; i < NSACKS && c->sacks[i].len; i++)
674                 debug("SACK[%d] offset %u len %u\n", i, c->sacks[i].offset, c->sacks[i].len);
675 }
676
677 static void handle_out_of_order(struct utcp_connection *c, uint32_t offset, const void *data, size_t len) {
678         debug("out of order packet, offset %u\n", offset);
679         // Packet loss or reordering occured. Store the data in the buffer.
680         ssize_t rxd = buffer_put_at(&c->rcvbuf, offset, data, len);
681         if(rxd < len)
682                 abort();
683
684         // Make note of where we put it.
685         for(int i = 0; i < NSACKS; i++) {
686                 if(!c->sacks[i].len) { // nothing to merge, add new entry
687                         debug("New SACK entry %d\n", i);
688                         c->sacks[i].offset = offset;
689                         c->sacks[i].len = rxd;
690                         break;
691                 } else if(offset < c->sacks[i].offset) {
692                         if(offset + rxd < c->sacks[i].offset) { // insert before
693                                 if(!c->sacks[NSACKS - 1].len) { // only if room left
694                                         debug("Insert SACK entry at %d\n", i);
695                                         memmove(&c->sacks[i + 1], &c->sacks[i], (NSACKS - i - 1) * sizeof c->sacks[i]);
696                                         c->sacks[i].offset = offset;
697                                         c->sacks[i].len = rxd;
698                                 } else {
699                                         debug("SACK entries full, dropping packet\n");
700                                 }
701                                 break;
702                         } else { // merge
703                                 debug("Merge with start of SACK entry at %d\n", i);
704                                 c->sacks[i].offset = offset;
705                                 break;
706                         }
707                 } else if(offset <= c->sacks[i].offset + c->sacks[i].len) {
708                         if(offset + rxd > c->sacks[i].offset + c->sacks[i].len) { // merge
709                                 debug("Merge with end of SACK entry at %d\n", i);
710                                 c->sacks[i].len = offset + rxd - c->sacks[i].offset;
711                                 // TODO: handle potential merge with next entry
712                         }
713                         break;
714                 }
715         }
716
717         for(int i = 0; i < NSACKS && c->sacks[i].len; i++)
718                 debug("SACK[%d] offset %u len %u\n", i, c->sacks[i].offset, c->sacks[i].len);
719 }
720
721 static void handle_in_order(struct utcp_connection *c, const void *data, size_t len) {
722         // Check if we can process out-of-order data now.
723         if(c->sacks[0].len && len >= c->sacks[0].offset) { // TODO: handle overlap with second SACK
724                 debug("incoming packet len %lu connected with SACK at %u\n", (unsigned long)len, c->sacks[0].offset);
725                 buffer_put_at(&c->rcvbuf, 0, data, len); // TODO: handle return value
726                 len = max(len, c->sacks[0].offset + c->sacks[0].len);
727                 data = c->rcvbuf.data;
728         }
729
730         if(c->recv) {
731                 ssize_t rxd = c->recv(c, data, len);
732                 if(rxd != len) {
733                         // TODO: handle the application not accepting all data.
734                         abort();
735                 }
736         }
737
738         if(c->rcvbuf.used)
739                 sack_consume(c, len);
740
741         c->rcv.nxt += len;
742 }
743
744
745 static void handle_incoming_data(struct utcp_connection *c, uint32_t seq, const void *data, size_t len) {
746         uint32_t offset = seqdiff(seq, c->rcv.nxt);
747         if(offset + len > c->rcvbuf.maxsize)
748                 abort();
749
750         if(offset)
751                 handle_out_of_order(c, offset, data, len);
752         else
753                 handle_in_order(c, data, len);
754 }
755
756
757 ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) {
758         if(!utcp) {
759                 errno = EFAULT;
760                 return -1;
761         }
762
763         if(!len)
764                 return 0;
765
766         if(!data) {
767                 errno = EFAULT;
768                 return -1;
769         }
770
771         print_packet(utcp, "recv", data, len);
772
773         // Drop packets smaller than the header
774
775         struct hdr hdr;
776         if(len < sizeof hdr) {
777                 errno = EBADMSG;
778                 return -1;
779         }
780
781         // Make a copy from the potentially unaligned data to a struct hdr
782
783         memcpy(&hdr, data, sizeof hdr);
784         data += sizeof hdr;
785         len -= sizeof hdr;
786
787         // Drop packets with an unknown CTL flag
788
789         if(hdr.ctl & ~(SYN | ACK | RST | FIN)) {
790                 errno = EBADMSG;
791                 return -1;
792         }
793
794         // Try to match the packet to an existing connection
795
796         struct utcp_connection *c = find_connection(utcp, hdr.dst, hdr.src);
797
798         // Is it for a new connection?
799
800         if(!c) {
801                 // Ignore RST packets
802
803                 if(hdr.ctl & RST)
804                         return 0;
805
806                 // Is it a SYN packet and are we LISTENing?
807
808                 if(hdr.ctl & SYN && !(hdr.ctl & ACK) && utcp->accept) {
809                         // If we don't want to accept it, send a RST back
810                         if((utcp->pre_accept && !utcp->pre_accept(utcp, hdr.dst))) {
811                                 len = 1;
812                                 goto reset;
813                         }
814
815                         // Try to allocate memory, otherwise send a RST back
816                         c = allocate_connection(utcp, hdr.dst, hdr.src);
817                         if(!c) {
818                                 len = 1;
819                                 goto reset;
820                         }
821
822                         // Return SYN+ACK, go to SYN_RECEIVED state
823                         c->snd.wnd = hdr.wnd;
824                         c->rcv.irs = hdr.seq;
825                         c->rcv.nxt = c->rcv.irs + 1;
826                         set_state(c, SYN_RECEIVED);
827
828                         hdr.dst = c->dst;
829                         hdr.src = c->src;
830                         hdr.ack = c->rcv.irs + 1;
831                         hdr.seq = c->snd.iss;
832                         hdr.ctl = SYN | ACK;
833                         print_packet(c->utcp, "send", &hdr, sizeof hdr);
834                         utcp->send(utcp, &hdr, sizeof hdr);
835                 } else {
836                         // No, we don't want your packets, send a RST back
837                         len = 1;
838                         goto reset;
839                 }
840
841                 return 0;
842         }
843
844         debug("%p state %s\n", c->utcp, strstate[c->state]);
845
846         // In case this is for a CLOSED connection, ignore the packet.
847         // TODO: make it so incoming packets can never match a CLOSED connection.
848
849         if(c->state == CLOSED) {
850                 debug("Got packet for closed connection\n");
851                 return 0;
852         }
853
854         // It is for an existing connection.
855
856         uint32_t prevrcvnxt = c->rcv.nxt;
857
858         // 1. Drop invalid packets.
859
860         // 1a. Drop packets that should not happen in our current state.
861
862         switch(c->state) {
863         case SYN_SENT:
864         case SYN_RECEIVED:
865         case ESTABLISHED:
866         case FIN_WAIT_1:
867         case FIN_WAIT_2:
868         case CLOSE_WAIT:
869         case CLOSING:
870         case LAST_ACK:
871         case TIME_WAIT:
872                 break;
873         default:
874 #ifdef UTCP_DEBUG
875                 abort();
876 #endif
877                 break;
878         }
879
880         // 1b. Drop packets with a sequence number not in our receive window.
881
882         bool acceptable;
883
884         if(c->state == SYN_SENT)
885                 acceptable = true;
886         else if(len == 0)
887                 acceptable = seqdiff(hdr.seq, c->rcv.nxt) >= 0;
888         else {
889                 int32_t rcv_offset = seqdiff(hdr.seq, c->rcv.nxt);
890
891                 // cut already accepted front overlapping
892                 if(rcv_offset < 0) {
893                         acceptable = len > -rcv_offset;
894                         if(acceptable) {
895                                 data -= rcv_offset;
896                                 len += rcv_offset;
897                                 hdr.seq -= rcv_offset;
898                         }
899                 } else {
900                         acceptable = seqdiff(hdr.seq, c->rcv.nxt) >= 0 && seqdiff(hdr.seq, c->rcv.nxt) + len <= c->rcvbuf.maxsize;
901                 }
902         }
903
904         if(!acceptable) {
905                 debug("Packet not acceptable, %u <= %u + %lu < %u\n", c->rcv.nxt, hdr.seq, (unsigned long)len, c->rcv.nxt + c->rcvbuf.maxsize);
906                 // Ignore unacceptable RST packets.
907                 if(hdr.ctl & RST)
908                         return 0;
909                 // Otherwise, continue processing.
910                 len = 0;
911         }
912
913         c->snd.wnd = hdr.wnd; // TODO: move below
914
915         // 1c. Drop packets with an invalid ACK.
916         // ackno should not roll back, and it should also not be bigger than what we ever could have sent
917         // (= snd.una + c->sndbuf.used).
918
919         if(hdr.ctl & ACK && (seqdiff(hdr.ack, c->snd.last) > 0 || seqdiff(hdr.ack, c->snd.una) < 0)) {
920                 debug("Packet ack seqno out of range, %u <= %u < %u\n", c->snd.una, hdr.ack, c->snd.una + c->sndbuf.used);
921                 // Ignore unacceptable RST packets.
922                 if(hdr.ctl & RST)
923                         return 0;
924                 goto reset;
925         }
926
927         // 2. Handle RST packets
928
929         if(hdr.ctl & RST) {
930                 switch(c->state) {
931                 case SYN_SENT:
932                         if(!(hdr.ctl & ACK))
933                                 return 0;
934                         // The peer has refused our connection.
935                         set_state(c, CLOSED);
936                         errno = ECONNREFUSED;
937                         if(c->recv)
938                                 c->recv(c, NULL, 0);
939                         return 0;
940                 case SYN_RECEIVED:
941                         if(hdr.ctl & ACK)
942                                 return 0;
943                         // We haven't told the application about this connection yet. Silently delete.
944                         free_connection(c);
945                         return 0;
946                 case ESTABLISHED:
947                 case FIN_WAIT_1:
948                 case FIN_WAIT_2:
949                 case CLOSE_WAIT:
950                         if(hdr.ctl & ACK)
951                                 return 0;
952                         // The peer has aborted our connection.
953                         set_state(c, CLOSED);
954                         errno = ECONNRESET;
955                         if(c->recv)
956                                 c->recv(c, NULL, 0);
957                         return 0;
958                 case CLOSING:
959                 case LAST_ACK:
960                 case TIME_WAIT:
961                         if(hdr.ctl & ACK)
962                                 return 0;
963                         // As far as the application is concerned, the connection has already been closed.
964                         // If it has called utcp_close() already, we can immediately free this connection.
965                         if(c->reapable) {
966                                 free_connection(c);
967                                 return 0;
968                         }
969                         // Otherwise, immediately move to the CLOSED state.
970                         set_state(c, CLOSED);
971                         return 0;
972                 default:
973 #ifdef UTCP_DEBUG
974                         abort();
975 #endif
976                         break;
977                 }
978         }
979
980         // 3. Advance snd.una
981
982         uint32_t advanced = seqdiff(hdr.ack, c->snd.una);
983         prevrcvnxt = c->rcv.nxt;
984
985         if(advanced) {
986                 // RTT measurement
987                 if(c->rtt_start.tv_sec) {
988                         if(c->rtt_seq == hdr.ack) {
989                                 struct timeval now, diff;
990                                 gettimeofday(&now, NULL);
991                                 timersub(&now, &c->rtt_start, &diff);
992                                 update_rtt(c, diff.tv_sec * 1000000 + diff.tv_usec);
993                                 c->rtt_start.tv_sec = 0;
994                         } else if(c->rtt_seq < hdr.ack) {
995                                 debug("Cancelling RTT measurement: %u < %u\n", c->rtt_seq, hdr.ack);
996                                 c->rtt_start.tv_sec = 0;
997                         }
998                 }
999
1000                 int32_t data_acked = advanced;
1001
1002                 switch(c->state) {
1003                         case SYN_SENT:
1004                         case SYN_RECEIVED:
1005                                 data_acked--;
1006                                 break;
1007                         // TODO: handle FIN as well.
1008                         default:
1009                                 break;
1010                 }
1011
1012                 assert(data_acked >= 0);
1013
1014                 int32_t bufused = seqdiff(c->snd.last, c->snd.una);
1015                 assert(data_acked <= bufused);
1016
1017                 if(data_acked)
1018                         buffer_get(&c->sndbuf, NULL, data_acked);
1019
1020                 // Also advance snd.nxt if possible
1021                 if(seqdiff(c->snd.nxt, hdr.ack) < 0)
1022                         c->snd.nxt = hdr.ack;
1023
1024                 c->snd.una = hdr.ack;
1025
1026                 c->dupack = 0;
1027                 c->snd.cwnd += utcp->mtu;
1028                 if(c->snd.cwnd > c->sndbuf.maxsize)
1029                         c->snd.cwnd = c->sndbuf.maxsize;
1030
1031                 // Check if we have sent a FIN that is now ACKed.
1032                 switch(c->state) {
1033                 case FIN_WAIT_1:
1034                         if(c->snd.una == c->snd.last)
1035                                 set_state(c, FIN_WAIT_2);
1036                         break;
1037                 case CLOSING:
1038                         if(c->snd.una == c->snd.last) {
1039                                 gettimeofday(&c->conn_timeout, NULL);
1040                                 c->conn_timeout.tv_sec += 60;
1041                                 set_state(c, TIME_WAIT);
1042                         }
1043                         break;
1044                 default:
1045                         break;
1046                 }
1047         } else {
1048                 if(!len) {
1049                         c->dupack++;
1050                         if(c->dupack == 3) {
1051                                 debug("Triplicate ACK\n");
1052                                 //TODO: Resend one packet and go to fast recovery mode. See RFC 6582.
1053                                 //We do a very simple variant here; reset the nxt pointer to the last acknowledged packet from the peer.
1054                                 //Reset the congestion window so we wait for ACKs.
1055                                 c->snd.nxt = c->snd.una;
1056                                 c->snd.cwnd = utcp->mtu;
1057                                 start_retransmit_timer(c);
1058                         }
1059                 }
1060         }
1061
1062         // 4. Update timers
1063
1064         if(advanced) {
1065                 timerclear(&c->conn_timeout); // It will be set anew in utcp_timeout() if c->snd.una != c->snd.nxt.
1066                 if(c->snd.una == c->snd.last)
1067                         stop_retransmit_timer(c);
1068                 else
1069                         start_retransmit_timer(c);
1070         }
1071
1072         // 5. Process SYN stuff
1073
1074         if(hdr.ctl & SYN) {
1075                 switch(c->state) {
1076                 case SYN_SENT:
1077                         // This is a SYNACK. It should always have ACKed the SYN.
1078                         if(!advanced)
1079                                 goto reset;
1080                         c->rcv.irs = hdr.seq;
1081                         c->rcv.nxt = hdr.seq;
1082                         set_state(c, ESTABLISHED);
1083                         // TODO: notify application of this somehow.
1084                         break;
1085                 case SYN_RECEIVED:
1086                 case ESTABLISHED:
1087                 case FIN_WAIT_1:
1088                 case FIN_WAIT_2:
1089                 case CLOSE_WAIT:
1090                 case CLOSING:
1091                 case LAST_ACK:
1092                 case TIME_WAIT:
1093                         // Ehm, no. We should never receive a second SYN.
1094                         goto reset;
1095                 default:
1096 #ifdef UTCP_DEBUG
1097                         abort();
1098 #endif
1099                         return 0;
1100                 }
1101
1102                 // SYN counts as one sequence number
1103                 c->rcv.nxt++;
1104         }
1105
1106         // 6. Process new data
1107
1108         if(c->state == SYN_RECEIVED) {
1109                 // This is the ACK after the SYNACK. It should always have ACKed the SYNACK.
1110                 if(!advanced)
1111                         goto reset;
1112
1113                 // Are we still LISTENing?
1114                 if(utcp->accept)
1115                         utcp->accept(c, c->src);
1116
1117                 if(c->state != ESTABLISHED) {
1118                         set_state(c, CLOSED);
1119                         c->reapable = true;
1120                         goto reset;
1121                 }
1122         }
1123
1124         if(len) {
1125                 switch(c->state) {
1126                 case SYN_SENT:
1127                 case SYN_RECEIVED:
1128                         // This should never happen.
1129 #ifdef UTCP_DEBUG
1130                         abort();
1131 #endif
1132                         return 0;
1133                 case ESTABLISHED:
1134                 case FIN_WAIT_1:
1135                 case FIN_WAIT_2:
1136                         break;
1137                 case CLOSE_WAIT:
1138                 case CLOSING:
1139                 case LAST_ACK:
1140                 case TIME_WAIT:
1141                         // Ehm no, We should never receive more data after a FIN.
1142                         goto reset;
1143                 default:
1144 #ifdef UTCP_DEBUG
1145                         abort();
1146 #endif
1147                         return 0;
1148                 }
1149
1150                 handle_incoming_data(c, hdr.seq, data, len);
1151         }
1152
1153         // 7. Process FIN stuff
1154
1155         if((hdr.ctl & FIN) && hdr.seq + len == c->rcv.nxt) {
1156                 switch(c->state) {
1157                 case SYN_SENT:
1158                 case SYN_RECEIVED:
1159                         // This should never happen.
1160 #ifdef UTCP_DEBUG
1161                         abort();
1162 #endif
1163                         break;
1164                 case ESTABLISHED:
1165                         set_state(c, CLOSE_WAIT);
1166                         break;
1167                 case FIN_WAIT_1:
1168                         set_state(c, CLOSING);
1169                         break;
1170                 case FIN_WAIT_2:
1171                         gettimeofday(&c->conn_timeout, NULL);
1172                         c->conn_timeout.tv_sec += 60;
1173                         set_state(c, TIME_WAIT);
1174                         break;
1175                 case CLOSE_WAIT:
1176                 case CLOSING:
1177                 case LAST_ACK:
1178                 case TIME_WAIT:
1179                         // Ehm, no. We should never receive a second FIN.
1180                         goto reset;
1181                 default:
1182 #ifdef UTCP_DEBUG
1183                         abort();
1184 #endif
1185                         break;
1186                 }
1187
1188                 // FIN counts as one sequence number
1189                 c->rcv.nxt++;
1190                 len++;
1191
1192                 // Inform the application that the peer closed the connection.
1193                 if(c->recv) {
1194                         errno = 0;
1195                         c->recv(c, NULL, 0);
1196                 }
1197         }
1198
1199         // Now we send something back if:
1200         // - we advanced rcv.nxt (ie, we got some data that needs to be ACKed)
1201         //   -> sendatleastone = true
1202         // - or we got an ack, so we should maybe send a bit more data
1203         //   -> sendatleastone = false
1204
1205         ack(c, len || prevrcvnxt != c->rcv.nxt);
1206         return 0;
1207
1208 reset:
1209         swap_ports(&hdr);
1210         hdr.wnd = 0;
1211         if(hdr.ctl & ACK) {
1212                 hdr.seq = hdr.ack;
1213                 hdr.ctl = RST;
1214         } else {
1215                 hdr.ack = hdr.seq + len;
1216                 hdr.seq = 0;
1217                 hdr.ctl = RST | ACK;
1218         }
1219         print_packet(utcp, "send", &hdr, sizeof hdr);
1220         utcp->send(utcp, &hdr, sizeof hdr);
1221         return 0;
1222
1223 }
1224
1225 int utcp_shutdown(struct utcp_connection *c, int dir) {
1226         debug("%p shutdown %d at %u\n", c ? c->utcp : NULL, dir, c ? c->snd.last : 0);
1227         if(!c) {
1228                 errno = EFAULT;
1229                 return -1;
1230         }
1231
1232         if(c->reapable) {
1233                 debug("Error: shutdown() called on closed connection %p\n", c);
1234                 errno = EBADF;
1235                 return -1;
1236         }
1237
1238         if(!(dir == UTCP_SHUT_RD || dir == UTCP_SHUT_WR || dir == UTCP_SHUT_RDWR)) {
1239                 errno = EINVAL;
1240                 return -1;
1241         }
1242
1243         // TCP does not have a provision for stopping incoming packets.
1244         // The best we can do is to just ignore them.
1245         if(dir == UTCP_SHUT_RD || dir == UTCP_SHUT_RDWR)
1246                 c->recv = NULL;
1247
1248         // The rest of the code deals with shutting down writes.
1249         if(dir == UTCP_SHUT_RD)
1250                 return 0;
1251
1252         switch(c->state) {
1253         case CLOSED:
1254         case LISTEN:
1255                 errno = ENOTCONN;
1256                 return -1;
1257
1258         case SYN_SENT:
1259                 set_state(c, CLOSED);
1260                 return 0;
1261
1262         case SYN_RECEIVED:
1263         case ESTABLISHED:
1264                 set_state(c, FIN_WAIT_1);
1265                 break;
1266         case FIN_WAIT_1:
1267         case FIN_WAIT_2:
1268                 return 0;
1269         case CLOSE_WAIT:
1270                 set_state(c, CLOSING);
1271                 break;
1272
1273         case CLOSING:
1274         case LAST_ACK:
1275         case TIME_WAIT:
1276                 return 0;
1277         }
1278
1279         c->snd.last++;
1280
1281         ack(c, false);
1282         if(!timerisset(&c->rtrx_timeout))
1283                 start_retransmit_timer(c);
1284         return 0;
1285 }
1286
1287 int utcp_close(struct utcp_connection *c) {
1288         if(utcp_shutdown(c, SHUT_RDWR) && errno != ENOTCONN)
1289                 return -1;
1290         c->recv = NULL;
1291         c->poll = NULL;
1292         c->reapable = true;
1293         return 0;
1294 }
1295
1296 int utcp_abort(struct utcp_connection *c) {
1297         if(!c) {
1298                 errno = EFAULT;
1299                 return -1;
1300         }
1301
1302         if(c->reapable) {
1303                 debug("Error: abort() called on closed connection %p\n", c);
1304                 errno = EBADF;
1305                 return -1;
1306         }
1307
1308         c->recv = NULL;
1309         c->poll = NULL;
1310         c->reapable = true;
1311
1312         switch(c->state) {
1313         case CLOSED:
1314                 return 0;
1315         case LISTEN:
1316         case SYN_SENT:
1317         case CLOSING:
1318         case LAST_ACK:
1319         case TIME_WAIT:
1320                 set_state(c, CLOSED);
1321                 return 0;
1322
1323         case SYN_RECEIVED:
1324         case ESTABLISHED:
1325         case FIN_WAIT_1:
1326         case FIN_WAIT_2:
1327         case CLOSE_WAIT:
1328                 set_state(c, CLOSED);
1329                 break;
1330         }
1331
1332         // Send RST
1333
1334         struct hdr hdr;
1335
1336         hdr.src = c->src;
1337         hdr.dst = c->dst;
1338         hdr.seq = c->snd.nxt;
1339         hdr.ack = 0;
1340         hdr.wnd = 0;
1341         hdr.ctl = RST;
1342
1343         print_packet(c->utcp, "send", &hdr, sizeof hdr);
1344         c->utcp->send(c->utcp, &hdr, sizeof hdr);
1345         return 0;
1346 }
1347
1348 /* Handle timeouts.
1349  * One call to this function will loop through all connections,
1350  * checking if something needs to be resent or not.
1351  * The return value is the time to the next timeout in milliseconds,
1352  * or maybe a negative value if the timeout is infinite.
1353  */
1354 struct timeval utcp_timeout(struct utcp *utcp) {
1355         struct timeval now;
1356         gettimeofday(&now, NULL);
1357         struct timeval next = {now.tv_sec + 3600, now.tv_usec};
1358
1359         for(int i = 0; i < utcp->nconnections; i++) {
1360                 struct utcp_connection *c = utcp->connections[i];
1361                 if(!c)
1362                         continue;
1363
1364                 // delete connections that have been utcp_close()d.
1365                 if(c->state == CLOSED) {
1366                         if(c->reapable) {
1367                                 debug("Reaping %p\n", c);
1368                                 free_connection(c);
1369                                 i--;
1370                         }
1371                         continue;
1372                 }
1373
1374                 if(timerisset(&c->conn_timeout) && timercmp(&c->conn_timeout, &now, <)) {
1375                         errno = ETIMEDOUT;
1376                         c->state = CLOSED;
1377                         if(c->recv)
1378                                 c->recv(c, NULL, 0);
1379                         continue;
1380                 }
1381
1382                 if(timerisset(&c->rtrx_timeout) && timercmp(&c->rtrx_timeout, &now, <)) {
1383                         debug("retransmit()\n");
1384                         retransmit(c);
1385                 }
1386
1387                 if(c->poll && buffer_free(&c->sndbuf) && (c->state == ESTABLISHED || c->state == CLOSE_WAIT))
1388                         c->poll(c, buffer_free(&c->sndbuf));
1389
1390                 if(timerisset(&c->conn_timeout) && timercmp(&c->conn_timeout, &next, <))
1391                         next = c->conn_timeout;
1392
1393                 if(timerisset(&c->rtrx_timeout) && timercmp(&c->rtrx_timeout, &next, <))
1394                         next = c->rtrx_timeout;
1395         }
1396
1397         struct timeval diff;
1398         timersub(&next, &now, &diff);
1399         return diff;
1400 }
1401
1402 bool utcp_is_active(struct utcp *utcp) {
1403         if(!utcp)
1404                 return false;
1405
1406         for(int i = 0; i < utcp->nconnections; i++)
1407                 if(utcp->connections[i]->state != CLOSED && utcp->connections[i]->state != TIME_WAIT)
1408                         return true;
1409
1410         return false;
1411 }
1412
1413 struct utcp *utcp_init(utcp_accept_t accept, utcp_pre_accept_t pre_accept, utcp_send_t send, void *priv) {
1414         if(!send) {
1415                 errno = EFAULT;
1416                 return NULL;
1417         }
1418
1419         struct utcp *utcp = calloc(1, sizeof *utcp);
1420         if(!utcp)
1421                 return NULL;
1422
1423         utcp->accept = accept;
1424         utcp->pre_accept = pre_accept;
1425         utcp->send = send;
1426         utcp->priv = priv;
1427         utcp->mtu = DEFAULT_MTU;
1428         utcp->timeout = DEFAULT_USER_TIMEOUT; // sec
1429         utcp->rto = START_RTO; // usec
1430
1431         return utcp;
1432 }
1433
1434 void utcp_exit(struct utcp *utcp) {
1435         if(!utcp)
1436                 return;
1437         for(int i = 0; i < utcp->nconnections; i++) {
1438                 if(!utcp->connections[i]->reapable)
1439                         debug("Warning, freeing unclosed connection %p\n", utcp->connections[i]);
1440                 buffer_exit(&utcp->connections[i]->rcvbuf);
1441                 buffer_exit(&utcp->connections[i]->sndbuf);
1442                 free(utcp->connections[i]);
1443         }
1444         free(utcp->connections);
1445         free(utcp);
1446 }
1447
1448 uint16_t utcp_get_mtu(struct utcp *utcp) {
1449         return utcp ? utcp->mtu : 0;
1450 }
1451
1452 void utcp_set_mtu(struct utcp *utcp, uint16_t mtu) {
1453         // TODO: handle overhead of the header
1454         if(utcp)
1455                 utcp->mtu = mtu;
1456 }
1457
1458 int utcp_get_user_timeout(struct utcp *u) {
1459         return u ? u->timeout : 0;
1460 }
1461
1462 void utcp_set_user_timeout(struct utcp *u, int timeout) {
1463         if(u)
1464                 u->timeout = timeout;
1465 }
1466
1467 size_t utcp_get_sndbuf(struct utcp_connection *c) {
1468         return c ? c->sndbuf.maxsize : 0;
1469 }
1470
1471 size_t utcp_get_sndbuf_free(struct utcp_connection *c) {
1472         if(c && (c->state == ESTABLISHED || c->state == CLOSE_WAIT))
1473                 return buffer_free(&c->sndbuf);
1474         else
1475                 return 0;
1476 }
1477
1478 void utcp_set_sndbuf(struct utcp_connection *c, size_t size) {
1479         if(!c)
1480                 return;
1481         c->sndbuf.maxsize = size;
1482         if(c->sndbuf.maxsize != size)
1483                 c->sndbuf.maxsize = -1;
1484 }
1485
1486 size_t utcp_get_rcvbuf(struct utcp_connection *c) {
1487         return c ? c->rcvbuf.maxsize : 0;
1488 }
1489
1490 size_t utcp_get_rcvbuf_free(struct utcp_connection *c) {
1491         if(c && (c->state == ESTABLISHED || c->state == CLOSE_WAIT))
1492                 return buffer_free(&c->rcvbuf);
1493         else
1494                 return 0;
1495 }
1496
1497 void utcp_set_rcvbuf(struct utcp_connection *c, size_t size) {
1498         if(!c)
1499                 return;
1500         c->rcvbuf.maxsize = size;
1501         if(c->rcvbuf.maxsize != size)
1502                 c->rcvbuf.maxsize = -1;
1503 }
1504
1505 bool utcp_get_nodelay(struct utcp_connection *c) {
1506         return c ? c->nodelay : false;
1507 }
1508
1509 void utcp_set_nodelay(struct utcp_connection *c, bool nodelay) {
1510         if(c)
1511                 c->nodelay = nodelay;
1512 }
1513
1514 bool utcp_get_keepalive(struct utcp_connection *c) {
1515         return c ? c->keepalive : false;
1516 }
1517
1518 void utcp_set_keepalive(struct utcp_connection *c, bool keepalive) {
1519         if(c)
1520                 c->keepalive = keepalive;
1521 }
1522
1523 size_t utcp_get_outq(struct utcp_connection *c) {
1524         return c ? seqdiff(c->snd.nxt, c->snd.una) : 0;
1525 }
1526
1527 void utcp_set_recv_cb(struct utcp_connection *c, utcp_recv_t recv) {
1528         if(c)
1529                 c->recv = recv;
1530 }
1531
1532 void utcp_set_poll_cb(struct utcp_connection *c, utcp_poll_t poll) {
1533         if(c)
1534                 c->poll = poll;
1535 }
1536
1537 void utcp_set_accept_cb(struct utcp *utcp, utcp_accept_t accept, utcp_pre_accept_t pre_accept) {
1538         if(utcp) {
1539                 utcp->accept = accept;
1540                 utcp->pre_accept = pre_accept;
1541         }
1542 }