]> git.meshlink.io Git - utcp/blobdiff - utcp.c
Fix the logic for determining whether a packet has an acceptable ack seqno.
[utcp] / utcp.c
diff --git a/utcp.c b/utcp.c
index 74dfc015db89db1fd8e25cd891a1d16498110643..1b498a2897d0c4063b568040c24804c6235f636d 100644 (file)
--- a/utcp.c
+++ b/utcp.c
 } while (0)
 #endif
 
+#ifndef max
+#define max(a, b) ((a) > (b) ? (a) : (b))
+#endif
+
 #ifdef UTCP_DEBUG
 #include <stdarg.h>
 
@@ -103,6 +107,19 @@ static void set_state(struct utcp_connection *c, enum state state) {
        debug("%p new state: %s\n", c->utcp, strstate[state]);
 }
 
+// Tell whether the segment ending at seq should carry our FIN: seq must be
+// the end of the queued send data (snd.last) and the connection must be in
+// FIN_WAIT_1, CLOSING or LAST_ACK.
+static bool fin_wanted(struct utcp_connection *c, uint32_t seq) {
+       bool fin_state = c->state == FIN_WAIT_1 || c->state == CLOSING || c->state == LAST_ACK;
+
+       return fin_state && seq == c->snd.last;
+}
+
 static inline void list_connections(struct utcp *utcp) {
        debug("%p has %d connections:\n", utcp, utcp->nconnections);
        for(int i = 0; i < utcp->nconnections; i++)
@@ -113,13 +130,106 @@ static int32_t seqdiff(uint32_t a, uint32_t b) {
        return a - b;
 }
 
+// Buffer functions
+// TODO: convert to ringbuffers to avoid memmove() operations.
+
+// Store len bytes from data into the buffer at the given offset.
+// The backing storage is grown on demand, but never beyond buf->maxsize;
+// data that does not fit below maxsize is truncated.
+// Returns the number of bytes stored (0 if the buffer is already full or
+// offset is at or past maxsize), or -1 if growing the storage failed.
+static ssize_t buffer_put_at(struct buffer *buf, size_t offset, const void *data, size_t len) {
+       if(buf->maxsize <= buf->used)
+               return 0;
+
+       debug("buffer_put_at %zu %zu %zu\n", buf->used, offset, len);
+
+       size_t required = offset + len;
+       if(required > buf->maxsize) {
+               if(offset >= buf->maxsize)
+                       return 0;
+               // Only store the part that still fits below maxsize.
+               len = buf->maxsize - offset;
+               required = buf->maxsize;
+       }
+
+       if(required > buf->size) {
+               size_t newsize = buf->size;
+               if(!newsize) {
+                       newsize = required;
+               } else {
+                       // Double until the whole write fits. The bound is "required",
+                       // not used + len, because offset may lie beyond buf->used.
+                       do {
+                               newsize *= 2;
+                       } while(newsize < required);
+               }
+               if(newsize > buf->maxsize)
+                       newsize = buf->maxsize;
+               char *newdata = realloc(buf->data, newsize);
+               if(!newdata)
+                       return -1;
+               buf->data = newdata;
+               buf->size = newsize;
+       }
+
+       memcpy(buf->data + offset, data, len);
+       if(required > buf->used)
+               buf->used = required;
+       return len;
+}
+
+// Append len bytes from data directly after the data already in the buffer.
+// The return value is that of buffer_put_at().
+static ssize_t buffer_put(struct buffer *buf, const void *data, size_t len) {
+       size_t end = buf->used;
+
+       return buffer_put_at(buf, end, data, len);
+}
+
+// Remove up to len bytes from the front of the buffer, copying them to data
+// unless data is NULL. Returns the number of bytes removed.
+static ssize_t buffer_get(struct buffer *buf, void *data, size_t len) {
+       size_t count = len > buf->used ? buf->used : len;
+       size_t remaining = buf->used - count;
+
+       if(data)
+               memcpy(data, buf->data, count);
+       // Shift whatever is left to the start of the storage.
+       if(remaining)
+               memmove(buf->data, buf->data + count, remaining);
+       buf->used = remaining;
+       return count;
+}
+
+// Copy up to len bytes starting at offset out of the buffer into data,
+// leaving the buffer itself unchanged. Returns the number of bytes copied,
+// which is 0 when offset is at or past the end of the stored data.
+static ssize_t buffer_copy(struct buffer *buf, void *data, size_t offset, size_t len) {
+       if(offset < buf->used) {
+               size_t count = offset + len > buf->used ? buf->used - offset : len;
+               memcpy(data, buf->data + offset, count);
+               return count;
+       }
+
+       return 0;
+}
+
+// Zero *buf, allocate len bytes of initial storage (none if len is 0) and
+// record maxlen as the buffer's maxsize. Returns false if allocation fails.
+static bool buffer_init(struct buffer *buf, uint32_t len, uint32_t maxlen) {
+       memset(buf, 0, sizeof *buf);
+
+       if(len && !(buf->data = malloc(len)))
+               return false;
+
+       buf->size = len;
+       buf->maxsize = maxlen;
+       return true;
+}
+
+// Release the buffer's storage and reset all of its fields to zero.
+static void buffer_exit(struct buffer *buf) {
+       void *storage = buf->data;
+
+       memset(buf, 0, sizeof *buf);
+       free(storage);
+}
+
+// Return how many more bytes can be stored before the buffer reaches maxsize.
+static uint32_t buffer_free(const struct buffer *buf) {
+       uint32_t room = buf->maxsize - buf->used;
+
+       return room;
+}
+
 // Connections are stored in a sorted list.
 // This gives O(log(N)) lookup time, O(N log(N)) insertion time and O(N) deletion time.
 
 static int compare(const void *va, const void *vb) {
+       assert(va && vb);
+
        const struct utcp_connection *a = *(struct utcp_connection **)va;
        const struct utcp_connection *b = *(struct utcp_connection **)vb;
 
+       assert(a && b);
        assert(a->src && b->src);
 
        int c = (int)a->src - (int)b->src;
@@ -147,10 +257,10 @@ static void free_connection(struct utcp_connection *c) {
        assert(cp);
 
        int i = cp - utcp->connections;
-       memmove(cp + i, cp + i + 1, (utcp->nconnections - i - 1) * sizeof *cp);
+       memmove(cp, cp + 1, (utcp->nconnections - i - 1) * sizeof *cp);
        utcp->nconnections--;
 
-       free(c->sndbuf);
+       buffer_exit(&c->sndbuf);
        free(c);
 }
 
@@ -189,10 +299,12 @@ static struct utcp_connection *allocate_connection(struct utcp *utcp, uint16_t s
        if(!c)
                return NULL;
 
-       c->sndbufsize = DEFAULT_SNDBUFSIZE;
-       c->maxsndbufsize = DEFAULT_MAXSNDBUFSIZE;
-       c->sndbuf = malloc(c->sndbufsize);
-       if(!c->sndbuf) {
+       if(!buffer_init(&c->sndbuf, DEFAULT_SNDBUFSIZE, DEFAULT_MAXSNDBUFSIZE)) {
+               free(c);
+               return NULL;
+       }
+
+       if(!buffer_init(&c->rcvbuf, DEFAULT_RCVBUFSIZE, DEFAULT_MAXRCVBUFSIZE)) {
                free(c);
                return NULL;
        }
@@ -201,7 +313,11 @@ static struct utcp_connection *allocate_connection(struct utcp *utcp, uint16_t s
 
        c->src = src;
        c->dst = dst;
+#ifdef UTCP_DEBUG
+       c->snd.iss = 0;
+#else
        c->snd.iss = rand();
+#endif
        c->snd.una = c->snd.iss;
        c->snd.nxt = c->snd.iss + 1;
        c->rcv.wnd = utcp->mtu;
@@ -217,6 +333,46 @@ static struct utcp_connection *allocate_connection(struct utcp *utcp, uint16_t s
        return c;
 }
 
+// Update RTT variables. See RFC 6298.
+// rtt is a new round trip time sample; a sample of 0 is rejected as invalid.
+// Updates srtt, rttvar and rto of the utcp context, capping rto at MAX_RTO.
+static void update_rtt(struct utcp_connection *c, uint32_t rtt) {
+       if(!rtt) {
+               debug("invalid rtt\n");
+               return;
+       }
+
+       struct utcp *utcp = c->utcp;
+
+       if(!utcp->srtt) {
+               // First sample: SRTT = R, RTTVAR = R / 2, RTO = SRTT + max(G, 4 * RTTVAR).
+               utcp->srtt = rtt;
+               utcp->rttvar = rtt / 2;
+               utcp->rto = rtt + max(2 * rtt, CLOCK_GRANULARITY);
+       } else {
+               // RTTVAR has to be computed from the old SRTT, so update it first.
+               // The difference is taken explicitly instead of via abs() on unsigned values.
+               uint32_t delta = utcp->srtt > rtt ? utcp->srtt - rtt : rtt - utcp->srtt;
+               utcp->rttvar = (utcp->rttvar * 3 + delta) / 4;
+               utcp->srtt = (utcp->srtt * 7 + rtt) / 8;
+               // RFC 6298 (2.3): RTO = SRTT + max(G, K * RTTVAR) with K = 4.
+               utcp->rto = utcp->srtt + max(4 * utcp->rttvar, CLOCK_GRANULARITY);
+       }
+
+       if(utcp->rto > MAX_RTO)
+               utcp->rto = MAX_RTO;
+
+       debug("rtt %u srtt %u rttvar %u rto %u\n", rtt, utcp->srtt, utcp->rttvar, utcp->rto);
+}
+
+// Arm the retransmit timer: set c->rtrx_timeout to the current time with the
+// context's rto added to the microseconds field, then normalise it.
+static void start_retransmit_timer(struct utcp_connection *c) {
+       struct timeval *timeout = &c->rtrx_timeout;
+
+       gettimeofday(timeout, NULL);
+       timeout->tv_usec += c->utcp->rto;
+       // Carry whole seconds out of the microseconds field.
+       for(; timeout->tv_usec >= 1000000; timeout->tv_usec -= 1000000)
+               timeout->tv_sec++;
+       debug("timeout set to %lu.%06lu (%u)\n", c->rtrx_timeout.tv_sec, c->rtrx_timeout.tv_usec, c->utcp->rto);
+}
+
+// Disarm the retransmit timer by zeroing c->rtrx_timeout.
+static void stop_retransmit_timer(struct utcp_connection *c) {
+       c->rtrx_timeout.tv_sec = 0;
+       c->rtrx_timeout.tv_usec = 0;
+       debug("timeout cleared\n");
+}
+
 struct utcp_connection *utcp_connect(struct utcp *utcp, uint16_t dst, utcp_recv_t recv, void *priv) {
        struct utcp_connection *c = allocate_connection(utcp, 0, dst);
        if(!c)
@@ -261,7 +417,7 @@ void utcp_accept(struct utcp_connection *c, utcp_recv_t recv, void *priv) {
 static void ack(struct utcp_connection *c, bool sendatleastone) {
        int32_t left = seqdiff(c->snd.last, c->snd.nxt);
        int32_t cwndleft = c->snd.cwnd - seqdiff(c->snd.nxt, c->snd.una);
-       char *data = c->sndbuf + seqdiff(c->snd.nxt, c->snd.una);
+       debug("cwndleft = %d\n", cwndleft);
 
        assert(left >= 0);
 
@@ -280,7 +436,7 @@ static void ack(struct utcp_connection *c, bool sendatleastone) {
        } *pkt;
 
        pkt = malloc(sizeof pkt->hdr + c->utcp->mtu);
-       if(!pkt->data)
+       if(!pkt)
                return;
 
        pkt->hdr.src = c->src;
@@ -294,22 +450,21 @@ static void ack(struct utcp_connection *c, bool sendatleastone) {
                uint32_t seglen = left > c->utcp->mtu ? c->utcp->mtu : left;
                pkt->hdr.seq = c->snd.nxt;
 
-               memcpy(pkt->data, data, seglen);
+               buffer_copy(&c->sndbuf, pkt->data, seqdiff(c->snd.nxt, c->snd.una), seglen);
 
                c->snd.nxt += seglen;
-               data += seglen;
                left -= seglen;
 
-               if(c->state != ESTABLISHED && !left && seglen) {
-                       switch(c->state) {
-                       case FIN_WAIT_1:
-                       case CLOSING:
-                               seglen--;
-                               pkt->hdr.ctl |= FIN;
-                               break;
-                       default:
-                               break;
-                       }
+               if(seglen && fin_wanted(c, c->snd.nxt)) {
+                       seglen--;
+                       pkt->hdr.ctl |= FIN;
+               }
+
+               if(!c->rtt_start.tv_sec) {
+                       // Start RTT measurement
+                       gettimeofday(&c->rtt_start, NULL);
+                       c->rtt_seq = pkt->hdr.seq + seglen;
+                       debug("Starting RTT measurement, expecting ack %u\n", c->rtt_seq);
                }
 
                print_packet(c->utcp, "send", pkt, sizeof pkt->hdr + seglen);
@@ -357,45 +512,16 @@ ssize_t utcp_send(struct utcp_connection *c, const void *data, size_t len) {
                return -1;
        }
 
-       uint32_t bufused = seqdiff(c->snd.nxt, c->snd.una);
-
-       /* Check our send buffer.
-        * - If it's big enough, just put the data in there.
-        * - If not, decide whether to enlarge if possible.
-        * - Cap len so it doesn't overflow our buffer.
-        */
-
-       if(len > c->sndbufsize - bufused && c->sndbufsize < c->maxsndbufsize) {
-               uint32_t newbufsize;
-               if(c->sndbufsize > c->maxsndbufsize / 2)
-                       newbufsize = c->maxsndbufsize;
-               else
-                       newbufsize = c->sndbufsize * 2;
-               if(bufused + len > newbufsize) {
-                       if(bufused + len > c->maxsndbufsize)
-                               newbufsize = c->maxsndbufsize;
-                       else
-                               newbufsize = bufused + len;
-               }
-               char *newbuf = realloc(c->sndbuf, newbufsize);
-               if(newbuf) {
-                       c->sndbuf = newbuf;
-                       c->sndbufsize = newbufsize;
-               }
-       }
-
-       if(len > c->sndbufsize - bufused)
-               len = c->sndbufsize - bufused;
-
-       if(!len) {
-               errno == EWOULDBLOCK;
+       len = buffer_put(&c->sndbuf, data, len);
+       if(len <= 0) {
+               errno = EWOULDBLOCK;
                return 0;
        }
 
-       memcpy(c->sndbuf + bufused, data, len);
        c->snd.last += len;
-
        ack(c, false);
+       if(!timerisset(&c->rtrx_timeout))
+               start_retransmit_timer(c);
        return len;
 }
 
@@ -405,6 +531,198 @@ static void swap_ports(struct hdr *hdr) {
        hdr->dst = tmp;
 }
 
+// Retransmit what the peer has not acknowledged yet, depending on the state:
+// the SYN (SYN_SENT), the SYN|ACK (SYN_RECEIVED), or up to one MTU of unacked
+// data starting at snd.una (ESTABLISHED and the closing states).
+// After sending, the retransmit timer is restarted, the RTO is doubled
+// (capped at MAX_RTO) and any running RTT measurement is invalidated.
+// Does nothing if the connection is CLOSED, nothing is outstanding, or the
+// packet buffer cannot be allocated.
+static void retransmit(struct utcp_connection *c) {
+       if(c->state == CLOSED || c->snd.nxt == c->snd.una)
+               return;
+
+       struct utcp *utcp = c->utcp;
+
+       struct {
+               struct hdr hdr;
+               char data[];
+       } *pkt;
+
+       pkt = malloc(sizeof pkt->hdr + c->utcp->mtu);
+       if(!pkt)
+               return;
+
+       pkt->hdr.src = c->src;
+       pkt->hdr.dst = c->dst;
+       // Set the window for every packet type: the header comes from malloc(),
+       // so leaving wnd unset would send uninitialised memory to the peer.
+       pkt->hdr.wnd = c->rcv.wnd;
+
+       switch(c->state) {
+               case SYN_SENT:
+                       // Send our SYN again
+                       pkt->hdr.seq = c->snd.iss;
+                       pkt->hdr.ack = 0;
+                       pkt->hdr.ctl = SYN;
+                       print_packet(c->utcp, "rtrx", pkt, sizeof pkt->hdr);
+                       utcp->send(utcp, pkt, sizeof pkt->hdr);
+                       break;
+
+               case SYN_RECEIVED:
+                       // Send SYNACK again
+                       pkt->hdr.seq = c->snd.nxt;
+                       pkt->hdr.ack = c->rcv.nxt;
+                       pkt->hdr.ctl = SYN | ACK;
+                       print_packet(c->utcp, "rtrx", pkt, sizeof pkt->hdr);
+                       utcp->send(utcp, pkt, sizeof pkt->hdr);
+                       break;
+
+               case ESTABLISHED:
+               case FIN_WAIT_1:
+               case CLOSE_WAIT:
+               case CLOSING:
+               case LAST_ACK:
+                       // Send unacked data again.
+                       pkt->hdr.seq = c->snd.una;
+                       pkt->hdr.ack = c->rcv.nxt;
+                       pkt->hdr.ctl = ACK;
+                       uint32_t len = seqdiff(c->snd.last, c->snd.una);
+                       if(len > utcp->mtu)
+                               len = utcp->mtu;
+                       // The FIN occupies one sequence number but no payload byte.
+                       if(fin_wanted(c, c->snd.una + len)) {
+                               len--;
+                               pkt->hdr.ctl |= FIN;
+                       }
+                       c->snd.nxt = c->snd.una + len;
+                       c->snd.cwnd = utcp->mtu; // reduce cwnd on retransmit
+                       buffer_copy(&c->sndbuf, pkt->data, 0, len);
+                       print_packet(c->utcp, "rtrx", pkt, sizeof pkt->hdr + len);
+                       utcp->send(utcp, pkt, sizeof pkt->hdr + len);
+                       break;
+
+               case CLOSED:
+               case LISTEN:
+               case TIME_WAIT:
+               case FIN_WAIT_2:
+                       // We shouldn't need to retransmit anything in this state.
+#ifdef UTCP_DEBUG
+                       abort();
+#endif
+                       stop_retransmit_timer(c);
+                       goto cleanup;
+       }
+
+       start_retransmit_timer(c);
+       utcp->rto *= 2;
+       if(utcp->rto > MAX_RTO)
+               utcp->rto = MAX_RTO;
+       c->rtt_start.tv_sec = 0; // invalidate RTT timer
+
+cleanup:
+       free(pkt);
+}
+
+// Update receive buffer and SACK entries after consuming data.
+// len bytes are dropped from the front of c->rcvbuf. Each SACK entry is then
+// shifted down by len, trimmed if partially consumed, or removed if fully
+// consumed. Aborts if len exceeds the amount of data in the receive buffer.
+static void sack_consume(struct utcp_connection *c, size_t len) {
+       debug("sack_consume %zu\n", len);
+       if(len > c->rcvbuf.used)
+               abort();
+
+       buffer_get(&c->rcvbuf, NULL, len);
+
+       for(int i = 0; i < NSACKS && c->sacks[i].len; ) {
+               if(len < c->sacks[i].offset) {
+                       // Entirely beyond the consumed data: just shift it.
+                       c->sacks[i].offset -= len;
+                       i++;
+               } else if(len < c->sacks[i].offset + c->sacks[i].len) {
+                       // Partially consumed: shrink the entry while offset still
+                       // holds its old value, and only then move it to the front.
+                       c->sacks[i].len -= len - c->sacks[i].offset;
+                       c->sacks[i].offset = 0;
+                       i++;
+               } else {
+                       // Fully consumed: drop the entry and look at index i again.
+                       if(i < NSACKS - 1) {
+                               memmove(&c->sacks[i], &c->sacks[i + 1], (NSACKS - 1 - i) * sizeof c->sacks[i]);
+                               // The shift vacated the last slot; mark it unused.
+                               c->sacks[NSACKS - 1].len = 0;
+                       } else {
+                               c->sacks[i].len = 0;
+                               break;
+                       }
+               }
+       }
+
+       for(int i = 0; i < NSACKS && c->sacks[i].len; i++)
+               debug("SACK[%d] offset %u len %u\n", i, c->sacks[i].offset, c->sacks[i].len);
+}
+
+// Store data that arrived ahead of rcv.nxt in the receive buffer at the given
+// offset, and record the range in the SACK list: as a new entry, inserted
+// before an existing entry, or merged with an overlapping/adjacent one.
+// Aborts if the buffer could not take all of the data.
+static void handle_out_of_order(struct utcp_connection *c, uint32_t offset, const void *data, size_t len) {
+       debug("out of order packet, offset %u\n", offset);
+       // Packet loss or reordering occurred. Store the data in the buffer.
+       ssize_t rxd = buffer_put_at(&c->rcvbuf, offset, data, len);
+       // Check the sign first: comparing a negative ssize_t with a size_t
+       // would convert it to a huge unsigned value and hide the error.
+       if(rxd < 0 || (size_t)rxd < len)
+               abort();
+
+       // Make note of where we put it.
+       for(int i = 0; i < NSACKS; i++) {
+               if(!c->sacks[i].len) { // nothing to merge, add new entry
+                       debug("New SACK entry %d\n", i);
+                       c->sacks[i].offset = offset;
+                       c->sacks[i].len = rxd;
+                       break;
+               } else if(offset < c->sacks[i].offset) {
+                       if(offset + rxd < c->sacks[i].offset) { // insert before
+                               if(!c->sacks[NSACKS - 1].len) { // only if room left
+                                       debug("Insert SACK entry at %d\n", i);
+                                       memmove(&c->sacks[i + 1], &c->sacks[i], (NSACKS - i - 1) * sizeof c->sacks[i]);
+                                       c->sacks[i].offset = offset;
+                                       c->sacks[i].len = rxd;
+                               }
+                               break;
+                       } else { // merge
+                               debug("Merge with start of SACK entry at %d\n", i);
+                               // The entry now starts earlier, so its length must grow:
+                               // keep the old end, or the new data's end if that is further.
+                               uint32_t end = c->sacks[i].offset + c->sacks[i].len;
+                               uint32_t newend = offset + (uint32_t)rxd;
+                               if(newend > end)
+                                       end = newend;
+                               c->sacks[i].offset = offset;
+                               c->sacks[i].len = end - offset;
+                               break;
+                       }
+               } else if(offset <= c->sacks[i].offset + c->sacks[i].len) {
+                       if(offset + rxd > c->sacks[i].offset + c->sacks[i].len) { // merge
+                               debug("Merge with end of SACK entry at %d\n", i);
+                               c->sacks[i].len = offset + rxd - c->sacks[i].offset;
+                               // TODO: handle potential merge with next entry
+                       }
+                       break;
+               }
+       }
+
+       for(int i = 0; i < NSACKS && c->sacks[i].len; i++)
+               debug("SACK[%d] offset %u len %u\n", i, c->sacks[i].offset, c->sacks[i].len);
+}
+
+// Deliver data that starts exactly at rcv.nxt. If it reaches the first SACK
+// entry, the data is merged into the receive buffer and delivered together
+// with that entry. The data is passed to c->recv when set (aborting if not
+// everything is accepted), and rcv.nxt is advanced by the delivered length.
+static void handle_in_order(struct utcp_connection *c, const void *data, size_t len) {
+       // Check if we can process out-of-order data now.
+       bool joins_sack = c->sacks[0].len && len >= c->sacks[0].offset; // TODO: handle overlap with second SACK
+
+       if(joins_sack) {
+               debug("incoming packet len %zu connected with SACK at %u\n", len, c->sacks[0].offset);
+               buffer_put_at(&c->rcvbuf, 0, data, len); // TODO: handle return value
+               len = max(len, c->sacks[0].offset + c->sacks[0].len);
+               data = c->rcvbuf.data;
+       }
+
+       if(c->recv) {
+               ssize_t accepted = c->recv(c, data, len);
+               // TODO: handle the application not accepting all data.
+               if(accepted != len)
+                       abort();
+       }
+
+       if(c->rcvbuf.used)
+               sack_consume(c, len);
+
+       c->rcv.nxt += len;
+}
+
+
+// Dispatch received payload: data starting at rcv.nxt goes to
+// handle_in_order(), anything further ahead to handle_out_of_order().
+// Aborts if the data would end beyond the receive buffer's maxsize.
+static void handle_incoming_data(struct utcp_connection *c, uint32_t seq, const void *data, size_t len) {
+       uint32_t offset = seqdiff(seq, c->rcv.nxt);
+
+       if(offset + len > c->rcvbuf.maxsize)
+               abort();
+
+       if(!offset) {
+               handle_in_order(c, data, len);
+               return;
+       }
+
+       handle_out_of_order(c, offset, data, len);
+}
+
+
 ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) {
        if(!utcp) {
                errno = EFAULT;
@@ -502,6 +820,8 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) {
 
        // It is for an existing connection.
 
+       uint32_t prevrcvnxt = c->rcv.nxt;
+
        // 1. Drop invalid packets.
 
        // 1a. Drop packets that should not happen in our current state.
@@ -518,7 +838,10 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) {
        case TIME_WAIT:
                break;
        default:
+#ifdef UTCP_DEBUG
                abort();
+#endif
+               break;
        }
 
        // 1b. Drop packets with a sequence number not in our receive window.
@@ -529,42 +852,35 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) {
                acceptable = true;
 
        // TODO: handle packets overlapping c->rcv.nxt.
-#if 0
+#if 1
        // Only use this when accepting out-of-order packets.
        else if(len == 0)
-               if(c->rcv.wnd == 0)
-                       acceptable = hdr.seq == c->rcv.nxt;
-               else
-                       acceptable = (seqdiff(hdr.seq, c->rcv.nxt) >= 0 && seqdiff(hdr.seq, c->rcv.nxt + c->rcv.wnd) < 0);
+               acceptable = seqdiff(hdr.seq, c->rcv.nxt) >= 0;
        else
-               if(c->rcv.wnd == 0)
-                       // We don't accept data when the receive window is zero.
-                       acceptable = false;
-               else
-                       // Both start and end of packet must be within the receive window
-                       acceptable = (seqdiff(hdr.seq, c->rcv.nxt) >= 0 && seqdiff(hdr.seq, c->rcv.nxt + c->rcv.wnd) < 0)
-                               || (seqdiff(hdr.seq + len + 1, c->rcv.nxt) >= 0 && seqdiff(hdr.seq + len - 1, c->rcv.nxt + c->rcv.wnd) < 0);
+               acceptable = seqdiff(hdr.seq, c->rcv.nxt) >= 0 && seqdiff(hdr.seq, c->rcv.nxt) + len <= c->rcvbuf.maxsize;
 #else
        if(c->state != SYN_SENT)
                acceptable = hdr.seq == c->rcv.nxt;
 #endif
 
        if(!acceptable) {
-               debug("Packet not acceptable, %u  <= %u + %zu < %u\n", c->rcv.nxt, hdr.seq, len, c->rcv.nxt + c->rcv.wnd);
+               debug("Packet not acceptable, %u <= %u + %zu < %u\n", c->rcv.nxt, hdr.seq, len, c->rcv.nxt + c->rcvbuf.maxsize);
                // Ignore unacceptable RST packets.
                if(hdr.ctl & RST)
                        return 0;
                // Otherwise, send an ACK back in the hope things improve.
-               goto ack;
+               ack(c, true);
+               return 0;
        }
 
        c->snd.wnd = hdr.wnd; // TODO: move below
 
        // 1c. Drop packets with an invalid ACK.
-       // ackno should not roll back, and it should also not be bigger than snd.nxt.
+       // ackno should not roll back, and it should also not be bigger than what we ever could have sent
+       // (= snd.una + c->sndbuf.used).
 
-       if(hdr.ctl & ACK && (seqdiff(hdr.ack, c->snd.nxt) > 0 || seqdiff(hdr.ack, c->snd.una) < 0)) {
-               debug("Packet ack seqno out of range, %u %u %u\n", hdr.ack, c->snd.una, c->snd.nxt);
+       if(hdr.ctl & ACK && (seqdiff(hdr.ack, c->snd.last) > 0 || seqdiff(hdr.ack, c->snd.una) < 0)) {
+               debug("Packet ack seqno out of range, %u <= %u < %u\n", c->snd.una, hdr.ack, c->snd.una + c->sndbuf.used);
                // Ignore unacceptable RST packets.
                if(hdr.ctl & RST)
                        return 0;
@@ -617,16 +933,33 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) {
                        set_state(c, CLOSED);
                        return 0;
                default:
+#ifdef UTCP_DEBUG
                        abort();
+#endif
+                       break;
                }
        }
 
        // 3. Advance snd.una
 
        uint32_t advanced = seqdiff(hdr.ack, c->snd.una);
-       uint32_t prevrcvnxt = c->rcv.nxt;
+       prevrcvnxt = c->rcv.nxt;
 
        if(advanced) {
+               // RTT measurement
+               if(c->rtt_start.tv_sec) {
+                       if(c->rtt_seq == hdr.ack) {
+                               struct timeval now, diff;
+                               gettimeofday(&now, NULL);
+                               timersub(&now, &c->rtt_start, &diff);
+                               update_rtt(c, diff.tv_sec * 1000000 + diff.tv_usec);
+                               c->rtt_start.tv_sec = 0;
+                       } else if(c->rtt_seq < hdr.ack) {
+                               debug("Cancelling RTT measurement: %u < %u\n", c->rtt_seq, hdr.ack);
+                               c->rtt_start.tv_sec = 0;
+                       }
+               }
+
                int32_t data_acked = advanced;
 
                switch(c->state) {
@@ -644,18 +977,19 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) {
                int32_t bufused = seqdiff(c->snd.last, c->snd.una);
                assert(data_acked <= bufused);
 
-               // Make room in the send buffer.
-               // TODO: try to avoid memmoving too much. Circular buffer?
-               uint32_t left = bufused - data_acked;
-               if(data_acked && left)
-                       memmove(c->sndbuf, c->sndbuf + data_acked, left);
+               if(data_acked)
+                       buffer_get(&c->sndbuf, NULL, data_acked);
+
+               // Also advance snd.nxt if possible
+               if(seqdiff(c->snd.nxt, hdr.ack) < 0)
+                       c->snd.nxt = hdr.ack;
 
                c->snd.una = hdr.ack;
 
                c->dupack = 0;
                c->snd.cwnd += utcp->mtu;
-               if(c->snd.cwnd > c->maxsndbufsize)
-                       c->snd.cwnd = c->maxsndbufsize;
+               if(c->snd.cwnd > c->sndbuf.maxsize)
+                       c->snd.cwnd = c->sndbuf.maxsize;
 
                // Check if we have sent a FIN that is now ACKed.
                switch(c->state) {
@@ -676,10 +1010,13 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) {
        } else {
                if(!len) {
                        c->dupack++;
-                       if(c->dupack >= 3) {
+                       if(c->dupack == 3) {
                                debug("Triplicate ACK\n");
                                //TODO: Resend one packet and go to fast recovery mode. See RFC 6582.
-                               //abort();
+                               //We do a very simple variant here; reset the nxt pointer to the last acknowledged packet from the peer.
+                               //Reset the congestion window so we wait for ACKs.
+                               c->snd.nxt = c->snd.una;
+                               c->snd.cwnd = utcp->mtu;
                        }
                }
        }
@@ -688,8 +1025,10 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) {
 
        if(advanced) {
                timerclear(&c->conn_timeout); // It will be set anew in utcp_timeout() if c->snd.una != c->snd.nxt.
-               if(c->snd.una == c->snd.nxt)
-                       timerclear(&c->rtrx_timeout);
+               if(c->snd.una == c->snd.last)
+                       stop_retransmit_timer(c);
+               else
+                       start_retransmit_timer(c);
        }
 
        // 5. Process SYN stuff
@@ -716,7 +1055,10 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) {
                        // Ehm, no. We should never receive a second SYN.
                        goto reset;
                default:
+#ifdef UTCP_DEBUG
                        abort();
+#endif
+                       return 0;
                }
 
                // SYN counts as one sequence number
@@ -746,7 +1088,10 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) {
                case SYN_SENT:
                case SYN_RECEIVED:
                        // This should never happen.
+#ifdef UTCP_DEBUG
                        abort();
+#endif
+                       return 0;
                case ESTABLISHED:
                case FIN_WAIT_1:
                case FIN_WAIT_2:
@@ -758,37 +1103,26 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) {
                        // Ehm no, We should never receive more data after a FIN.
                        goto reset;
                default:
+#ifdef UTCP_DEBUG
                        abort();
+#endif
+                       return 0;
                }
 
-               ssize_t rxd;
-
-               if(c->recv) {
-                       rxd = c->recv(c, data, len);
-                       if(rxd != len) {
-                               // TODO: once we have a receive buffer, handle the application not accepting all data.
-                               fprintf(stderr, "c->recv(%p, %p, %zu) returned %zd\n", c, data, len, rxd);
-                               abort();
-                       }
-                       if(rxd < 0)
-                               rxd = 0;
-                       else if(rxd > len)
-                               rxd = len; // Bad application, bad!
-               } else {
-                       rxd = len;
-               }
-
-               c->rcv.nxt += len;
+               handle_incoming_data(c, hdr.seq, data, len);
        }
 
        // 7. Process FIN stuff
 
-       if(hdr.ctl & FIN) {
+       if((hdr.ctl & FIN) && hdr.seq + len == c->rcv.nxt) {
                switch(c->state) {
                case SYN_SENT:
                case SYN_RECEIVED:
                        // This should never happen.
+#ifdef UTCP_DEBUG
                        abort();
+#endif
+                       break;
                case ESTABLISHED:
                        set_state(c, CLOSE_WAIT);
                        break;
@@ -807,7 +1141,10 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) {
                        // Ehm, no. We should never receive a second FIN.
                        goto reset;
                default:
+#ifdef UTCP_DEBUG
                        abort();
+#endif
+                       break;
                }
 
                // FIN counts as one sequence number
@@ -849,7 +1186,7 @@ reset:
 }
 
 int utcp_shutdown(struct utcp_connection *c, int dir) {
-       debug("%p shutdown %d\n", c ? c->utcp : NULL, dir);
+       debug("%p shutdown %d at %u\n", c ? c->utcp : NULL, dir, c ? c->snd.last : 0);
        if(!c) {
                errno = EFAULT;
                return -1;
@@ -861,12 +1198,26 @@ int utcp_shutdown(struct utcp_connection *c, int dir) {
                return -1;
        }
 
-       // TODO: handle dir
+       if(!(dir == UTCP_SHUT_RD || dir == UTCP_SHUT_WR || dir == UTCP_SHUT_RDWR)) {
+               errno = EINVAL;
+               return -1;
+       }
+
+       // TCP does not have a provision for stopping incoming packets.
+       // The best we can do is to just ignore them.
+       if(dir == UTCP_SHUT_RD || dir == UTCP_SHUT_RDWR)
+               c->recv = NULL;
+
+       // The rest of the code deals with shutting down writes.
+       if(dir == UTCP_SHUT_RD)
+               return 0;
 
        switch(c->state) {
        case CLOSED:
-               return 0;
        case LISTEN:
+               errno = ENOTCONN;
+               return -1;
+
        case SYN_SENT:
                set_state(c, CLOSED);
                return 0;
@@ -897,6 +1248,8 @@ int utcp_shutdown(struct utcp_connection *c, int dir) {
 int utcp_close(struct utcp_connection *c) {
        if(utcp_shutdown(c, SHUT_RDWR))
                return -1;
+       c->recv = NULL;
+       c->poll = NULL;
        c->reapable = true;
        return 0;
 }
@@ -913,6 +1266,8 @@ int utcp_abort(struct utcp_connection *c) {
                return -1;
        }
 
+       c->recv = NULL;
+       c->poll = NULL;
        c->reapable = true;
 
        switch(c->state) {
@@ -951,80 +1306,13 @@ int utcp_abort(struct utcp_connection *c) {
        return 0;
 }
 
-static void retransmit(struct utcp_connection *c) {
-       if(c->state == CLOSED || c->snd.nxt == c->snd.una)
-               return;
-
-       struct utcp *utcp = c->utcp;
-
-       struct {
-               struct hdr hdr;
-               char *data;
-       } pkt;
-
-       pkt.data = malloc(c->utcp->mtu);
-       if(!pkt.data)
-               return;
-
-       pkt.hdr.src = c->src;
-       pkt.hdr.dst = c->dst;
-
-       switch(c->state) {
-               case LISTEN:
-                       // TODO: this should not happen
-                       break;
-
-               case SYN_SENT:
-                       pkt.hdr.seq = c->snd.iss;
-                       pkt.hdr.ack = 0;
-                       pkt.hdr.wnd = c->rcv.wnd;
-                       pkt.hdr.ctl = SYN;
-                       print_packet(c->utcp, "rtrx", &pkt, sizeof pkt.hdr);
-                       utcp->send(utcp, &pkt, sizeof pkt.hdr);
-                       break;
-
-               case SYN_RECEIVED:
-                       pkt.hdr.seq = c->snd.nxt;
-                       pkt.hdr.ack = c->rcv.nxt;
-                       pkt.hdr.ctl = SYN | ACK;
-                       print_packet(c->utcp, "rtrx", &pkt, sizeof pkt.hdr);
-                       utcp->send(utcp, &pkt, sizeof pkt.hdr);
-                       break;
-
-               case ESTABLISHED:
-               case FIN_WAIT_1:
-                       pkt.hdr.seq = c->snd.una;
-                       pkt.hdr.ack = c->rcv.nxt;
-                       pkt.hdr.ctl = ACK;
-                       uint32_t len = seqdiff(c->snd.nxt, c->snd.una);
-                       if(c->state == FIN_WAIT_1)
-                               len--;
-                       if(len > utcp->mtu)
-                               len = utcp->mtu;
-                       else {
-                               if(c->state == FIN_WAIT_1)
-                                       pkt.hdr.ctl |= FIN;
-                       }
-                       memcpy(pkt.data, c->sndbuf, len);
-                       print_packet(c->utcp, "rtrx", &pkt, sizeof pkt.hdr + len);
-                       utcp->send(utcp, &pkt, sizeof pkt.hdr + len);
-                       break;
-
-               default:
-                       // TODO: implement
-                       abort();
-       }
-
-       free(pkt.data);
-}
-
 /* Handle timeouts.
  * One call to this function will loop through all connections,
  * checking if something needs to be resent or not.
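- * The return value is the time to the next timeout in milliseconds,
- * or maybe a negative value if the timeout is infinite.
+ * The return value is the time remaining until the next timeout as a
+ * struct timeval; it is not clamped, so it may be negative if that time has passed.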
  */
-int utcp_timeout(struct utcp *utcp) {
+struct timeval utcp_timeout(struct utcp *utcp) {
        struct timeval now;
        gettimeofday(&now, NULL);
        struct timeval next = {now.tv_sec + 3600, now.tv_usec};
@@ -1055,46 +1343,49 @@ int utcp_timeout(struct utcp *utcp) {
                        retransmit(c);
                }
 
-               if(c->poll && c->sndbufsize < c->maxsndbufsize / 2)
-                       c->poll(c, c->maxsndbufsize - c->sndbufsize);
+               if(c->poll && buffer_free(&c->sndbuf) && (c->state == ESTABLISHED || c->state == CLOSE_WAIT))
+                       c->poll(c, buffer_free(&c->sndbuf));
 
                if(timerisset(&c->conn_timeout) && timercmp(&c->conn_timeout, &next, <))
                        next = c->conn_timeout;
 
-               if(c->snd.nxt != c->snd.una) {
-                       c->rtrx_timeout = now;
-                       c->rtrx_timeout.tv_sec++;
-               } else {
-                       timerclear(&c->rtrx_timeout);
-               }
-
                if(timerisset(&c->rtrx_timeout) && timercmp(&c->rtrx_timeout, &next, <))
                        next = c->rtrx_timeout;
        }
 
        struct timeval diff;
        timersub(&next, &now, &diff);
-       if(diff.tv_sec < 0)
-               return 0;
-       return diff.tv_sec * 1000 + diff.tv_usec / 1000;
+       return diff;
 }
 
-struct utcp *utcp_init(utcp_accept_t accept, utcp_pre_accept_t pre_accept, utcp_send_t send, void *priv) {
-       struct utcp *utcp = calloc(1, sizeof *utcp);
+bool utcp_is_active(struct utcp *utcp) { // true if any connection is in a state other than CLOSED or TIME_WAIT
        if(!utcp)
-               return NULL;
+               return false; // a NULL context has no connections to inspect
+
+       for(int i = 0; i < utcp->nconnections; i++)
+               if(utcp->connections[i]->state != CLOSED && utcp->connections[i]->state != TIME_WAIT) // CLOSED and TIME_WAIT are treated as inactive
+                       return true;
+
+       return false;
+}
 
+struct utcp *utcp_init(utcp_accept_t accept, utcp_pre_accept_t pre_accept, utcp_send_t send, void *priv) { // returns a new context, or NULL on failure
        if(!send) {
                errno = EFAULT;
                return NULL;
        }
 
+       struct utcp *utcp = calloc(1, sizeof *utcp); // zero-filled; allocated after the send check so that check's early return has nothing to free
+       if(!utcp)
+               return NULL; // allocation failed
+
        utcp->accept = accept;
        utcp->pre_accept = pre_accept;
        utcp->send = send;
        utcp->priv = priv;
-       utcp->mtu = 1000;
-       utcp->timeout = 60;
+       utcp->mtu = DEFAULT_MTU;
+       utcp->timeout = DEFAULT_USER_TIMEOUT; // user timeout, in seconds
+       utcp->rto = START_RTO; // starting retransmission timeout, in microseconds
 
        return utcp;
 }
@@ -1105,7 +1396,7 @@ void utcp_exit(struct utcp *utcp) {
        for(int i = 0; i < utcp->nconnections; i++) {
                if(!utcp->connections[i]->reapable)
                        debug("Warning, freeing unclosed connection %p\n", utcp->connections[i]);
-               free(utcp->connections[i]->sndbuf);
+               buffer_exit(&utcp->connections[i]->sndbuf);
                free(utcp->connections[i]);
        }
        free(utcp->connections);
@@ -1113,60 +1404,97 @@ void utcp_exit(struct utcp *utcp) {
 }
 
 uint16_t utcp_get_mtu(struct utcp *utcp) {
-       return utcp->mtu;
+       return utcp ? utcp->mtu : 0; // a NULL context reports an MTU of 0
 }
 
 void utcp_set_mtu(struct utcp *utcp, uint16_t mtu) {
        // TODO: handle overhead of the header
-       utcp->mtu = mtu;
+       if(utcp) // a NULL context is silently ignored
+               utcp->mtu = mtu;
 }
 
 int utcp_get_user_timeout(struct utcp *u) {
-       return u->timeout;
+       return u ? u->timeout : 0; // a NULL context reports a timeout of 0
 }
 
 void utcp_set_user_timeout(struct utcp *u, int timeout) {
-       u->timeout = timeout;
+       if(u) // a NULL context is silently ignored
+               u->timeout = timeout;
 }
 
 size_t utcp_get_sndbuf(struct utcp_connection *c) {
-       return c->maxsndbufsize;
+       return c ? c->sndbuf.maxsize : 0; // configured maximum of the send buffer; 0 for a NULL connection
 }
 
 size_t utcp_get_sndbuf_free(struct utcp_connection *c) {
-       return c->maxsndbufsize - c->sndbufsize;
+       if(c && (c->state == ESTABLISHED || c->state == CLOSE_WAIT)) // free space is only reported in the ESTABLISHED and CLOSE_WAIT states
+               return buffer_free(&c->sndbuf);
+       else
+               return 0; // NULL connection, or any other state
 }
 
 void utcp_set_sndbuf(struct utcp_connection *c, size_t size) {
-       c->maxsndbufsize = size;
-       if(c->maxsndbufsize != size)
-               c->maxsndbufsize = -1;
+       if(!c)
+               return; // a NULL connection is silently ignored
+       c->sndbuf.maxsize = size;
+       if(c->sndbuf.maxsize != size) // the value changed on assignment, i.e. size did not fit in maxsize
+               c->sndbuf.maxsize = -1; // presumably saturates to the field's maximum (assumes maxsize is unsigned) - TODO confirm
+}
+
+size_t utcp_get_rcvbuf(struct utcp_connection *c) {
+       return c ? c->rcvbuf.maxsize : 0; // configured maximum of the receive buffer; 0 for a NULL connection
+}
+
+size_t utcp_get_rcvbuf_free(struct utcp_connection *c) {
+       if(c && (c->state == ESTABLISHED || c->state == CLOSE_WAIT)) // free space is only reported in the ESTABLISHED and CLOSE_WAIT states
+               return buffer_free(&c->rcvbuf);
+       else
+               return 0; // NULL connection, or any other state
+}
+
+void utcp_set_rcvbuf(struct utcp_connection *c, size_t size) {
+       if(!c)
+               return; // a NULL connection is silently ignored
+       c->rcvbuf.maxsize = size;
+       if(c->rcvbuf.maxsize != size) // the value changed on assignment, i.e. size did not fit in maxsize
+               c->rcvbuf.maxsize = -1; // presumably saturates to the field's maximum (assumes maxsize is unsigned) - TODO confirm
 }
 
 bool utcp_get_nodelay(struct utcp_connection *c) {
-       return c->nodelay;
+       return c ? c->nodelay : false; // a NULL connection reports false
 }
 
 void utcp_set_nodelay(struct utcp_connection *c, bool nodelay) {
-       c->nodelay = nodelay;
+       if(c) // a NULL connection is silently ignored
+               c->nodelay = nodelay;
 }
 
 bool utcp_get_keepalive(struct utcp_connection *c) {
-       return c->keepalive;
+       return c ? c->keepalive : false; // a NULL connection reports false
 }
 
 void utcp_set_keepalive(struct utcp_connection *c, bool keepalive) {
-       c->keepalive = keepalive;
+       if(c) // a NULL connection is silently ignored
+               c->keepalive = keepalive;
 }
 
 size_t utcp_get_outq(struct utcp_connection *c) {
-       return seqdiff(c->snd.nxt, c->snd.una);
+       return c ? seqdiff(c->snd.nxt, c->snd.una) : 0; // sequence distance from snd.una to snd.nxt; 0 for a NULL connection
 }
 
 void utcp_set_recv_cb(struct utcp_connection *c, utcp_recv_t recv) {
-       c->recv = recv;
+       if(c) // a NULL connection is silently ignored
+               c->recv = recv;
 }
 
 void utcp_set_poll_cb(struct utcp_connection *c, utcp_poll_t poll) {
-       c->poll = poll;
+       if(c) // a NULL connection is silently ignored
+               c->poll = poll;
+}
+
+void utcp_set_accept_cb(struct utcp *utcp, utcp_accept_t accept, utcp_pre_accept_t pre_accept) { // replaces both accept callbacks of a context
+       if(utcp) { // a NULL context is silently ignored
+               utcp->accept = accept;
+               utcp->pre_accept = pre_accept;
+       }
 }