X-Git-Url: http://git.meshlink.io/?p=utcp;a=blobdiff_plain;f=utcp.c;h=04d2b194c20f193aa44f2d4006b9afc1bd356b29;hp=7c577018888b0fd99b7cac6f9d66e29b9a0fd91c;hb=HEAD;hpb=64ef5642d83fc13829262ec6c1bddb6a20d9aa7a diff --git a/utcp.c b/utcp.c index 7c57701..04d2b19 100644 --- a/utcp.c +++ b/utcp.c @@ -1,6 +1,6 @@ /* utcp.c -- Userspace TCP - Copyright (C) 2014 Guus Sliepen + Copyright (C) 2014-2017 Guus Sliepen This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -27,8 +27,7 @@ #include #include #include -#include -#include +#include #include "utcp_priv.h" @@ -44,99 +43,156 @@ #undef poll #endif -#ifndef timersub -#define timersub(a, b, r) do {\ - (r)->tv_sec = (a)->tv_sec - (b)->tv_sec;\ - (r)->tv_usec = (a)->tv_usec - (b)->tv_usec;\ - if((r)->tv_usec < 0)\ - (r)->tv_sec--, (r)->tv_usec += 1000000;\ -} while (0) +#ifndef UTCP_CLOCK +#if defined(CLOCK_MONOTONIC_RAW) && defined(__x86_64__) +#define UTCP_CLOCK CLOCK_MONOTONIC_RAW +#else +#define UTCP_CLOCK CLOCK_MONOTONIC #endif - -#ifndef max -#define max(a, b) ((a) > (b) ? (a) : (b)) #endif +static void timespec_sub(const struct timespec *a, const struct timespec *b, struct timespec *r) { + r->tv_sec = a->tv_sec - b->tv_sec; + r->tv_nsec = a->tv_nsec - b->tv_nsec; + + if(r->tv_nsec < 0) { + r->tv_sec--, r->tv_nsec += NSEC_PER_SEC; + } +} + +static int32_t timespec_diff_usec(const struct timespec *a, const struct timespec *b) { + return (a->tv_sec - b->tv_sec) * 1000000 + (a->tv_nsec - b->tv_nsec) / 1000; +} + +static bool timespec_lt(const struct timespec *a, const struct timespec *b) { + if(a->tv_sec == b->tv_sec) { + return a->tv_nsec < b->tv_nsec; + } else { + return a->tv_sec < b->tv_sec; + } +} + +static void timespec_clear(struct timespec *a) { + a->tv_sec = 0; + a->tv_nsec = 0; +} + +static bool timespec_isset(const struct timespec *a) { + return a->tv_sec; +} + +static long CLOCK_GRANULARITY; // usec + +static inline size_t min(size_t a, size_t b) { + return a < b ? a : b; +} + +static inline size_t max(size_t a, size_t b) { + return a > b ? a : b; +} + #ifdef UTCP_DEBUG #include -static void debug(const char *format, ...) { +#ifndef UTCP_DEBUG_DATALEN +#define UTCP_DEBUG_DATALEN 20 +#endif + +static void debug(struct utcp_connection *c, const char *format, ...) { + struct timespec tv; + char buf[1024]; + int len; + + clock_gettime(CLOCK_REALTIME, &tv); + len = snprintf(buf, sizeof(buf), "%ld.%06lu %u:%u ", (long)tv.tv_sec, tv.tv_nsec / 1000, c ? c->src : 0, c ? c->dst : 0); va_list ap; va_start(ap, format); - vfprintf(stderr, format, ap); + len += vsnprintf(buf + len, sizeof(buf) - len, format, ap); va_end(ap); + + if(len > 0 && (size_t)len < sizeof(buf)) { + fwrite(buf, len, 1, stderr); + } } -static void print_packet(struct utcp *utcp, const char *dir, const void *pkt, size_t len) { +static void print_packet(struct utcp_connection *c, const char *dir, const void *pkt, size_t len) { struct hdr hdr; - if(len < sizeof hdr) { - debug("%p %s: short packet (%zu bytes)\n", utcp, dir, len); + + if(len < sizeof(hdr)) { + debug(c, "%s: short packet (%lu bytes)\n", dir, (unsigned long)len); return; } - memcpy(&hdr, pkt, sizeof hdr); - fprintf (stderr, "%p %s: len=%zu, src=%u dst=%u seq=%u ack=%u wnd=%u ctl=", utcp, dir, len, hdr.src, hdr.dst, hdr.seq, hdr.ack, hdr.wnd); - if(hdr.ctl & SYN) - debug("SYN"); - if(hdr.ctl & RST) - debug("RST"); - if(hdr.ctl & FIN) - debug("FIN"); - if(hdr.ctl & ACK) - debug("ACK"); + memcpy(&hdr, pkt, sizeof(hdr)); - if(len > sizeof hdr) { - uint32_t datalen = len - sizeof hdr; - uint8_t *str = malloc((datalen << 1) + 7); - memcpy(str, " data=", 6); - uint8_t *strptr = str + 6; - const uint8_t *data = pkt; - const uint8_t *dataend = data + datalen; + uint32_t datalen; + + if(len > sizeof(hdr)) { + datalen = min(len - sizeof(hdr), UTCP_DEBUG_DATALEN); + } else { + datalen = 0; + } - while(data != dataend) { - *strptr = (*data >> 4) > 9? (*data >> 4) + 55 : (*data >> 4) + 48; - ++strptr; - *strptr = (*data & 0xf) > 9? (*data & 0xf) + 55 : (*data & 0xf) + 48; - ++strptr; - ++data; - } - *strptr = 0; - debug(str); - free(str); + const uint8_t *data = (uint8_t *)pkt + sizeof(hdr); + char str[datalen * 2 + 1]; + char *p = str; + + for(uint32_t i = 0; i < datalen; i++) { + *p++ = "0123456789ABCDEF"[data[i] >> 4]; + *p++ = "0123456789ABCDEF"[data[i] & 15]; } - debug("\n"); + *p = 0; + + debug(c, "%s: len %lu src %u dst %u seq %u ack %u wnd %u aux %x ctl %s%s%s%s%s data %s\n", + dir, (unsigned long)len, hdr.src, hdr.dst, hdr.seq, hdr.ack, hdr.wnd, hdr.aux, + hdr.ctl & SYN ? "SYN" : "", + hdr.ctl & RST ? "RST" : "", + hdr.ctl & FIN ? "FIN" : "", + hdr.ctl & ACK ? "ACK" : "", + hdr.ctl & MF ? "MF" : "", + str + ); +} + +static void debug_cwnd(struct utcp_connection *c) { + debug(c, "snd.cwnd %u snd.ssthresh %u\n", c->snd.cwnd, ~c->snd.ssthresh ? c->snd.ssthresh : 0); } #else -#define debug(...) -#define print_packet(...) +#define debug(...) do {} while(0) +#define print_packet(...) do {} while(0) +#define debug_cwnd(...) do {} while(0) #endif static void set_state(struct utcp_connection *c, enum state state) { c->state = state; - if(state == ESTABLISHED) - timerclear(&c->conn_timeout); - debug("%p new state: %s\n", c->utcp, strstate[state]); + + if(state == ESTABLISHED) { + timespec_clear(&c->conn_timeout); + } + + debug(c, "state %s\n", strstate[state]); } static bool fin_wanted(struct utcp_connection *c, uint32_t seq) { - if(seq != c->snd.last) + if(seq != c->snd.last) { return false; + } + switch(c->state) { case FIN_WAIT_1: case CLOSING: case LAST_ACK: return true; + default: return false; } } -static inline void list_connections(struct utcp *utcp) { - debug("%p has %d connections:\n", utcp, utcp->nconnections); - for(int i = 0; i < utcp->nconnections; i++) - debug(" %u -> %u state %s\n", utcp->connections[i]->src, utcp->connections[i]->dst, strstate[utcp->connections[i]->state]); +static bool is_reliable(struct utcp_connection *c) { + return c->flags & UTCP_RELIABLE; } static int32_t seqdiff(uint32_t a, uint32_t b) { @@ -144,45 +200,91 @@ static int32_t seqdiff(uint32_t a, uint32_t b) { } // Buffer functions -// TODO: convert to ringbuffers to avoid memmove() operations. +static bool buffer_wraps(struct buffer *buf) { + return buf->size - buf->offset < buf->used; +} + +static bool buffer_resize(struct buffer *buf, uint32_t newsize) { + char *newdata = realloc(buf->data, newsize); + + if(!newdata) { + return false; + } + + buf->data = newdata; + + if(buffer_wraps(buf)) { + // Shift the right part of the buffer until it hits the end of the new buffer. + // Old situation: + // [345......012] + // New situation: + // [345.........|........012] + uint32_t tailsize = buf->size - buf->offset; + uint32_t newoffset = newsize - tailsize; + memmove(buf->data + newoffset, buf->data + buf->offset, tailsize); + buf->offset = newoffset; + } + + buf->size = newsize; + return true; +} // Store data into the buffer static ssize_t buffer_put_at(struct buffer *buf, size_t offset, const void *data, size_t len) { - if(buf->maxsize <= buf->used) - return 0; - - debug("buffer_put_at %zu %zu %zu\n", buf->used, offset, len); + debug(NULL, "buffer_put_at %lu %lu %lu\n", (unsigned long)buf->used, (unsigned long)offset, (unsigned long)len); + // Ensure we don't store more than maxsize bytes in total size_t required = offset + len; + if(required > buf->maxsize) { - if(offset >= buf->maxsize) + if(offset >= buf->maxsize) { return 0; - abort(); + } + len = buf->maxsize - offset; required = buf->maxsize; } + // Check if we need to resize the buffer if(required > buf->size) { size_t newsize = buf->size; + if(!newsize) { - newsize = required; - } else { - do { - newsize *= 2; - } while(newsize < buf->used + len); + newsize = 4096; } - if(newsize > buf->maxsize) + + do { + newsize *= 2; + } while(newsize < required); + + if(newsize > buf->maxsize) { newsize = buf->maxsize; - char *newdata = realloc(buf->data, newsize); - if(!newdata) + } + + if(!buffer_resize(buf, newsize)) { return -1; - buf->data = newdata; - buf->size = newsize; + } + } + + uint32_t realoffset = buf->offset + offset; + + if(buf->size - buf->offset <= offset) { + // The offset wrapped + realoffset -= buf->size; } - memcpy(buf->data + offset, data, len); - if(required > buf->used) + if(buf->size - realoffset < len) { + // The new chunk of data must be wrapped + memcpy(buf->data + realoffset, data, buf->size - realoffset); + memcpy(buf->data, (char *)data + buf->size - realoffset, len - (buf->size - realoffset)); + } else { + memcpy(buf->data + realoffset, data, len); + } + + if(required > buf->used) { buf->used = required; + } + return len; } @@ -190,47 +292,125 @@ static ssize_t buffer_put(struct buffer *buf, const void *data, size_t len) { return buffer_put_at(buf, buf->used, data, len); } -// Get data from the buffer. data can be NULL. -static ssize_t buffer_get(struct buffer *buf, void *data, size_t len) { - if(len > buf->used) - len = buf->used; - if(data) - memcpy(data, buf->data, len); - if(len < buf->used) - memmove(buf->data, buf->data + len, buf->used - len); - buf->used -= len; +// Copy data from the buffer without removing it. +static ssize_t buffer_copy(struct buffer *buf, void *data, size_t offset, size_t len) { + // Ensure we don't copy more than is actually stored in the buffer + if(offset >= buf->used) { + return 0; + } + + if(buf->used - offset < len) { + len = buf->used - offset; + } + + uint32_t realoffset = buf->offset + offset; + + if(buf->size - buf->offset <= offset) { + // The offset wrapped + realoffset -= buf->size; + } + + if(buf->size - realoffset < len) { + // The data is wrapped + memcpy(data, buf->data + realoffset, buf->size - realoffset); + memcpy((char *)data + buf->size - realoffset, buf->data, len - (buf->size - realoffset)); + } else { + memcpy(data, buf->data + realoffset, len); + } + return len; } // Copy data from the buffer without removing it. -static ssize_t buffer_copy(struct buffer *buf, void *data, size_t offset, size_t len) { - if(offset >= buf->used) +static ssize_t buffer_call(struct utcp_connection *c, struct buffer *buf, size_t offset, size_t len) { + if(!c->recv) { + return len; + } + + // Ensure we don't copy more than is actually stored in the buffer + if(offset >= buf->used) { return 0; - if(offset + len > buf->used) + } + + if(buf->used - offset < len) { len = buf->used - offset; - memcpy(data, buf->data + offset, len); + } + + uint32_t realoffset = buf->offset + offset; + + if(buf->size - buf->offset <= offset) { + // The offset wrapped + realoffset -= buf->size; + } + + if(buf->size - realoffset < len) { + // The data is wrapped + ssize_t rx1 = c->recv(c, buf->data + realoffset, buf->size - realoffset); + + if(rx1 < buf->size - realoffset) { + return rx1; + } + + // The channel might have been closed by the previous callback + if(!c->recv) { + return len; + } + + ssize_t rx2 = c->recv(c, buf->data, len - (buf->size - realoffset)); + + if(rx2 < 0) { + return rx2; + } else { + return rx1 + rx2; + } + } else { + return c->recv(c, buf->data + realoffset, len); + } +} + +// Discard data from the buffer. +static ssize_t buffer_discard(struct buffer *buf, size_t len) { + if(buf->used < len) { + len = buf->used; + } + + if(buf->size - buf->offset <= len) { + buf->offset -= buf->size; + } + + if(buf->used == len) { + buf->offset = 0; + } else { + buf->offset += len; + } + + buf->used -= len; + return len; } -static bool buffer_init(struct buffer *buf, uint32_t len, uint32_t maxlen) { - memset(buf, 0, sizeof *buf); - if(len) { - buf->data = malloc(len); - if(!buf->data) - return false; +static void buffer_clear(struct buffer *buf) { + buf->used = 0; + buf->offset = 0; +} + +static bool buffer_set_size(struct buffer *buf, uint32_t minsize, uint32_t maxsize) { + if(maxsize < minsize) { + maxsize = minsize; } - buf->size = len; - buf->maxsize = maxlen; - return true; + + buf->maxsize = maxsize; + + return buf->size >= minsize || buffer_resize(buf, minsize); } static void buffer_exit(struct buffer *buf) { free(buf->data); - memset(buf, 0, sizeof *buf); + memset(buf, 0, sizeof(*buf)); } static uint32_t buffer_free(const struct buffer *buf) { - return buf->maxsize - buf->used; + return buf->maxsize > buf->used ? buf->maxsize - buf->used : 0; } // Connections are stored in a sorted list. @@ -246,31 +426,36 @@ static int compare(const void *va, const void *vb) { assert(a->src && b->src); int c = (int)a->src - (int)b->src; - if(c) + + if(c) { return c; + } + c = (int)a->dst - (int)b->dst; return c; } static struct utcp_connection *find_connection(const struct utcp *utcp, uint16_t src, uint16_t dst) { - if(!utcp->nconnections) + if(!utcp->nconnections) { return NULL; + } + struct utcp_connection key = { .src = src, .dst = dst, }, *keyp = &key; - struct utcp_connection **match = bsearch(&keyp, utcp->connections, utcp->nconnections, sizeof *utcp->connections, compare); + struct utcp_connection **match = bsearch(&keyp, utcp->connections, utcp->nconnections, sizeof(*utcp->connections), compare); return match ? *match : NULL; } static void free_connection(struct utcp_connection *c) { struct utcp *utcp = c->utcp; - struct utcp_connection **cp = bsearch(&c, utcp->connections, utcp->nconnections, sizeof *utcp->connections, compare); + struct utcp_connection **cp = bsearch(&c, utcp->connections, utcp->nconnections, sizeof(*utcp->connections), compare); assert(cp); int i = cp - utcp->connections; - memmove(cp, cp + 1, (utcp->nconnections - i - 1) * sizeof *cp); + memmove(cp, cp + 1, (utcp->nconnections - i - 1) * sizeof(*cp)); utcp->nconnections--; buffer_exit(&c->rcvbuf); @@ -291,34 +476,44 @@ static struct utcp_connection *allocate_connection(struct utcp *utcp, uint16_t s errno = ENOMEM; return NULL; } + src = rand() | 0x8000; - while(find_connection(utcp, src, dst)) + + while(find_connection(utcp, src, dst)) { src++; + } } // Allocate memory for the new connection if(utcp->nconnections >= utcp->nallocated) { - if(!utcp->nallocated) + if(!utcp->nallocated) { utcp->nallocated = 4; - else + } else { utcp->nallocated *= 2; - struct utcp_connection **new_array = realloc(utcp->connections, utcp->nallocated * sizeof *utcp->connections); - if(!new_array) + } + + struct utcp_connection **new_array = realloc(utcp->connections, utcp->nallocated * sizeof(*utcp->connections)); + + if(!new_array) { return NULL; + } + utcp->connections = new_array; } - struct utcp_connection *c = calloc(1, sizeof *c); - if(!c) + struct utcp_connection *c = calloc(1, sizeof(*c)); + + if(!c) { return NULL; + } - if(!buffer_init(&c->sndbuf, DEFAULT_SNDBUFSIZE, DEFAULT_MAXSNDBUFSIZE)) { + if(!buffer_set_size(&c->sndbuf, DEFAULT_SNDBUFSIZE, DEFAULT_MAXSNDBUFSIZE)) { free(c); return NULL; } - if(!buffer_init(&c->rcvbuf, DEFAULT_RCVBUFSIZE, DEFAULT_MAXRCVBUFSIZE)) { + if(!buffer_set_size(&c->rcvbuf, DEFAULT_RCVBUFSIZE, DEFAULT_MAXRCVBUFSIZE)) { buffer_exit(&c->sndbuf); free(c); return NULL; @@ -335,95 +530,134 @@ static struct utcp_connection *allocate_connection(struct utcp *utcp, uint16_t s #endif c->snd.una = c->snd.iss; c->snd.nxt = c->snd.iss + 1; - c->rcv.wnd = utcp->mtu; c->snd.last = c->snd.nxt; - c->snd.cwnd = utcp->mtu; + c->snd.cwnd = (utcp->mss > 2190 ? 2 : utcp->mss > 1095 ? 3 : 4) * utcp->mss; + c->snd.ssthresh = ~0; + debug_cwnd(c); + c->srtt = 0; + c->rttvar = 0; + c->rto = START_RTO; c->utcp = utcp; // Add it to the sorted list of connections utcp->connections[utcp->nconnections++] = c; - qsort(utcp->connections, utcp->nconnections, sizeof *utcp->connections, compare); + qsort(utcp->connections, utcp->nconnections, sizeof(*utcp->connections), compare); return c; } +static inline uint32_t absdiff(uint32_t a, uint32_t b) { + if(a > b) { + return a - b; + } else { + return b - a; + } +} + // Update RTT variables. See RFC 6298. static void update_rtt(struct utcp_connection *c, uint32_t rtt) { if(!rtt) { - debug("invalid rtt\n"); + debug(c, "invalid rtt\n"); return; } - struct utcp *utcp = c->utcp; - - if(!utcp->srtt) { - utcp->srtt = rtt; - utcp->rttvar = rtt / 2; - utcp->rto = rtt + max(2 * rtt, CLOCK_GRANULARITY); + if(!c->srtt) { + c->srtt = rtt; + c->rttvar = rtt / 2; } else { - utcp->rttvar = (utcp->rttvar * 3 + abs(utcp->srtt - rtt)) / 4; - utcp->srtt = (utcp->srtt * 7 + rtt) / 8; - utcp->rto = utcp->srtt + max(utcp->rttvar, CLOCK_GRANULARITY); + c->rttvar = (c->rttvar * 3 + absdiff(c->srtt, rtt)) / 4; + c->srtt = (c->srtt * 7 + rtt) / 8; } - if(utcp->rto > MAX_RTO) - utcp->rto = MAX_RTO; + c->rto = c->srtt + max(4 * c->rttvar, CLOCK_GRANULARITY); - debug("rtt %u srtt %u rttvar %u rto %u\n", rtt, utcp->srtt, utcp->rttvar, utcp->rto); + if(c->rto > MAX_RTO) { + c->rto = MAX_RTO; + } + + debug(c, "rtt %u srtt %u rttvar %u rto %u\n", rtt, c->srtt, c->rttvar, c->rto); } static void start_retransmit_timer(struct utcp_connection *c) { - gettimeofday(&c->rtrx_timeout, NULL); - c->rtrx_timeout.tv_usec += c->utcp->rto; - while(c->rtrx_timeout.tv_usec >= 1000000) { - c->rtrx_timeout.tv_usec -= 1000000; + clock_gettime(UTCP_CLOCK, &c->rtrx_timeout); + + uint32_t rto = c->rto; + + while(rto > USEC_PER_SEC) { + c->rtrx_timeout.tv_sec++; + rto -= USEC_PER_SEC; + } + + c->rtrx_timeout.tv_nsec += rto * 1000; + + if(c->rtrx_timeout.tv_nsec >= NSEC_PER_SEC) { + c->rtrx_timeout.tv_nsec -= NSEC_PER_SEC; c->rtrx_timeout.tv_sec++; } - debug("timeout set to %lu.%06lu (%u)\n", c->rtrx_timeout.tv_sec, c->rtrx_timeout.tv_usec, c->utcp->rto); + + debug(c, "rtrx_timeout %ld.%06lu\n", c->rtrx_timeout.tv_sec, c->rtrx_timeout.tv_nsec); } static void stop_retransmit_timer(struct utcp_connection *c) { - timerclear(&c->rtrx_timeout); - debug("timeout cleared\n"); + timespec_clear(&c->rtrx_timeout); + debug(c, "rtrx_timeout cleared\n"); } -struct utcp_connection *utcp_connect(struct utcp *utcp, uint16_t dst, utcp_recv_t recv, void *priv) { +struct utcp_connection *utcp_connect_ex(struct utcp *utcp, uint16_t dst, utcp_recv_t recv, void *priv, uint32_t flags) { struct utcp_connection *c = allocate_connection(utcp, 0, dst); - if(!c) + + if(!c) { return NULL; + } + assert((flags & ~0x1f) == 0); + + c->flags = flags; c->recv = recv; c->priv = priv; - struct hdr hdr; - - hdr.src = c->src; - hdr.dst = c->dst; - hdr.seq = c->snd.iss; - hdr.ack = 0; - hdr.wnd = c->rcv.wnd; - hdr.ctl = SYN; - hdr.aux = 0; + struct { + struct hdr hdr; + uint8_t init[4]; + } pkt; + + pkt.hdr.src = c->src; + pkt.hdr.dst = c->dst; + pkt.hdr.seq = c->snd.iss; + pkt.hdr.ack = 0; + pkt.hdr.wnd = c->rcvbuf.maxsize; + pkt.hdr.ctl = SYN; + pkt.hdr.aux = 0x0101; + pkt.init[0] = 1; + pkt.init[1] = 0; + pkt.init[2] = 0; + pkt.init[3] = flags & 0x7; set_state(c, SYN_SENT); - print_packet(utcp, "send", &hdr, sizeof hdr); - utcp->send(utcp, &hdr, sizeof hdr); + print_packet(c, "send", &pkt, sizeof(pkt)); + utcp->send(utcp, &pkt, sizeof(pkt)); - gettimeofday(&c->conn_timeout, NULL); + clock_gettime(UTCP_CLOCK, &c->conn_timeout); c->conn_timeout.tv_sec += utcp->timeout; + start_retransmit_timer(c); + return c; } +struct utcp_connection *utcp_connect(struct utcp *utcp, uint16_t dst, utcp_recv_t recv, void *priv) { + return utcp_connect_ex(utcp, dst, recv, priv, UTCP_TCP); +} + void utcp_accept(struct utcp_connection *c, utcp_recv_t recv, void *priv) { if(c->reapable || c->state != SYN_RECEIVED) { - debug("Error: accept() called on invalid connection %p in state %s\n", c, strstate[c->state]); + debug(c, "accept() called on invalid connection in state %s\n", c, strstate[c->state]); return; } - debug("%p accepted, %p %p\n", c, recv, priv); + debug(c, "accepted %p %p\n", c, recv, priv); c->recv = recv; c->priv = priv; set_state(c, ESTABLISHED); @@ -431,38 +665,40 @@ void utcp_accept(struct utcp_connection *c, utcp_recv_t recv, void *priv) { static void ack(struct utcp_connection *c, bool sendatleastone) { int32_t left = seqdiff(c->snd.last, c->snd.nxt); - int32_t cwndleft = c->snd.cwnd - seqdiff(c->snd.nxt, c->snd.una); - debug("cwndleft = %d\n", cwndleft); + int32_t cwndleft = is_reliable(c) ? min(c->snd.cwnd, c->snd.wnd) - seqdiff(c->snd.nxt, c->snd.una) : MAX_UNRELIABLE_SIZE; assert(left >= 0); - if(cwndleft <= 0) - cwndleft = 0; - - if(cwndleft < left) + if(cwndleft <= 0) { + left = 0; + } else if(cwndleft < left) { left = cwndleft; - if(!left && !sendatleastone) + if(!sendatleastone || cwndleft > c->utcp->mss) { + left -= left % c->utcp->mss; + } + } + + debug(c, "cwndleft %d left %d\n", cwndleft, left); + + if(!left && !sendatleastone) { return; + } struct { struct hdr hdr; - char data[]; - } *pkt; - - pkt = malloc(sizeof pkt->hdr + c->utcp->mtu); - if(!pkt) - return; + uint8_t data[]; + } *pkt = c->utcp->pkt; pkt->hdr.src = c->src; pkt->hdr.dst = c->dst; pkt->hdr.ack = c->rcv.nxt; - pkt->hdr.wnd = c->snd.wnd; + pkt->hdr.wnd = is_reliable(c) ? c->rcvbuf.maxsize : 0; pkt->hdr.ctl = ACK; pkt->hdr.aux = 0; do { - uint32_t seglen = left > c->utcp->mtu ? c->utcp->mtu : left; + uint32_t seglen = left > c->utcp->mss ? c->utcp->mss : left; pkt->hdr.seq = c->snd.nxt; buffer_copy(&c->sndbuf, pkt->data, seqdiff(c->snd.nxt, c->snd.una), seglen); @@ -470,6 +706,14 @@ static void ack(struct utcp_connection *c, bool sendatleastone) { c->snd.nxt += seglen; left -= seglen; + if(!is_reliable(c)) { + if(left) { + pkt->hdr.ctl |= MF; + } else { + pkt->hdr.ctl &= ~MF; + } + } + if(seglen && fin_wanted(c, c->snd.nxt)) { seglen--; pkt->hdr.ctl |= FIN; @@ -477,21 +721,23 @@ static void ack(struct utcp_connection *c, bool sendatleastone) { if(!c->rtt_start.tv_sec) { // Start RTT measurement - gettimeofday(&c->rtt_start, NULL); + clock_gettime(UTCP_CLOCK, &c->rtt_start); c->rtt_seq = pkt->hdr.seq + seglen; - debug("Starting RTT measurement, expecting ack %u\n", c->rtt_seq); + debug(c, "starting RTT measurement, expecting ack %u\n", c->rtt_seq); } - print_packet(c->utcp, "send", pkt, sizeof pkt->hdr + seglen); - c->utcp->send(c->utcp, pkt, sizeof pkt->hdr + seglen); - } while(left); + print_packet(c, "send", pkt, sizeof(pkt->hdr) + seglen); + c->utcp->send(c->utcp, pkt, sizeof(pkt->hdr) + seglen); - free(pkt); + if(left && !is_reliable(c)) { + pkt->hdr.wnd += seglen; + } + } while(left); } ssize_t utcp_send(struct utcp_connection *c, const void *data, size_t len) { if(c->reapable) { - debug("Error: send() called on closed connection %p\n", c); + debug(c, "send() called on closed connection\n"); errno = EBADF; return -1; } @@ -499,44 +745,97 @@ ssize_t utcp_send(struct utcp_connection *c, const void *data, size_t len) { switch(c->state) { case CLOSED: case LISTEN: - case SYN_SENT: - case SYN_RECEIVED: - debug("Error: send() called on unconnected connection %p\n", c); + debug(c, "send() called on unconnected connection\n"); errno = ENOTCONN; return -1; + + case SYN_SENT: + case SYN_RECEIVED: case ESTABLISHED: case CLOSE_WAIT: break; + case FIN_WAIT_1: case FIN_WAIT_2: case CLOSING: case LAST_ACK: case TIME_WAIT: - debug("Error: send() called on closing connection %p\n", c); + debug(c, "send() called on closed connection\n"); errno = EPIPE; return -1; } - // Add data to send buffer + // Exit early if we have nothing to send. - if(!len) + if(!len) { return 0; + } if(!data) { errno = EFAULT; return -1; } - len = buffer_put(&c->sndbuf, data, len); - if(len <= 0) { - errno = EWOULDBLOCK; + // Check if we need to be able to buffer all data + + if(c->flags & UTCP_NO_PARTIAL) { + if(len > buffer_free(&c->sndbuf)) { + if(len > c->sndbuf.maxsize) { + errno = EMSGSIZE; + return -1; + } else { + errno = EWOULDBLOCK; + return 0; + } + } + } + + // Add data to send buffer. + + if(is_reliable(c)) { + len = buffer_put(&c->sndbuf, data, len); + } else if(c->state != SYN_SENT && c->state != SYN_RECEIVED) { + if(len > MAX_UNRELIABLE_SIZE || buffer_put(&c->sndbuf, data, len) != (ssize_t)len) { + errno = EMSGSIZE; + return -1; + } + } else { return 0; } + if(len <= 0) { + if(is_reliable(c)) { + errno = EWOULDBLOCK; + return 0; + } else { + return len; + } + } + c->snd.last += len; + + // Don't send anything yet if the connection has not fully established yet + + if(c->state == SYN_SENT || c->state == SYN_RECEIVED) { + return len; + } + ack(c, false); - if(!timerisset(&c->rtrx_timeout)) + + if(!is_reliable(c)) { + c->snd.una = c->snd.nxt = c->snd.last; + buffer_discard(&c->sndbuf, c->sndbuf.used); + } + + if(is_reliable(c) && !timespec_isset(&c->rtrx_timeout)) { start_retransmit_timer(c); + } + + if(is_reliable(c) && !timespec_isset(&c->conn_timeout)) { + clock_gettime(UTCP_CLOCK, &c->conn_timeout); + c->conn_timeout.tv_sec += c->utcp->timeout; + } + return len; } @@ -546,10 +845,9 @@ static void swap_ports(struct hdr *hdr) { hdr->dst = tmp; } -static void retransmit(struct utcp_connection *c) { +static void fast_retransmit(struct utcp_connection *c) { if(c->state == CLOSED || c->snd.last == c->snd.una) { - debug("Retransmit() called but nothing to retransmit!\n"); - stop_retransmit_timer(c); + debug(c, "fast_retransmit() called but nothing to retransmit!\n"); return; } @@ -557,102 +855,184 @@ static void retransmit(struct utcp_connection *c) { struct { struct hdr hdr; - char data[]; - } *pkt; + uint8_t data[]; + } *pkt = c->utcp->pkt; + + pkt->hdr.src = c->src; + pkt->hdr.dst = c->dst; + pkt->hdr.wnd = c->rcvbuf.maxsize; + pkt->hdr.aux = 0; + + switch(c->state) { + case ESTABLISHED: + case FIN_WAIT_1: + case CLOSE_WAIT: + case CLOSING: + case LAST_ACK: + // Send unacked data again. + pkt->hdr.seq = c->snd.una; + pkt->hdr.ack = c->rcv.nxt; + pkt->hdr.ctl = ACK; + uint32_t len = min(seqdiff(c->snd.last, c->snd.una), utcp->mss); + + if(fin_wanted(c, c->snd.una + len)) { + len--; + pkt->hdr.ctl |= FIN; + } - pkt = malloc(sizeof pkt->hdr + c->utcp->mtu); - if(!pkt) + buffer_copy(&c->sndbuf, pkt->data, 0, len); + print_packet(c, "rtrx", pkt, sizeof(pkt->hdr) + len); + utcp->send(utcp, pkt, sizeof(pkt->hdr) + len); + break; + + default: + break; + } +} + +static void retransmit(struct utcp_connection *c) { + if(c->state == CLOSED || c->snd.last == c->snd.una) { + debug(c, "retransmit() called but nothing to retransmit!\n"); + stop_retransmit_timer(c); return; + } + + struct utcp *utcp = c->utcp; + + if(utcp->retransmit) { + utcp->retransmit(c); + } + + struct { + struct hdr hdr; + uint8_t data[]; + } *pkt = c->utcp->pkt; pkt->hdr.src = c->src; pkt->hdr.dst = c->dst; - pkt->hdr.wnd = c->rcv.wnd; + pkt->hdr.wnd = c->rcvbuf.maxsize; pkt->hdr.aux = 0; switch(c->state) { - case SYN_SENT: - // Send our SYN again - pkt->hdr.seq = c->snd.iss; - pkt->hdr.ack = 0; - pkt->hdr.ctl = SYN; - print_packet(c->utcp, "rtrx", pkt, sizeof pkt->hdr); - utcp->send(utcp, pkt, sizeof pkt->hdr); - break; + case SYN_SENT: + // Send our SYN again + pkt->hdr.seq = c->snd.iss; + pkt->hdr.ack = 0; + pkt->hdr.ctl = SYN; + pkt->hdr.aux = 0x0101; + pkt->data[0] = 1; + pkt->data[1] = 0; + pkt->data[2] = 0; + pkt->data[3] = c->flags & 0x7; + print_packet(c, "rtrx", pkt, sizeof(pkt->hdr) + 4); + utcp->send(utcp, pkt, sizeof(pkt->hdr) + 4); + break; - case SYN_RECEIVED: - // Send SYNACK again - pkt->hdr.seq = c->snd.nxt; - pkt->hdr.ack = c->rcv.nxt; - pkt->hdr.ctl = SYN | ACK; - print_packet(c->utcp, "rtrx", pkt, sizeof pkt->hdr); - utcp->send(utcp, pkt, sizeof pkt->hdr); - break; + case SYN_RECEIVED: + // Send SYNACK again + pkt->hdr.seq = c->snd.nxt; + pkt->hdr.ack = c->rcv.nxt; + pkt->hdr.ctl = SYN | ACK; + print_packet(c, "rtrx", pkt, sizeof(pkt->hdr)); + utcp->send(utcp, pkt, sizeof(pkt->hdr)); + break; - case ESTABLISHED: - case FIN_WAIT_1: - case CLOSE_WAIT: - case CLOSING: - case LAST_ACK: - // Send unacked data again. - pkt->hdr.seq = c->snd.una; - pkt->hdr.ack = c->rcv.nxt; - pkt->hdr.ctl = ACK; - uint32_t len = seqdiff(c->snd.last, c->snd.una); - if(len > utcp->mtu) - len = utcp->mtu; - if(fin_wanted(c, c->snd.una + len)) { - len--; - pkt->hdr.ctl |= FIN; - } - c->snd.nxt = c->snd.una + len; - c->snd.cwnd = utcp->mtu; // reduce cwnd on retransmit - buffer_copy(&c->sndbuf, pkt->data, 0, len); - print_packet(c->utcp, "rtrx", pkt, sizeof pkt->hdr + len); - utcp->send(utcp, pkt, sizeof pkt->hdr + len); - break; + case ESTABLISHED: + case FIN_WAIT_1: + case CLOSE_WAIT: + case CLOSING: + case LAST_ACK: + // Send unacked data again. + pkt->hdr.seq = c->snd.una; + pkt->hdr.ack = c->rcv.nxt; + pkt->hdr.ctl = ACK; + uint32_t len = min(seqdiff(c->snd.last, c->snd.una), utcp->mss); + + if(fin_wanted(c, c->snd.una + len)) { + len--; + pkt->hdr.ctl |= FIN; + } - case CLOSED: - case LISTEN: - case TIME_WAIT: - case FIN_WAIT_2: - // We shouldn't need to retransmit anything in this state. + // RFC 5681 slow start after timeout + uint32_t flightsize = seqdiff(c->snd.nxt, c->snd.una); + c->snd.ssthresh = max(flightsize / 2, utcp->mss * 2); // eq. 4 + c->snd.cwnd = utcp->mss; + debug_cwnd(c); + + buffer_copy(&c->sndbuf, pkt->data, 0, len); + print_packet(c, "rtrx", pkt, sizeof(pkt->hdr) + len); + utcp->send(utcp, pkt, sizeof(pkt->hdr) + len); + + c->snd.nxt = c->snd.una + len; + break; + + case CLOSED: + case LISTEN: + case TIME_WAIT: + case FIN_WAIT_2: + // We shouldn't need to retransmit anything in this state. #ifdef UTCP_DEBUG - abort(); + abort(); #endif - stop_retransmit_timer(c); - goto cleanup; + stop_retransmit_timer(c); + goto cleanup; } start_retransmit_timer(c); - utcp->rto *= 2; - if(utcp->rto > MAX_RTO) - utcp->rto = MAX_RTO; + c->rto *= 2; + + if(c->rto > MAX_RTO) { + c->rto = MAX_RTO; + } + c->rtt_start.tv_sec = 0; // invalidate RTT timer + c->dupack = 0; // cancel any ongoing fast recovery cleanup: - free(pkt); + return; } -// Update receive buffer and SACK entries after consuming data. +/* Update receive buffer and SACK entries after consuming data. + * + * Situation: + * + * |.....0000..1111111111.....22222......3333| + * |---------------^ + * + * 0..3 represent the SACK entries. The ^ indicates up to which point we want + * to remove data from the receive buffer. The idea is to substract "len" + * from the offset of all the SACK entries, and then remove/cut down entries + * that are shifted to before the start of the receive buffer. + * + * There are three cases: + * - the SACK entry is after ^, in that case just change the offset. + * - the SACK entry starts before and ends after ^, so we have to + * change both its offset and size. + * - the SACK entry is completely before ^, in that case delete it. + */ static void sack_consume(struct utcp_connection *c, size_t len) { - debug("sack_consume %zu\n", len); - if(len > c->rcvbuf.used) - abort(); + debug(c, "sack_consume %lu\n", (unsigned long)len); - buffer_get(&c->rcvbuf, NULL, len); + if(len > c->rcvbuf.used) { + debug(c, "all SACK entries consumed\n"); + c->sacks[0].len = 0; + return; + } + + buffer_discard(&c->rcvbuf, len); - for(int i = 0; i < NSACKS && c->sacks[i].len; ) { + for(int i = 0; i < NSACKS && c->sacks[i].len;) { if(len < c->sacks[i].offset) { c->sacks[i].offset -= len; i++; } else if(len < c->sacks[i].offset + c->sacks[i].len) { - c->sacks[i].offset = 0; c->sacks[i].len -= len - c->sacks[i].offset; + c->sacks[i].offset = 0; i++; } else { if(i < NSACKS - 1) { - memmove(&c->sacks[i], &c->sacks[i + 1], (NSACKS - 1 - i) * sizeof c->sacks[i]); - c->sacks[i + 1].len = 0; + memmove(&c->sacks[i], &c->sacks[i + 1], (NSACKS - 1 - i) * sizeof(c->sacks)[i]); + c->sacks[NSACKS - 1].len = 0; } else { c->sacks[i].len = 0; break; @@ -660,136 +1040,263 @@ static void sack_consume(struct utcp_connection *c, size_t len) { } } - for(int i = 0; i < NSACKS && c->sacks[i].len; i++) - debug("SACK[%d] offset %u len %u\n", i, c->sacks[i].offset, c->sacks[i].len); + for(int i = 0; i < NSACKS && c->sacks[i].len; i++) { + debug(c, "SACK[%d] offset %u len %u\n", i, c->sacks[i].offset, c->sacks[i].len); + } } static void handle_out_of_order(struct utcp_connection *c, uint32_t offset, const void *data, size_t len) { - debug("out of order packet, offset %u\n", offset); + debug(c, "out of order packet, offset %u\n", offset); // Packet loss or reordering occured. Store the data in the buffer. ssize_t rxd = buffer_put_at(&c->rcvbuf, offset, data, len); - if(rxd < len) - abort(); + + if(rxd <= 0) { + debug(c, "packet outside receive buffer, dropping\n"); + return; + } + + if((size_t)rxd < len) { + debug(c, "packet partially outside receive buffer\n"); + len = rxd; + } // Make note of where we put it. for(int i = 0; i < NSACKS; i++) { if(!c->sacks[i].len) { // nothing to merge, add new entry - debug("New SACK entry %d\n", i); + debug(c, "new SACK entry %d\n", i); c->sacks[i].offset = offset; c->sacks[i].len = rxd; break; } else if(offset < c->sacks[i].offset) { if(offset + rxd < c->sacks[i].offset) { // insert before if(!c->sacks[NSACKS - 1].len) { // only if room left - debug("Insert SACK entry at %d\n", i); - memmove(&c->sacks[i + 1], &c->sacks[i], (NSACKS - i - 1) * sizeof c->sacks[i]); + debug(c, "insert SACK entry at %d\n", i); + memmove(&c->sacks[i + 1], &c->sacks[i], (NSACKS - i - 1) * sizeof(c->sacks)[i]); c->sacks[i].offset = offset; c->sacks[i].len = rxd; + } else { + debug(c, "SACK entries full, dropping packet\n"); } + break; } else { // merge - debug("Merge with start of SACK entry at %d\n", i); + debug(c, "merge with start of SACK entry at %d\n", i); c->sacks[i].offset = offset; break; } } else if(offset <= c->sacks[i].offset + c->sacks[i].len) { if(offset + rxd > c->sacks[i].offset + c->sacks[i].len) { // merge - debug("Merge with end of SACK entry at %d\n", i); + debug(c, "merge with end of SACK entry at %d\n", i); c->sacks[i].len = offset + rxd - c->sacks[i].offset; // TODO: handle potential merge with next entry } + break; } } - for(int i = 0; i < NSACKS && c->sacks[i].len; i++) - debug("SACK[%d] offset %u len %u\n", i, c->sacks[i].offset, c->sacks[i].len); + for(int i = 0; i < NSACKS && c->sacks[i].len; i++) { + debug(c, "SACK[%d] offset %u len %u\n", i, c->sacks[i].offset, c->sacks[i].len); + } } static void handle_in_order(struct utcp_connection *c, const void *data, size_t len) { - // Check if we can process out-of-order data now. - if(c->sacks[0].len && len >= c->sacks[0].offset) { // TODO: handle overlap with second SACK - debug("incoming packet len %zu connected with SACK at %u\n", len, c->sacks[0].offset); - buffer_put_at(&c->rcvbuf, 0, data, len); // TODO: handle return value - len = max(len, c->sacks[0].offset + c->sacks[0].len); - data = c->rcvbuf.data; - } - if(c->recv) { ssize_t rxd = c->recv(c, data, len); - if(rxd != len) { + + if(rxd != (ssize_t)len) { // TODO: handle the application not accepting all data. abort(); } } - if(c->rcvbuf.used) + // Check if we can process out-of-order data now. + if(c->sacks[0].len && len >= c->sacks[0].offset) { + debug(c, "incoming packet len %lu connected with SACK at %u\n", (unsigned long)len, c->sacks[0].offset); + + if(len < c->sacks[0].offset + c->sacks[0].len) { + size_t offset = len; + len = c->sacks[0].offset + c->sacks[0].len; + size_t remainder = len - offset; + + ssize_t rxd = buffer_call(c, &c->rcvbuf, offset, remainder); + + if(rxd != (ssize_t)remainder) { + // TODO: handle the application not accepting all data. + abort(); + } + } + } + + if(c->rcvbuf.used) { sack_consume(c, len); + } c->rcv.nxt += len; } +static void handle_unreliable(struct utcp_connection *c, const struct hdr *hdr, const void *data, size_t len) { + // Fast path for unfragmented packets + if(!hdr->wnd && !(hdr->ctl & MF)) { + if(c->recv) { + c->recv(c, data, len); + } -static void handle_incoming_data(struct utcp_connection *c, uint32_t seq, const void *data, size_t len) { - uint32_t offset = seqdiff(seq, c->rcv.nxt); - if(offset + len > c->rcvbuf.maxsize) - abort(); + c->rcv.nxt = hdr->seq + len; + return; + } + + // Ensure reassembled packet are not larger than 64 kiB + if(hdr->wnd >= MAX_UNRELIABLE_SIZE || hdr->wnd + len > MAX_UNRELIABLE_SIZE) { + return; + } + + // Don't accept out of order fragments + if(hdr->wnd && hdr->seq != c->rcv.nxt) { + return; + } + + // Reset the receive buffer for the first fragment + if(!hdr->wnd) { + buffer_clear(&c->rcvbuf); + } + + ssize_t rxd = buffer_put_at(&c->rcvbuf, hdr->wnd, data, len); + + if(rxd != (ssize_t)len) { + return; + } + + // Send the packet if it's the final fragment + if(!(hdr->ctl & MF)) { + buffer_call(c, &c->rcvbuf, 0, hdr->wnd + len); + } + + c->rcv.nxt = hdr->seq + len; +} + +static void handle_incoming_data(struct utcp_connection *c, const struct hdr *hdr, const void *data, size_t len) { + if(!is_reliable(c)) { + handle_unreliable(c, hdr, data, len); + return; + } - if(offset) + uint32_t offset = seqdiff(hdr->seq, c->rcv.nxt); + + if(offset) { handle_out_of_order(c, offset, data, len); - else + } else { handle_in_order(c, data, len); + } } ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { + const uint8_t *ptr = data; + if(!utcp) { errno = EFAULT; return -1; } - if(!len) + if(!len) { return 0; + } if(!data) { errno = EFAULT; return -1; } - print_packet(utcp, "recv", data, len); - // Drop packets smaller than the header struct hdr hdr; - if(len < sizeof hdr) { + + if(len < sizeof(hdr)) { + print_packet(NULL, "recv", data, len); errno = EBADMSG; return -1; } // Make a copy from the potentially unaligned data to a struct hdr - memcpy(&hdr, data, sizeof hdr); - data += sizeof hdr; - len -= sizeof hdr; + memcpy(&hdr, ptr, sizeof(hdr)); + + // Try to match the packet to an existing connection + + struct utcp_connection *c = find_connection(utcp, hdr.dst, hdr.src); + print_packet(c, "recv", data, len); + + // Process the header + + ptr += sizeof(hdr); + len -= sizeof(hdr); // Drop packets with an unknown CTL flag - if(hdr.ctl & ~(SYN | ACK | RST | FIN)) { + if(hdr.ctl & ~(SYN | ACK | RST | FIN | MF)) { + print_packet(NULL, "recv", data, len); errno = EBADMSG; return -1; } - // Try to match the packet to an existing connection + // Check for auxiliary headers - struct utcp_connection *c = find_connection(utcp, hdr.dst, hdr.src); + const uint8_t *init = NULL; + + uint16_t aux = hdr.aux; + + while(aux) { + size_t auxlen = 4 * (aux >> 8) & 0xf; + uint8_t auxtype = aux & 0xff; + + if(len < auxlen) { + errno = EBADMSG; + return -1; + } + + switch(auxtype) { + case AUX_INIT: + if(!(hdr.ctl & SYN) || auxlen != 4) { + errno = EBADMSG; + return -1; + } + + init = ptr; + break; + + default: + errno = EBADMSG; + return -1; + } + + len -= auxlen; + ptr += auxlen; + + if(!(aux & 0x800)) { + break; + } + + if(len < 2) { + errno = EBADMSG; + return -1; + } + + memcpy(&aux, ptr, 2); + len -= 2; + ptr += 2; + } + + bool has_data = len || (hdr.ctl & (SYN | FIN)); // Is it for a new connection? if(!c) { // Ignore RST packets - if(hdr.ctl & RST) + if(hdr.ctl & RST) { return 0; + } // Is it a SYN packet and are we LISTENing? @@ -802,24 +1309,58 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { // Try to allocate memory, otherwise send a RST back c = allocate_connection(utcp, hdr.dst, hdr.src); + if(!c) { len = 1; goto reset; } + // Parse auxilliary information + if(init) { + if(init[0] < 1) { + len = 1; + goto reset; + } + + c->flags = init[3] & 0x7; + } else { + c->flags = UTCP_TCP; + } + +synack: // Return SYN+ACK, go to SYN_RECEIVED state c->snd.wnd = hdr.wnd; c->rcv.irs = hdr.seq; c->rcv.nxt = c->rcv.irs + 1; set_state(c, SYN_RECEIVED); - hdr.dst = c->dst; - hdr.src = c->src; - hdr.ack = c->rcv.irs + 1; - hdr.seq = c->snd.iss; - hdr.ctl = SYN | ACK; - print_packet(c->utcp, "send", &hdr, sizeof hdr); - utcp->send(utcp, &hdr, sizeof hdr); + struct { + struct hdr hdr; + uint8_t data[4]; + } pkt; + + pkt.hdr.src = c->src; + pkt.hdr.dst = c->dst; + pkt.hdr.ack = c->rcv.irs + 1; + pkt.hdr.seq = c->snd.iss; + pkt.hdr.wnd = c->rcvbuf.maxsize; + pkt.hdr.ctl = SYN | ACK; + + if(init) { + pkt.hdr.aux = 0x0101; + pkt.data[0] = 1; + pkt.data[1] = 0; + pkt.data[2] = 0; + pkt.data[3] = c->flags & 0x7; + print_packet(c, "send", &pkt, sizeof(hdr) + 4); + utcp->send(utcp, &pkt, sizeof(hdr) + 4); + } else { + pkt.hdr.aux = 0; + print_packet(c, "send", &pkt, sizeof(hdr)); + utcp->send(utcp, &pkt, sizeof(hdr)); + } + + start_retransmit_timer(c); } else { // No, we don't want your packets, send a RST back len = 1; @@ -829,18 +1370,18 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { return 0; } - debug("%p state %s\n", c->utcp, strstate[c->state]); + debug(c, "state %s\n", strstate[c->state]); // In case this is for a CLOSED connection, ignore the packet. // TODO: make it so incoming packets can never match a CLOSED connection. - if(c->state == CLOSED) + if(c->state == CLOSED) { + debug(c, "got packet for closed connection\n"); return 0; + } // It is for an existing connection. - uint32_t prevrcvnxt = c->rcv.nxt; - // 1. Drop invalid packets. // 1a. Drop packets that should not happen in our current state. @@ -856,6 +1397,7 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { case LAST_ACK: case TIME_WAIT: break; + default: #ifdef UTCP_DEBUG abort(); @@ -863,37 +1405,52 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { break; } - // 1b. Drop packets with a sequence number not in our receive window. + // 1b. Discard data that is not in our receive window. - bool acceptable; + if(is_reliable(c)) { + bool acceptable; - if(c->state == SYN_SENT) - acceptable = true; - else if(len == 0) - acceptable = seqdiff(hdr.seq, c->rcv.nxt) >= 0; - else { - int32_t rcv_offset = seqdiff(hdr.seq, c->rcv.nxt); + if(c->state == SYN_SENT) { + acceptable = true; + } else if(len == 0) { + acceptable = seqdiff(hdr.seq, c->rcv.nxt) >= 0; + } else { + int32_t rcv_offset = seqdiff(hdr.seq, c->rcv.nxt); - // cut already accepted front overlapping - if(rcv_offset < 0) { - acceptable = rcv_offset + len >= 0; - if(acceptable) { - data -= rcv_offset; - len += rcv_offset; + // cut already accepted front overlapping + if(rcv_offset < 0) { + acceptable = len > (size_t) - rcv_offset; + + if(acceptable) { + ptr -= rcv_offset; + len += rcv_offset; + hdr.seq -= rcv_offset; + } + } else { + acceptable = seqdiff(hdr.seq, c->rcv.nxt) >= 0 && seqdiff(hdr.seq, c->rcv.nxt) + len <= c->rcvbuf.maxsize; } } - acceptable = seqdiff(hdr.seq, c->rcv.nxt) >= 0 && seqdiff(hdr.seq, c->rcv.nxt) + len <= c->rcvbuf.maxsize; - } + if(!acceptable) { + debug(c, "packet not acceptable, %u <= %u + %lu < %u\n", c->rcv.nxt, hdr.seq, (unsigned long)len, c->rcv.nxt + c->rcvbuf.maxsize); - if(!acceptable) { - debug("Packet not acceptable, %u <= %u + %zu < %u\n", c->rcv.nxt, hdr.seq, len, c->rcv.nxt + c->rcvbuf.maxsize); - // Ignore unacceptable RST packets. - if(hdr.ctl & RST) - return 0; - // Otherwise, send an ACK back in the hope things improve. - ack(c, true); - return 0; + // Ignore unacceptable RST packets. + if(hdr.ctl & RST) { + return 0; + } + + // Otherwise, continue processing. + len = 0; + } + } else { +#if UTCP_DEBUG + int32_t rcv_offset = seqdiff(hdr.seq, c->rcv.nxt); + + if(rcv_offset) { + debug(c, "packet out of order, offset %u bytes", rcv_offset); + } + +#endif } c->snd.wnd = hdr.wnd; // TODO: move below @@ -902,11 +1459,20 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { // ackno should not roll back, and it should also not be bigger than what we ever could have sent // (= snd.una + c->sndbuf.used). + if(!is_reliable(c)) { + if(hdr.ack != c->snd.last && c->state >= ESTABLISHED) { + hdr.ack = c->snd.una; + } + } + if(hdr.ctl & ACK && (seqdiff(hdr.ack, c->snd.last) > 0 || seqdiff(hdr.ack, c->snd.una) < 0)) { - debug("Packet ack seqno out of range, %u <= %u < %u\n", c->snd.una, hdr.ack, c->snd.una + c->sndbuf.used); + debug(c, "packet ack seqno out of range, %u <= %u < %u\n", c->snd.una, hdr.ack, c->snd.una + c->sndbuf.used); + // Ignore unacceptable RST packets. - if(hdr.ctl & RST) + if(hdr.ctl & RST) { return 0; + } + goto reset; } @@ -915,46 +1481,73 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { if(hdr.ctl & RST) { switch(c->state) { case SYN_SENT: - if(!(hdr.ctl & ACK)) + if(!(hdr.ctl & ACK)) { return 0; + } + // The peer has refused our connection. set_state(c, CLOSED); errno = ECONNREFUSED; - if(c->recv) + + if(c->recv) { c->recv(c, NULL, 0); + } + + if(c->poll && !c->reapable) { + c->poll(c, 0); + } + return 0; + case SYN_RECEIVED: - if(hdr.ctl & ACK) + if(hdr.ctl & ACK) { return 0; + } + // We haven't told the application about this connection yet. Silently delete. free_connection(c); return 0; + case ESTABLISHED: case FIN_WAIT_1: case FIN_WAIT_2: case CLOSE_WAIT: - if(hdr.ctl & ACK) + if(hdr.ctl & ACK) { return 0; + } + // The peer has aborted our connection. set_state(c, CLOSED); errno = ECONNRESET; - if(c->recv) + + if(c->recv) { c->recv(c, NULL, 0); + } + + if(c->poll && !c->reapable) { + c->poll(c, 0); + } + return 0; + case CLOSING: case LAST_ACK: case TIME_WAIT: - if(hdr.ctl & ACK) + if(hdr.ctl & ACK) { return 0; + } + // As far as the application is concerned, the connection has already been closed. // If it has called utcp_close() already, we can immediately free this connection. if(c->reapable) { free_connection(c); return 0; } + // Otherwise, immediately move to the CLOSED state. set_state(c, CLOSED); return 0; + default: #ifdef UTCP_DEBUG abort(); @@ -963,22 +1556,28 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { } } + uint32_t advanced; + + if(!(hdr.ctl & ACK)) { + advanced = 0; + goto skip_ack; + } + // 3. Advance snd.una - uint32_t advanced = seqdiff(hdr.ack, c->snd.una); - prevrcvnxt = c->rcv.nxt; + advanced = seqdiff(hdr.ack, c->snd.una); if(advanced) { // RTT measurement if(c->rtt_start.tv_sec) { if(c->rtt_seq == hdr.ack) { - struct timeval now, diff; - gettimeofday(&now, NULL); - timersub(&now, &c->rtt_start, &diff); - update_rtt(c, diff.tv_sec * 1000000 + diff.tv_usec); + struct timespec now; + clock_gettime(UTCP_CLOCK, &now); + int32_t diff = timespec_diff_usec(&now, &c->rtt_start); + update_rtt(c, diff); c->rtt_start.tv_sec = 0; } else if(c->rtt_seq < hdr.ack) { - debug("Cancelling RTT measurement: %u < %u\n", c->rtt_seq, hdr.ack); + debug(c, "cancelling RTT measurement: %u < %u\n", c->rtt_seq, hdr.ack); c->rtt_start.tv_sec = 0; } } @@ -986,89 +1585,158 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { int32_t data_acked = advanced; switch(c->state) { - case SYN_SENT: - case SYN_RECEIVED: - data_acked--; - break; - // TODO: handle FIN as well. - default: - break; + case SYN_SENT: + case SYN_RECEIVED: + data_acked--; + break; + + // TODO: handle FIN as well. + default: + break; } assert(data_acked >= 0); +#ifndef NDEBUG int32_t bufused = seqdiff(c->snd.last, c->snd.una); assert(data_acked <= bufused); +#endif + + if(data_acked) { + buffer_discard(&c->sndbuf, data_acked); - if(data_acked) - buffer_get(&c->sndbuf, NULL, data_acked); + if(is_reliable(c)) { + c->do_poll = true; + } + } // Also advance snd.nxt if possible - if(seqdiff(c->snd.nxt, hdr.ack) < 0) + if(seqdiff(c->snd.nxt, hdr.ack) < 0) { c->snd.nxt = hdr.ack; + } c->snd.una = hdr.ack; - c->dupack = 0; - c->snd.cwnd += utcp->mtu; - if(c->snd.cwnd > c->sndbuf.maxsize) + if(c->dupack) { + if(c->dupack >= 3) { + debug(c, "fast recovery ended\n"); + c->snd.cwnd = c->snd.ssthresh; + } + + c->dupack = 0; + } + + // Increase the congestion window according to RFC 5681 + if(c->snd.cwnd < c->snd.ssthresh) { + c->snd.cwnd += min(advanced, utcp->mss); // eq. 2 + } else { + c->snd.cwnd += max(1, (utcp->mss * utcp->mss) / c->snd.cwnd); // eq. 3 + } + + if(c->snd.cwnd > c->sndbuf.maxsize) { c->snd.cwnd = c->sndbuf.maxsize; + } + + debug_cwnd(c); // Check if we have sent a FIN that is now ACKed. switch(c->state) { case FIN_WAIT_1: - if(c->snd.una == c->snd.last) + if(c->snd.una == c->snd.last) { set_state(c, FIN_WAIT_2); + } + break; + case CLOSING: if(c->snd.una == c->snd.last) { - gettimeofday(&c->conn_timeout, NULL); - c->conn_timeout.tv_sec += 60; + clock_gettime(UTCP_CLOCK, &c->conn_timeout); + c->conn_timeout.tv_sec += utcp->timeout; set_state(c, TIME_WAIT); } + break; + default: break; } } else { - if(!len) { + if(!len && is_reliable(c) && c->snd.una != c->snd.last) { c->dupack++; + debug(c, "duplicate ACK %d\n", c->dupack); + if(c->dupack == 3) { - debug("Triplicate ACK\n"); - //TODO: Resend one packet and go to fast recovery mode. See RFC 6582. - //We do a very simple variant here; reset the nxt pointer to the last acknowledged packet from the peer. - //Reset the congestion window so we wait for ACKs. - c->snd.nxt = c->snd.una; - c->snd.cwnd = utcp->mtu; - start_retransmit_timer(c); + // RFC 5681 fast recovery + debug(c, "fast recovery started\n", c->dupack); + uint32_t flightsize = seqdiff(c->snd.nxt, c->snd.una); + c->snd.ssthresh = max(flightsize / 2, utcp->mss * 2); // eq. 4 + c->snd.cwnd = min(c->snd.ssthresh + 3 * utcp->mss, c->sndbuf.maxsize); + + if(c->snd.cwnd > c->sndbuf.maxsize) { + c->snd.cwnd = c->sndbuf.maxsize; + } + + debug_cwnd(c); + + fast_retransmit(c); + } else if(c->dupack > 3) { + c->snd.cwnd += utcp->mss; + + if(c->snd.cwnd > c->sndbuf.maxsize) { + c->snd.cwnd = c->sndbuf.maxsize; + } + + debug_cwnd(c); } + + // We got an ACK which indicates the other side did get one of our packets. + // Reset the retransmission timer to avoid going to slow start, + // but don't touch the connection timeout. + start_retransmit_timer(c); } } // 4. Update timers if(advanced) { - timerclear(&c->conn_timeout); // It will be set anew in utcp_timeout() if c->snd.una != c->snd.nxt. - if(c->snd.una == c->snd.last) + if(c->snd.una == c->snd.last) { stop_retransmit_timer(c); - else + timespec_clear(&c->conn_timeout); + } else if(is_reliable(c)) { start_retransmit_timer(c); + clock_gettime(UTCP_CLOCK, &c->conn_timeout); + c->conn_timeout.tv_sec += utcp->timeout; + } } +skip_ack: // 5. Process SYN stuff if(hdr.ctl & SYN) { switch(c->state) { case SYN_SENT: + // This is a SYNACK. It should always have ACKed the SYN. - if(!advanced) + if(!advanced) { goto reset; + } + c->rcv.irs = hdr.seq; - c->rcv.nxt = hdr.seq; - set_state(c, ESTABLISHED); - // TODO: notify application of this somehow. + c->rcv.nxt = hdr.seq + 1; + + if(c->shut_wr) { + c->snd.last++; + set_state(c, FIN_WAIT_1); + } else { + set_state(c, ESTABLISHED); + } + break; + case SYN_RECEIVED: + // This is a retransmit of a SYN, send back the SYNACK. + goto synack; + case ESTABLISHED: case FIN_WAIT_1: case FIN_WAIT_2: @@ -1076,29 +1744,29 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { case CLOSING: case LAST_ACK: case TIME_WAIT: - // Ehm, no. We should never receive a second SYN. - goto reset; + // This could be a retransmission. Ignore the SYN flag, but send an ACK back. + break; + default: #ifdef UTCP_DEBUG abort(); #endif return 0; } - - // SYN counts as one sequence number - c->rcv.nxt++; } // 6. Process new data if(c->state == SYN_RECEIVED) { // This is the ACK after the SYNACK. It should always have ACKed the SYNACK. - if(!advanced) + if(!advanced) { goto reset; + } // Are we still LISTENing? - if(utcp->accept) + if(utcp->accept) { utcp->accept(c, c->src); + } if(c->state != ESTABLISHED) { set_state(c, CLOSED); @@ -1116,16 +1784,19 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { abort(); #endif return 0; + case ESTABLISHED: case FIN_WAIT_1: case FIN_WAIT_2: break; + case CLOSE_WAIT: case CLOSING: case LAST_ACK: case TIME_WAIT: // Ehm no, We should never receive more data after a FIN. goto reset; + default: #ifdef UTCP_DEBUG abort(); @@ -1133,12 +1804,12 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { return 0; } - handle_incoming_data(c, hdr.seq, data, len); + handle_incoming_data(c, &hdr, ptr, len); } // 7. Process FIN stuff - if((hdr.ctl & FIN) && hdr.seq + len == c->rcv.nxt) { + if((hdr.ctl & FIN) && (!is_reliable(c) || hdr.seq + len == c->rcv.nxt)) { switch(c->state) { case SYN_SENT: case SYN_RECEIVED: @@ -1147,23 +1818,28 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { abort(); #endif break; + case ESTABLISHED: set_state(c, CLOSE_WAIT); break; + case FIN_WAIT_1: set_state(c, CLOSING); break; + case FIN_WAIT_2: - gettimeofday(&c->conn_timeout, NULL); - c->conn_timeout.tv_sec += 60; + clock_gettime(UTCP_CLOCK, &c->conn_timeout); + c->conn_timeout.tv_sec += utcp->timeout; set_state(c, TIME_WAIT); break; + case CLOSE_WAIT: case CLOSING: case LAST_ACK: case TIME_WAIT: // Ehm, no. We should never receive a second FIN. goto reset; + default: #ifdef UTCP_DEBUG abort(); @@ -1175,7 +1851,7 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { c->rcv.nxt++; len++; - // Inform the application that the peer closed the connection. + // Inform the application that the peer closed its end of the connection. if(c->recv) { errno = 0; c->recv(c, NULL, 0); @@ -1183,17 +1859,22 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { } // Now we send something back if: - // - we advanced rcv.nxt (ie, we got some data that needs to be ACKed) + // - we received data, so we have to send back an ACK // -> sendatleastone = true // - or we got an ack, so we should maybe send a bit more data // -> sendatleastone = false - ack(c, len || prevrcvnxt != c->rcv.nxt); + if(is_reliable(c) || hdr.ctl & SYN || hdr.ctl & FIN) { + ack(c, has_data); + } + return 0; reset: swap_ports(&hdr); hdr.wnd = 0; + hdr.aux = 0; + if(hdr.ctl & ACK) { hdr.seq = hdr.ack; hdr.ctl = RST; @@ -1202,21 +1883,23 @@ reset: hdr.seq = 0; hdr.ctl = RST | ACK; } - print_packet(utcp, "send", &hdr, sizeof hdr); - utcp->send(utcp, &hdr, sizeof hdr); + + print_packet(c, "send", &hdr, sizeof(hdr)); + utcp->send(utcp, &hdr, sizeof(hdr)); return 0; } int utcp_shutdown(struct utcp_connection *c, int dir) { - debug("%p shutdown %d at %u\n", c ? c->utcp : NULL, dir, c ? c->snd.last : 0); + debug(c, "shutdown %d at %u\n", dir, c ? c->snd.last : 0); + if(!c) { errno = EFAULT; return -1; } if(c->reapable) { - debug("Error: shutdown() called on closed connection %p\n", c); + debug(c, "shutdown() called on closed connection\n"); errno = EBADF; return -1; } @@ -1228,12 +1911,21 @@ int utcp_shutdown(struct utcp_connection *c, int dir) { // TCP does not have a provision for stopping incoming packets. // The best we can do is to just ignore them. - if(dir == UTCP_SHUT_RD || dir == UTCP_SHUT_RDWR) + if(dir == UTCP_SHUT_RD || dir == UTCP_SHUT_RDWR) { c->recv = NULL; + } // The rest of the code deals with shutting down writes. - if(dir == UTCP_SHUT_RD) + if(dir == UTCP_SHUT_RD) { + return 0; + } + + // Only process shutting down writes once. + if(c->shut_wr) { return 0; + } + + c->shut_wr = true; switch(c->state) { case CLOSED: @@ -1242,16 +1934,17 @@ int utcp_shutdown(struct utcp_connection *c, int dir) { return -1; case SYN_SENT: - set_state(c, CLOSED); return 0; case SYN_RECEIVED: case ESTABLISHED: set_state(c, FIN_WAIT_1); break; + case FIN_WAIT_1: case FIN_WAIT_2: return 0; + case CLOSE_WAIT: set_state(c, CLOSING); break; @@ -1265,46 +1958,40 @@ int utcp_shutdown(struct utcp_connection *c, int dir) { c->snd.last++; ack(c, false); - if(!timerisset(&c->rtrx_timeout)) + + if(!timespec_isset(&c->rtrx_timeout)) { start_retransmit_timer(c); - return 0; -} + } -int utcp_close(struct utcp_connection *c) { - if(utcp_shutdown(c, SHUT_RDWR)) - return -1; - c->recv = NULL; - c->poll = NULL; - c->reapable = true; return 0; } -int utcp_abort(struct utcp_connection *c) { +static bool reset_connection(struct utcp_connection *c) { if(!c) { errno = EFAULT; - return -1; + return false; } if(c->reapable) { - debug("Error: abort() called on closed connection %p\n", c); + debug(c, "abort() called on closed connection\n"); errno = EBADF; - return -1; + return false; } c->recv = NULL; c->poll = NULL; - c->reapable = true; switch(c->state) { case CLOSED: - return 0; + return true; + case LISTEN: case SYN_SENT: case CLOSING: case LAST_ACK: case TIME_WAIT: set_state(c, CLOSED); - return 0; + return true; case SYN_RECEIVED: case ESTABLISHED: @@ -1326,8 +2013,61 @@ int utcp_abort(struct utcp_connection *c) { hdr.wnd = 0; hdr.ctl = RST; - print_packet(c->utcp, "send", &hdr, sizeof hdr); - c->utcp->send(c->utcp, &hdr, sizeof hdr); + print_packet(c, "send", &hdr, sizeof(hdr)); + c->utcp->send(c->utcp, &hdr, sizeof(hdr)); + return true; +} + +// Closes all the opened connections +void utcp_abort_all_connections(struct utcp *utcp) { + if(!utcp) { + errno = EINVAL; + return; + } + + for(int i = 0; i < utcp->nconnections; i++) { + struct utcp_connection *c = utcp->connections[i]; + + if(c->reapable || c->state == CLOSED) { + continue; + } + + utcp_recv_t old_recv = c->recv; + utcp_poll_t old_poll = c->poll; + + reset_connection(c); + + if(old_recv) { + errno = 0; + old_recv(c, NULL, 0); + } + + if(old_poll && !c->reapable) { + errno = 0; + old_poll(c, 0); + } + } + + return; +} + +int utcp_close(struct utcp_connection *c) { + if(utcp_shutdown(c, SHUT_RDWR) && errno != ENOTCONN) { + return -1; + } + + c->recv = NULL; + c->poll = NULL; + c->reapable = true; + return 0; +} + +int utcp_abort(struct utcp_connection *c) { + if(!reset_connection(c)) { + return -1; + } + + c->reapable = true; return 0; } @@ -1337,61 +2077,87 @@ int utcp_abort(struct utcp_connection *c) { * The return value is the time to the next timeout in milliseconds, * or maybe a negative value if the timeout is infinite. */ -struct timeval utcp_timeout(struct utcp *utcp) { - struct timeval now; - gettimeofday(&now, NULL); - struct timeval next = {now.tv_sec + 3600, now.tv_usec}; +struct timespec utcp_timeout(struct utcp *utcp) { + struct timespec now; + clock_gettime(UTCP_CLOCK, &now); + struct timespec next = {now.tv_sec + 3600, now.tv_nsec}; for(int i = 0; i < utcp->nconnections; i++) { struct utcp_connection *c = utcp->connections[i]; - if(!c) + + if(!c) { continue; + } // delete connections that have been utcp_close()d. if(c->state == CLOSED) { if(c->reapable) { - debug("Reaping %p\n", c); + debug(c, "reaping\n"); free_connection(c); i--; } + continue; } - if(timerisset(&c->conn_timeout) && timercmp(&c->conn_timeout, &now, <)) { + if(timespec_isset(&c->conn_timeout) && timespec_lt(&c->conn_timeout, &now)) { errno = ETIMEDOUT; c->state = CLOSED; - if(c->recv) + + if(c->recv) { c->recv(c, NULL, 0); + } + + if(c->poll && !c->reapable) { + c->poll(c, 0); + } + continue; } - if(timerisset(&c->rtrx_timeout) && timercmp(&c->rtrx_timeout, &now, <)) { - debug("retransmit()\n"); + if(timespec_isset(&c->rtrx_timeout) && timespec_lt(&c->rtrx_timeout, &now)) { + debug(c, "retransmitting after timeout\n"); retransmit(c); } - if(c->poll && buffer_free(&c->sndbuf) && (c->state == ESTABLISHED || c->state == CLOSE_WAIT)) - c->poll(c, buffer_free(&c->sndbuf)); + if(c->poll) { + if((c->state == ESTABLISHED || c->state == CLOSE_WAIT) && c->do_poll) { + c->do_poll = false; + uint32_t len = buffer_free(&c->sndbuf); + + if(len) { + c->poll(c, len); + } + } else if(c->state == CLOSED) { + c->poll(c, 0); + } + } - if(timerisset(&c->conn_timeout) && timercmp(&c->conn_timeout, &next, <)) + if(timespec_isset(&c->conn_timeout) && timespec_lt(&c->conn_timeout, &next)) { next = c->conn_timeout; + } - if(timerisset(&c->rtrx_timeout) && timercmp(&c->rtrx_timeout, &next, <)) + if(timespec_isset(&c->rtrx_timeout) && timespec_lt(&c->rtrx_timeout, &next)) { next = c->rtrx_timeout; + } } - struct timeval diff; - timersub(&next, &now, &diff); + struct timespec diff; + + timespec_sub(&next, &now, &diff); + return diff; } bool utcp_is_active(struct utcp *utcp) { - if(!utcp) + if(!utcp) { return false; + } for(int i = 0; i < utcp->nconnections; i++) - if(utcp->connections[i]->state != CLOSED && utcp->connections[i]->state != TIME_WAIT) + if(utcp->connections[i]->state != CLOSED && utcp->connections[i]->state != TIME_WAIT) { return true; + } return false; } @@ -1402,32 +2168,59 @@ struct utcp *utcp_init(utcp_accept_t accept, utcp_pre_accept_t pre_accept, utcp_ return NULL; } - struct utcp *utcp = calloc(1, sizeof *utcp); - if(!utcp) + struct utcp *utcp = calloc(1, sizeof(*utcp)); + + if(!utcp) { + return NULL; + } + + utcp_set_mtu(utcp, DEFAULT_MTU); + + if(!utcp->pkt) { + free(utcp); return NULL; + } + + if(!CLOCK_GRANULARITY) { + struct timespec res; + clock_getres(UTCP_CLOCK, &res); + CLOCK_GRANULARITY = res.tv_sec * USEC_PER_SEC + res.tv_nsec / 1000; + } utcp->accept = accept; utcp->pre_accept = pre_accept; utcp->send = send; utcp->priv = priv; - utcp->mtu = DEFAULT_MTU; - utcp->timeout = DEFAULT_USER_TIMEOUT; // s - utcp->rto = START_RTO; // us + utcp->timeout = DEFAULT_USER_TIMEOUT; // sec return utcp; } void utcp_exit(struct utcp *utcp) { - if(!utcp) + if(!utcp) { return; + } + for(int i = 0; i < utcp->nconnections; i++) { - if(!utcp->connections[i]->reapable) - debug("Warning, freeing unclosed connection %p\n", utcp->connections[i]); - buffer_exit(&utcp->connections[i]->rcvbuf); - buffer_exit(&utcp->connections[i]->sndbuf); - free(utcp->connections[i]); + struct utcp_connection *c = utcp->connections[i]; + + if(!c->reapable) { + if(c->recv) { + c->recv(c, NULL, 0); + } + + if(c->poll && !c->reapable) { + c->poll(c, 0); + } + } + + buffer_exit(&c->rcvbuf); + buffer_exit(&c->sndbuf); + free(c); } + free(utcp->connections); + free(utcp->pkt); free(utcp); } @@ -1435,10 +2228,67 @@ uint16_t utcp_get_mtu(struct utcp *utcp) { return utcp ? utcp->mtu : 0; } +uint16_t utcp_get_mss(struct utcp *utcp) { + return utcp ? utcp->mss : 0; +} + void utcp_set_mtu(struct utcp *utcp, uint16_t mtu) { - // TODO: handle overhead of the header - if(utcp) - utcp->mtu = mtu; + if(!utcp) { + return; + } + + if(mtu <= sizeof(struct hdr)) { + return; + } + + if(mtu > utcp->mtu) { + char *new = realloc(utcp->pkt, mtu + sizeof(struct hdr)); + + if(!new) { + return; + } + + utcp->pkt = new; + } + + utcp->mtu = mtu; + utcp->mss = mtu - sizeof(struct hdr); +} + +void utcp_reset_timers(struct utcp *utcp) { + if(!utcp) { + return; + } + + struct timespec now, then; + + clock_gettime(UTCP_CLOCK, &now); + + then = now; + + then.tv_sec += utcp->timeout; + + for(int i = 0; i < utcp->nconnections; i++) { + struct utcp_connection *c = utcp->connections[i]; + + if(c->reapable) { + continue; + } + + if(timespec_isset(&c->rtrx_timeout)) { + c->rtrx_timeout = now; + } + + if(timespec_isset(&c->conn_timeout)) { + c->conn_timeout = then; + } + + c->rtt_start.tv_sec = 0; + + if(c->rto > START_RTO) { + c->rto = START_RTO; + } + } } int utcp_get_user_timeout(struct utcp *u) { @@ -1446,8 +2296,9 @@ int utcp_get_user_timeout(struct utcp *u) { } void utcp_set_user_timeout(struct utcp *u, int timeout) { - if(u) + if(u) { u->timeout = timeout; + } } size_t utcp_get_sndbuf(struct utcp_connection *c) { @@ -1455,18 +2306,34 @@ size_t utcp_get_sndbuf(struct utcp_connection *c) { } size_t utcp_get_sndbuf_free(struct utcp_connection *c) { - if(c && (c->state == ESTABLISHED || c->state == CLOSE_WAIT)) + if(!c) { + return 0; + } + + switch(c->state) { + case SYN_SENT: + case SYN_RECEIVED: + case ESTABLISHED: + case CLOSE_WAIT: return buffer_free(&c->sndbuf); - else + + default: return 0; + } } void utcp_set_sndbuf(struct utcp_connection *c, size_t size) { - if(!c) + if(!c) { return; + } + c->sndbuf.maxsize = size; - if(c->sndbuf.maxsize != size) + + if(c->sndbuf.maxsize != size) { c->sndbuf.maxsize = -1; + } + + c->do_poll = is_reliable(c) && buffer_free(&c->sndbuf); } size_t utcp_get_rcvbuf(struct utcp_connection *c) { @@ -1474,18 +2341,31 @@ size_t utcp_get_rcvbuf(struct utcp_connection *c) { } size_t utcp_get_rcvbuf_free(struct utcp_connection *c) { - if(c && (c->state == ESTABLISHED || c->state == CLOSE_WAIT)) + if(c && (c->state == ESTABLISHED || c->state == CLOSE_WAIT)) { return buffer_free(&c->rcvbuf); - else + } else { return 0; + } } void utcp_set_rcvbuf(struct utcp_connection *c, size_t size) { - if(!c) + if(!c) { return; + } + c->rcvbuf.maxsize = size; - if(c->rcvbuf.maxsize != size) + + if(c->rcvbuf.maxsize != size) { c->rcvbuf.maxsize = -1; + } +} + +size_t utcp_get_sendq(struct utcp_connection *c) { + return c->sndbuf.used; +} + +size_t utcp_get_recvq(struct utcp_connection *c) { + return c->rcvbuf.used; } bool utcp_get_nodelay(struct utcp_connection *c) { @@ -1493,8 +2373,9 @@ bool utcp_get_nodelay(struct utcp_connection *c) { } void utcp_set_nodelay(struct utcp_connection *c, bool nodelay) { - if(c) + if(c) { c->nodelay = nodelay; + } } bool utcp_get_keepalive(struct utcp_connection *c) { @@ -1502,8 +2383,9 @@ bool utcp_get_keepalive(struct utcp_connection *c) { } void utcp_set_keepalive(struct utcp_connection *c, bool keepalive) { - if(c) + if(c) { c->keepalive = keepalive; + } } size_t utcp_get_outq(struct utcp_connection *c) { @@ -1511,13 +2393,16 @@ size_t utcp_get_outq(struct utcp_connection *c) { } void utcp_set_recv_cb(struct utcp_connection *c, utcp_recv_t recv) { - if(c) + if(c) { c->recv = recv; + } } void utcp_set_poll_cb(struct utcp_connection *c, utcp_poll_t poll) { - if(c) + if(c) { c->poll = poll; + c->do_poll = is_reliable(c) && buffer_free(&c->sndbuf); + } } void utcp_set_accept_cb(struct utcp *utcp, utcp_accept_t accept, utcp_pre_accept_t pre_accept) { @@ -1526,3 +2411,61 @@ void utcp_set_accept_cb(struct utcp *utcp, utcp_accept_t accept, utcp_pre_accept utcp->pre_accept = pre_accept; } } + +void utcp_expect_data(struct utcp_connection *c, bool expect) { + if(!c || c->reapable) { + return; + } + + if(!(c->state == ESTABLISHED || c->state == FIN_WAIT_1 || c->state == FIN_WAIT_2)) { + return; + } + + if(expect) { + // If we expect data, start the connection timer. + if(!timespec_isset(&c->conn_timeout)) { + clock_gettime(UTCP_CLOCK, &c->conn_timeout); + c->conn_timeout.tv_sec += c->utcp->timeout; + } + } else { + // If we want to cancel expecting data, only clear the timer when there is no unACKed data. + if(c->snd.una == c->snd.last) { + timespec_clear(&c->conn_timeout); + } + } +} + +void utcp_offline(struct utcp *utcp, bool offline) { + struct timespec now; + clock_gettime(UTCP_CLOCK, &now); + + for(int i = 0; i < utcp->nconnections; i++) { + struct utcp_connection *c = utcp->connections[i]; + + if(c->reapable) { + continue; + } + + utcp_expect_data(c, offline); + + if(!offline) { + if(timespec_isset(&c->rtrx_timeout)) { + c->rtrx_timeout = now; + } + + utcp->connections[i]->rtt_start.tv_sec = 0; + + if(c->rto > START_RTO) { + c->rto = START_RTO; + } + } + } +} + +void utcp_set_retransmit_cb(struct utcp *utcp, utcp_retransmit_t retransmit) { + utcp->retransmit = retransmit; +} + +void utcp_set_clock_granularity(long granularity) { + CLOCK_GRANULARITY = granularity; +}