X-Git-Url: http://git.meshlink.io/?p=utcp;a=blobdiff_plain;f=utcp.c;h=04d2b194c20f193aa44f2d4006b9afc1bd356b29;hp=2ed32175dc0f98c9b2bc045f7a62fb43caeb5089;hb=HEAD;hpb=7baf55a28cf48163c007ee2ff56678371d78c990 diff --git a/utcp.c b/utcp.c index 2ed3217..04d2b19 100644 --- a/utcp.c +++ b/utcp.c @@ -27,8 +27,7 @@ #include #include #include -#include -#include +#include #include "utcp_priv.h" @@ -44,16 +43,50 @@ #undef poll #endif -#ifndef timersub -#define timersub(a, b, r)\ - do {\ - (r)->tv_sec = (a)->tv_sec - (b)->tv_sec;\ - (r)->tv_usec = (a)->tv_usec - (b)->tv_usec;\ - if((r)->tv_usec < 0)\ - (r)->tv_sec--, (r)->tv_usec += USEC_PER_SEC;\ - } while (0) +#ifndef UTCP_CLOCK +#if defined(CLOCK_MONOTONIC_RAW) && defined(__x86_64__) +#define UTCP_CLOCK CLOCK_MONOTONIC_RAW +#else +#define UTCP_CLOCK CLOCK_MONOTONIC +#endif #endif +static void timespec_sub(const struct timespec *a, const struct timespec *b, struct timespec *r) { + r->tv_sec = a->tv_sec - b->tv_sec; + r->tv_nsec = a->tv_nsec - b->tv_nsec; + + if(r->tv_nsec < 0) { + r->tv_sec--, r->tv_nsec += NSEC_PER_SEC; + } +} + +static int32_t timespec_diff_usec(const struct timespec *a, const struct timespec *b) { + return (a->tv_sec - b->tv_sec) * 1000000 + (a->tv_nsec - b->tv_nsec) / 1000; +} + +static bool timespec_lt(const struct timespec *a, const struct timespec *b) { + if(a->tv_sec == b->tv_sec) { + return a->tv_nsec < b->tv_nsec; + } else { + return a->tv_sec < b->tv_sec; + } +} + +static void timespec_clear(struct timespec *a) { + a->tv_sec = 0; + a->tv_nsec = 0; +} + +static bool timespec_isset(const struct timespec *a) { + return a->tv_sec; +} + +static long CLOCK_GRANULARITY; // usec + +static inline size_t min(size_t a, size_t b) { + return a < b ? a : b; +} + static inline size_t max(size_t a, size_t b) { return a > b ? a : b; } @@ -61,71 +94,85 @@ static inline size_t max(size_t a, size_t b) { #ifdef UTCP_DEBUG #include -static void debug(const char *format, ...) { +#ifndef UTCP_DEBUG_DATALEN +#define UTCP_DEBUG_DATALEN 20 +#endif + +static void debug(struct utcp_connection *c, const char *format, ...) { + struct timespec tv; + char buf[1024]; + int len; + + clock_gettime(CLOCK_REALTIME, &tv); + len = snprintf(buf, sizeof(buf), "%ld.%06lu %u:%u ", (long)tv.tv_sec, tv.tv_nsec / 1000, c ? c->src : 0, c ? c->dst : 0); va_list ap; va_start(ap, format); - vfprintf(stderr, format, ap); + len += vsnprintf(buf + len, sizeof(buf) - len, format, ap); va_end(ap); + + if(len > 0 && (size_t)len < sizeof(buf)) { + fwrite(buf, len, 1, stderr); + } } -static void print_packet(struct utcp *utcp, const char *dir, const void *pkt, size_t len) { +static void print_packet(struct utcp_connection *c, const char *dir, const void *pkt, size_t len) { struct hdr hdr; if(len < sizeof(hdr)) { - debug("%p %s: short packet (%lu bytes)\n", utcp, dir, (unsigned long)len); + debug(c, "%s: short packet (%lu bytes)\n", dir, (unsigned long)len); return; } memcpy(&hdr, pkt, sizeof(hdr)); - debug("%p %s: len=%lu, src=%u dst=%u seq=%u ack=%u wnd=%u aux=%x ctl=", utcp, dir, (unsigned long)len, hdr.src, hdr.dst, hdr.seq, hdr.ack, hdr.wnd, hdr.aux); - if(hdr.ctl & SYN) { - debug("SYN"); - } - - if(hdr.ctl & RST) { - debug("RST"); - } + uint32_t datalen; - if(hdr.ctl & FIN) { - debug("FIN"); + if(len > sizeof(hdr)) { + datalen = min(len - sizeof(hdr), UTCP_DEBUG_DATALEN); + } else { + datalen = 0; } - if(hdr.ctl & ACK) { - debug("ACK"); - } - if(len > sizeof(hdr)) { - uint32_t datalen = len - sizeof(hdr); - const uint8_t *data = (uint8_t *)pkt + sizeof(hdr); - char str[datalen * 2 + 1]; - char *p = str; + const uint8_t *data = (uint8_t *)pkt + sizeof(hdr); + char str[datalen * 2 + 1]; + char *p = str; - for(uint32_t i = 0; i < datalen; i++) { - *p++ = "0123456789ABCDEF"[data[i] >> 4]; - *p++ = "0123456789ABCDEF"[data[i] & 15]; - } + for(uint32_t i = 0; i < datalen; i++) { + *p++ = "0123456789ABCDEF"[data[i] >> 4]; + *p++ = "0123456789ABCDEF"[data[i] & 15]; + } - *p = 0; + *p = 0; - debug(" data=%s", str); - } + debug(c, "%s: len %lu src %u dst %u seq %u ack %u wnd %u aux %x ctl %s%s%s%s%s data %s\n", + dir, (unsigned long)len, hdr.src, hdr.dst, hdr.seq, hdr.ack, hdr.wnd, hdr.aux, + hdr.ctl & SYN ? "SYN" : "", + hdr.ctl & RST ? "RST" : "", + hdr.ctl & FIN ? "FIN" : "", + hdr.ctl & ACK ? "ACK" : "", + hdr.ctl & MF ? "MF" : "", + str + ); +} - debug("\n"); +static void debug_cwnd(struct utcp_connection *c) { + debug(c, "snd.cwnd %u snd.ssthresh %u\n", c->snd.cwnd, ~c->snd.ssthresh ? c->snd.ssthresh : 0); } #else #define debug(...) do {} while(0) #define print_packet(...) do {} while(0) +#define debug_cwnd(...) do {} while(0) #endif static void set_state(struct utcp_connection *c, enum state state) { c->state = state; if(state == ESTABLISHED) { - timerclear(&c->conn_timeout); + timespec_clear(&c->conn_timeout); } - debug("%p new state: %s\n", c->utcp, strstate[state]); + debug(c, "state %s\n", strstate[state]); } static bool fin_wanted(struct utcp_connection *c, uint32_t seq) { @@ -153,12 +200,40 @@ static int32_t seqdiff(uint32_t a, uint32_t b) { } // Buffer functions -// TODO: convert to ringbuffers to avoid memmove() operations. +static bool buffer_wraps(struct buffer *buf) { + return buf->size - buf->offset < buf->used; +} + +static bool buffer_resize(struct buffer *buf, uint32_t newsize) { + char *newdata = realloc(buf->data, newsize); + + if(!newdata) { + return false; + } + + buf->data = newdata; + + if(buffer_wraps(buf)) { + // Shift the right part of the buffer until it hits the end of the new buffer. + // Old situation: + // [345......012] + // New situation: + // [345.........|........012] + uint32_t tailsize = buf->size - buf->offset; + uint32_t newoffset = newsize - tailsize; + memmove(buf->data + newoffset, buf->data + buf->offset, tailsize); + buf->offset = newoffset; + } + + buf->size = newsize; + return true; +} // Store data into the buffer static ssize_t buffer_put_at(struct buffer *buf, size_t offset, const void *data, size_t len) { - debug("buffer_put_at %lu %lu %lu\n", (unsigned long)buf->used, (unsigned long)offset, (unsigned long)len); + debug(NULL, "buffer_put_at %lu %lu %lu\n", (unsigned long)buf->used, (unsigned long)offset, (unsigned long)len); + // Ensure we don't store more than maxsize bytes in total size_t required = offset + len; if(required > buf->maxsize) { @@ -170,32 +245,41 @@ static ssize_t buffer_put_at(struct buffer *buf, size_t offset, const void *data required = buf->maxsize; } + // Check if we need to resize the buffer if(required > buf->size) { size_t newsize = buf->size; if(!newsize) { - newsize = required; - } else { - do { - newsize *= 2; - } while(newsize < required); + newsize = 4096; } + do { + newsize *= 2; + } while(newsize < required); + if(newsize > buf->maxsize) { newsize = buf->maxsize; } - char *newdata = realloc(buf->data, newsize); - - if(!newdata) { + if(!buffer_resize(buf, newsize)) { return -1; } + } - buf->data = newdata; - buf->size = newsize; + uint32_t realoffset = buf->offset + offset; + + if(buf->size - buf->offset <= offset) { + // The offset wrapped + realoffset -= buf->size; } - memcpy(buf->data + offset, data, len); + if(buf->size - realoffset < len) { + // The new chunk of data must be wrapped + memcpy(buf->data + realoffset, data, buf->size - realoffset); + memcpy(buf->data, (char *)data + buf->size - realoffset, len - (buf->size - realoffset)); + } else { + memcpy(buf->data + realoffset, data, len); + } if(required > buf->used) { buf->used = required; @@ -208,52 +292,116 @@ static ssize_t buffer_put(struct buffer *buf, const void *data, size_t len) { return buffer_put_at(buf, buf->used, data, len); } -// Get data from the buffer. data can be NULL. -static ssize_t buffer_get(struct buffer *buf, void *data, size_t len) { - if(len > buf->used) { - len = buf->used; +// Copy data from the buffer without removing it. +static ssize_t buffer_copy(struct buffer *buf, void *data, size_t offset, size_t len) { + // Ensure we don't copy more than is actually stored in the buffer + if(offset >= buf->used) { + return 0; + } + + if(buf->used - offset < len) { + len = buf->used - offset; } - if(data) { - memcpy(data, buf->data, len); + uint32_t realoffset = buf->offset + offset; + + if(buf->size - buf->offset <= offset) { + // The offset wrapped + realoffset -= buf->size; } - if(len < buf->used) { - memmove(buf->data, buf->data + len, buf->used - len); + if(buf->size - realoffset < len) { + // The data is wrapped + memcpy(data, buf->data + realoffset, buf->size - realoffset); + memcpy((char *)data + buf->size - realoffset, buf->data, len - (buf->size - realoffset)); + } else { + memcpy(data, buf->data + realoffset, len); } - buf->used -= len; return len; } // Copy data from the buffer without removing it. -static ssize_t buffer_copy(struct buffer *buf, void *data, size_t offset, size_t len) { +static ssize_t buffer_call(struct utcp_connection *c, struct buffer *buf, size_t offset, size_t len) { + if(!c->recv) { + return len; + } + + // Ensure we don't copy more than is actually stored in the buffer if(offset >= buf->used) { return 0; } - if(offset + len > buf->used) { + if(buf->used - offset < len) { len = buf->used - offset; } - memcpy(data, buf->data + offset, len); - return len; -} + uint32_t realoffset = buf->offset + offset; -static bool buffer_init(struct buffer *buf, uint32_t len, uint32_t maxlen) { - memset(buf, 0, sizeof(*buf)); + if(buf->size - buf->offset <= offset) { + // The offset wrapped + realoffset -= buf->size; + } - if(len) { - buf->data = malloc(len); + if(buf->size - realoffset < len) { + // The data is wrapped + ssize_t rx1 = c->recv(c, buf->data + realoffset, buf->size - realoffset); - if(!buf->data) { - return false; + if(rx1 < buf->size - realoffset) { + return rx1; } + + // The channel might have been closed by the previous callback + if(!c->recv) { + return len; + } + + ssize_t rx2 = c->recv(c, buf->data, len - (buf->size - realoffset)); + + if(rx2 < 0) { + return rx2; + } else { + return rx1 + rx2; + } + } else { + return c->recv(c, buf->data + realoffset, len); } +} - buf->size = len; - buf->maxsize = maxlen; - return true; +// Discard data from the buffer. +static ssize_t buffer_discard(struct buffer *buf, size_t len) { + if(buf->used < len) { + len = buf->used; + } + + if(buf->size - buf->offset <= len) { + buf->offset -= buf->size; + } + + if(buf->used == len) { + buf->offset = 0; + } else { + buf->offset += len; + } + + buf->used -= len; + + return len; +} + +static void buffer_clear(struct buffer *buf) { + buf->used = 0; + buf->offset = 0; +} + +static bool buffer_set_size(struct buffer *buf, uint32_t minsize, uint32_t maxsize) { + if(maxsize < minsize) { + maxsize = minsize; + } + + buf->maxsize = maxsize; + + return buf->size >= minsize || buffer_resize(buf, minsize); } static void buffer_exit(struct buffer *buf) { @@ -262,7 +410,7 @@ static void buffer_exit(struct buffer *buf) { } static uint32_t buffer_free(const struct buffer *buf) { - return buf->maxsize - buf->used; + return buf->maxsize > buf->used ? buf->maxsize - buf->used : 0; } // Connections are stored in a sorted list. @@ -360,12 +508,12 @@ static struct utcp_connection *allocate_connection(struct utcp *utcp, uint16_t s return NULL; } - if(!buffer_init(&c->sndbuf, DEFAULT_SNDBUFSIZE, DEFAULT_MAXSNDBUFSIZE)) { + if(!buffer_set_size(&c->sndbuf, DEFAULT_SNDBUFSIZE, DEFAULT_MAXSNDBUFSIZE)) { free(c); return NULL; } - if(!buffer_init(&c->rcvbuf, DEFAULT_RCVBUFSIZE, DEFAULT_MAXRCVBUFSIZE)) { + if(!buffer_set_size(&c->rcvbuf, DEFAULT_RCVBUFSIZE, DEFAULT_MAXRCVBUFSIZE)) { buffer_exit(&c->sndbuf); free(c); return NULL; @@ -382,9 +530,13 @@ static struct utcp_connection *allocate_connection(struct utcp *utcp, uint16_t s #endif c->snd.una = c->snd.iss; c->snd.nxt = c->snd.iss + 1; - c->rcv.wnd = utcp->mtu; c->snd.last = c->snd.nxt; - c->snd.cwnd = utcp->mtu; + c->snd.cwnd = (utcp->mss > 2190 ? 2 : utcp->mss > 1095 ? 3 : 4) * utcp->mss; + c->snd.ssthresh = ~0; + debug_cwnd(c); + c->srtt = 0; + c->rttvar = 0; + c->rto = START_RTO; c->utcp = utcp; // Add it to the sorted list of connections @@ -406,44 +558,50 @@ static inline uint32_t absdiff(uint32_t a, uint32_t b) { // Update RTT variables. See RFC 6298. static void update_rtt(struct utcp_connection *c, uint32_t rtt) { if(!rtt) { - debug("invalid rtt\n"); + debug(c, "invalid rtt\n"); return; } - struct utcp *utcp = c->utcp; - - if(!utcp->srtt) { - utcp->srtt = rtt; - utcp->rttvar = rtt / 2; - utcp->rto = rtt + max(2 * rtt, CLOCK_GRANULARITY); + if(!c->srtt) { + c->srtt = rtt; + c->rttvar = rtt / 2; } else { - utcp->rttvar = (utcp->rttvar * 3 + absdiff(utcp->srtt, rtt)) / 4; - utcp->srtt = (utcp->srtt * 7 + rtt) / 8; - utcp->rto = utcp->srtt + max(utcp->rttvar, CLOCK_GRANULARITY); + c->rttvar = (c->rttvar * 3 + absdiff(c->srtt, rtt)) / 4; + c->srtt = (c->srtt * 7 + rtt) / 8; } - if(utcp->rto > MAX_RTO) { - utcp->rto = MAX_RTO; + c->rto = c->srtt + max(4 * c->rttvar, CLOCK_GRANULARITY); + + if(c->rto > MAX_RTO) { + c->rto = MAX_RTO; } - debug("rtt %u srtt %u rttvar %u rto %u\n", rtt, utcp->srtt, utcp->rttvar, utcp->rto); + debug(c, "rtt %u srtt %u rttvar %u rto %u\n", rtt, c->srtt, c->rttvar, c->rto); } static void start_retransmit_timer(struct utcp_connection *c) { - gettimeofday(&c->rtrx_timeout, NULL); - c->rtrx_timeout.tv_usec += c->utcp->rto; + clock_gettime(UTCP_CLOCK, &c->rtrx_timeout); + + uint32_t rto = c->rto; - while(c->rtrx_timeout.tv_usec >= 1000000) { - c->rtrx_timeout.tv_usec -= 1000000; + while(rto > USEC_PER_SEC) { c->rtrx_timeout.tv_sec++; + rto -= USEC_PER_SEC; } - debug("timeout set to %lu.%06lu (%u)\n", c->rtrx_timeout.tv_sec, c->rtrx_timeout.tv_usec, c->utcp->rto); + c->rtrx_timeout.tv_nsec += rto * 1000; + + if(c->rtrx_timeout.tv_nsec >= NSEC_PER_SEC) { + c->rtrx_timeout.tv_nsec -= NSEC_PER_SEC; + c->rtrx_timeout.tv_sec++; + } + + debug(c, "rtrx_timeout %ld.%06lu\n", c->rtrx_timeout.tv_sec, c->rtrx_timeout.tv_nsec); } static void stop_retransmit_timer(struct utcp_connection *c) { - timerclear(&c->rtrx_timeout); - debug("timeout cleared\n"); + timespec_clear(&c->rtrx_timeout); + debug(c, "rtrx_timeout cleared\n"); } struct utcp_connection *utcp_connect_ex(struct utcp *utcp, uint16_t dst, utcp_recv_t recv, void *priv, uint32_t flags) { @@ -468,7 +626,7 @@ struct utcp_connection *utcp_connect_ex(struct utcp *utcp, uint16_t dst, utcp_re pkt.hdr.dst = c->dst; pkt.hdr.seq = c->snd.iss; pkt.hdr.ack = 0; - pkt.hdr.wnd = c->rcv.wnd; + pkt.hdr.wnd = c->rcvbuf.maxsize; pkt.hdr.ctl = SYN; pkt.hdr.aux = 0x0101; pkt.init[0] = 1; @@ -478,10 +636,10 @@ struct utcp_connection *utcp_connect_ex(struct utcp *utcp, uint16_t dst, utcp_re set_state(c, SYN_SENT); - print_packet(utcp, "send", &pkt, sizeof(pkt)); + print_packet(c, "send", &pkt, sizeof(pkt)); utcp->send(utcp, &pkt, sizeof(pkt)); - gettimeofday(&c->conn_timeout, NULL); + clock_gettime(UTCP_CLOCK, &c->conn_timeout); c->conn_timeout.tv_sec += utcp->timeout; start_retransmit_timer(c); @@ -495,11 +653,11 @@ struct utcp_connection *utcp_connect(struct utcp *utcp, uint16_t dst, utcp_recv_ void utcp_accept(struct utcp_connection *c, utcp_recv_t recv, void *priv) { if(c->reapable || c->state != SYN_RECEIVED) { - debug("Error: accept() called on invalid connection %p in state %s\n", c, strstate[c->state]); + debug(c, "accept() called on invalid connection in state %s\n", c, strstate[c->state]); return; } - debug("%p accepted, %p %p\n", c, recv, priv); + debug(c, "accepted %p %p\n", c, recv, priv); c->recv = recv; c->priv = priv; set_state(c, ESTABLISHED); @@ -507,19 +665,22 @@ void utcp_accept(struct utcp_connection *c, utcp_recv_t recv, void *priv) { static void ack(struct utcp_connection *c, bool sendatleastone) { int32_t left = seqdiff(c->snd.last, c->snd.nxt); - int32_t cwndleft = c->snd.cwnd - seqdiff(c->snd.nxt, c->snd.una); - debug("cwndleft = %d\n", cwndleft); + int32_t cwndleft = is_reliable(c) ? min(c->snd.cwnd, c->snd.wnd) - seqdiff(c->snd.nxt, c->snd.una) : MAX_UNRELIABLE_SIZE; assert(left >= 0); if(cwndleft <= 0) { - cwndleft = 0; - } - - if(cwndleft < left) { + left = 0; + } else if(cwndleft < left) { left = cwndleft; + + if(!sendatleastone || cwndleft > c->utcp->mss) { + left -= left % c->utcp->mss; + } } + debug(c, "cwndleft %d left %d\n", cwndleft, left); + if(!left && !sendatleastone) { return; } @@ -527,23 +688,17 @@ static void ack(struct utcp_connection *c, bool sendatleastone) { struct { struct hdr hdr; uint8_t data[]; - } *pkt; - - pkt = malloc(sizeof(pkt->hdr) + c->utcp->mtu); - - if(!pkt) { - return; - } + } *pkt = c->utcp->pkt; pkt->hdr.src = c->src; pkt->hdr.dst = c->dst; pkt->hdr.ack = c->rcv.nxt; - pkt->hdr.wnd = c->snd.wnd; + pkt->hdr.wnd = is_reliable(c) ? c->rcvbuf.maxsize : 0; pkt->hdr.ctl = ACK; pkt->hdr.aux = 0; do { - uint32_t seglen = left > c->utcp->mtu ? c->utcp->mtu : left; + uint32_t seglen = left > c->utcp->mss ? c->utcp->mss : left; pkt->hdr.seq = c->snd.nxt; buffer_copy(&c->sndbuf, pkt->data, seqdiff(c->snd.nxt, c->snd.una), seglen); @@ -551,6 +706,14 @@ static void ack(struct utcp_connection *c, bool sendatleastone) { c->snd.nxt += seglen; left -= seglen; + if(!is_reliable(c)) { + if(left) { + pkt->hdr.ctl |= MF; + } else { + pkt->hdr.ctl &= ~MF; + } + } + if(seglen && fin_wanted(c, c->snd.nxt)) { seglen--; pkt->hdr.ctl |= FIN; @@ -558,21 +721,23 @@ static void ack(struct utcp_connection *c, bool sendatleastone) { if(!c->rtt_start.tv_sec) { // Start RTT measurement - gettimeofday(&c->rtt_start, NULL); + clock_gettime(UTCP_CLOCK, &c->rtt_start); c->rtt_seq = pkt->hdr.seq + seglen; - debug("Starting RTT measurement, expecting ack %u\n", c->rtt_seq); + debug(c, "starting RTT measurement, expecting ack %u\n", c->rtt_seq); } - print_packet(c->utcp, "send", pkt, sizeof(pkt->hdr) + seglen); + print_packet(c, "send", pkt, sizeof(pkt->hdr) + seglen); c->utcp->send(c->utcp, pkt, sizeof(pkt->hdr) + seglen); - } while(left); - free(pkt); + if(left && !is_reliable(c)) { + pkt->hdr.wnd += seglen; + } + } while(left); } ssize_t utcp_send(struct utcp_connection *c, const void *data, size_t len) { if(c->reapable) { - debug("Error: send() called on closed connection %p\n", c); + debug(c, "send() called on closed connection\n"); errno = EBADF; return -1; } @@ -580,7 +745,7 @@ ssize_t utcp_send(struct utcp_connection *c, const void *data, size_t len) { switch(c->state) { case CLOSED: case LISTEN: - debug("Error: send() called on unconnected connection %p\n", c); + debug(c, "send() called on unconnected connection\n"); errno = ENOTCONN; return -1; @@ -595,7 +760,7 @@ ssize_t utcp_send(struct utcp_connection *c, const void *data, size_t len) { case CLOSING: case LAST_ACK: case TIME_WAIT: - debug("Error: send() called on closing connection %p\n", c); + debug(c, "send() called on closed connection\n"); errno = EPIPE; return -1; } @@ -627,8 +792,15 @@ ssize_t utcp_send(struct utcp_connection *c, const void *data, size_t len) { // Add data to send buffer. - if(is_reliable(c) || (c->state != SYN_SENT && c->state != SYN_RECEIVED)) { + if(is_reliable(c)) { len = buffer_put(&c->sndbuf, data, len); + } else if(c->state != SYN_SENT && c->state != SYN_RECEIVED) { + if(len > MAX_UNRELIABLE_SIZE || buffer_put(&c->sndbuf, data, len) != (ssize_t)len) { + errno = EMSGSIZE; + return -1; + } + } else { + return 0; } if(len <= 0) { @@ -652,15 +824,15 @@ ssize_t utcp_send(struct utcp_connection *c, const void *data, size_t len) { if(!is_reliable(c)) { c->snd.una = c->snd.nxt = c->snd.last; - buffer_get(&c->sndbuf, NULL, c->sndbuf.used); + buffer_discard(&c->sndbuf, c->sndbuf.used); } - if(is_reliable(c) && !timerisset(&c->rtrx_timeout)) { + if(is_reliable(c) && !timespec_isset(&c->rtrx_timeout)) { start_retransmit_timer(c); } - if(is_reliable(c) && !timerisset(&c->conn_timeout)) { - gettimeofday(&c->conn_timeout, NULL); + if(is_reliable(c) && !timespec_isset(&c->conn_timeout)) { + clock_gettime(UTCP_CLOCK, &c->conn_timeout); c->conn_timeout.tv_sec += c->utcp->timeout; } @@ -673,10 +845,9 @@ static void swap_ports(struct hdr *hdr) { hdr->dst = tmp; } -static void retransmit(struct utcp_connection *c) { +static void fast_retransmit(struct utcp_connection *c) { if(c->state == CLOSED || c->snd.last == c->snd.una) { - debug("Retransmit() called but nothing to retransmit!\n"); - stop_retransmit_timer(c); + debug(c, "fast_retransmit() called but nothing to retransmit!\n"); return; } @@ -685,17 +856,61 @@ static void retransmit(struct utcp_connection *c) { struct { struct hdr hdr; uint8_t data[]; - } *pkt; + } *pkt = c->utcp->pkt; - pkt = malloc(sizeof(pkt->hdr) + c->utcp->mtu); + pkt->hdr.src = c->src; + pkt->hdr.dst = c->dst; + pkt->hdr.wnd = c->rcvbuf.maxsize; + pkt->hdr.aux = 0; + + switch(c->state) { + case ESTABLISHED: + case FIN_WAIT_1: + case CLOSE_WAIT: + case CLOSING: + case LAST_ACK: + // Send unacked data again. + pkt->hdr.seq = c->snd.una; + pkt->hdr.ack = c->rcv.nxt; + pkt->hdr.ctl = ACK; + uint32_t len = min(seqdiff(c->snd.last, c->snd.una), utcp->mss); - if(!pkt) { + if(fin_wanted(c, c->snd.una + len)) { + len--; + pkt->hdr.ctl |= FIN; + } + + buffer_copy(&c->sndbuf, pkt->data, 0, len); + print_packet(c, "rtrx", pkt, sizeof(pkt->hdr) + len); + utcp->send(utcp, pkt, sizeof(pkt->hdr) + len); + break; + + default: + break; + } +} + +static void retransmit(struct utcp_connection *c) { + if(c->state == CLOSED || c->snd.last == c->snd.una) { + debug(c, "retransmit() called but nothing to retransmit!\n"); + stop_retransmit_timer(c); return; } + struct utcp *utcp = c->utcp; + + if(utcp->retransmit) { + utcp->retransmit(c); + } + + struct { + struct hdr hdr; + uint8_t data[]; + } *pkt = c->utcp->pkt; + pkt->hdr.src = c->src; pkt->hdr.dst = c->dst; - pkt->hdr.wnd = c->rcv.wnd; + pkt->hdr.wnd = c->rcvbuf.maxsize; pkt->hdr.aux = 0; switch(c->state) { @@ -709,7 +924,7 @@ static void retransmit(struct utcp_connection *c) { pkt->data[1] = 0; pkt->data[2] = 0; pkt->data[3] = c->flags & 0x7; - print_packet(c->utcp, "rtrx", pkt, sizeof(pkt->hdr) + 4); + print_packet(c, "rtrx", pkt, sizeof(pkt->hdr) + 4); utcp->send(utcp, pkt, sizeof(pkt->hdr) + 4); break; @@ -718,7 +933,7 @@ static void retransmit(struct utcp_connection *c) { pkt->hdr.seq = c->snd.nxt; pkt->hdr.ack = c->rcv.nxt; pkt->hdr.ctl = SYN | ACK; - print_packet(c->utcp, "rtrx", pkt, sizeof(pkt->hdr)); + print_packet(c, "rtrx", pkt, sizeof(pkt->hdr)); utcp->send(utcp, pkt, sizeof(pkt->hdr)); break; @@ -731,22 +946,24 @@ static void retransmit(struct utcp_connection *c) { pkt->hdr.seq = c->snd.una; pkt->hdr.ack = c->rcv.nxt; pkt->hdr.ctl = ACK; - uint32_t len = seqdiff(c->snd.last, c->snd.una); - - if(len > utcp->mtu) { - len = utcp->mtu; - } + uint32_t len = min(seqdiff(c->snd.last, c->snd.una), utcp->mss); if(fin_wanted(c, c->snd.una + len)) { len--; pkt->hdr.ctl |= FIN; } - c->snd.nxt = c->snd.una + len; - c->snd.cwnd = utcp->mtu; // reduce cwnd on retransmit + // RFC 5681 slow start after timeout + uint32_t flightsize = seqdiff(c->snd.nxt, c->snd.una); + c->snd.ssthresh = max(flightsize / 2, utcp->mss * 2); // eq. 4 + c->snd.cwnd = utcp->mss; + debug_cwnd(c); + buffer_copy(&c->sndbuf, pkt->data, 0, len); - print_packet(c->utcp, "rtrx", pkt, sizeof(pkt->hdr) + len); + print_packet(c, "rtrx", pkt, sizeof(pkt->hdr) + len); utcp->send(utcp, pkt, sizeof(pkt->hdr) + len); + + c->snd.nxt = c->snd.una + len; break; case CLOSED: @@ -762,16 +979,17 @@ static void retransmit(struct utcp_connection *c) { } start_retransmit_timer(c); - utcp->rto *= 2; + c->rto *= 2; - if(utcp->rto > MAX_RTO) { - utcp->rto = MAX_RTO; + if(c->rto > MAX_RTO) { + c->rto = MAX_RTO; } c->rtt_start.tv_sec = 0; // invalidate RTT timer + c->dupack = 0; // cancel any ongoing fast recovery cleanup: - free(pkt); + return; } /* Update receive buffer and SACK entries after consuming data. @@ -793,15 +1011,15 @@ cleanup: * - the SACK entry is completely before ^, in that case delete it. */ static void sack_consume(struct utcp_connection *c, size_t len) { - debug("sack_consume %lu\n", (unsigned long)len); + debug(c, "sack_consume %lu\n", (unsigned long)len); if(len > c->rcvbuf.used) { - debug("All SACK entries consumed"); + debug(c, "all SACK entries consumed\n"); c->sacks[0].len = 0; return; } - buffer_get(&c->rcvbuf, NULL, len); + buffer_discard(&c->rcvbuf, len); for(int i = 0; i < NSACKS && c->sacks[i].len;) { if(len < c->sacks[i].offset) { @@ -823,46 +1041,52 @@ static void sack_consume(struct utcp_connection *c, size_t len) { } for(int i = 0; i < NSACKS && c->sacks[i].len; i++) { - debug("SACK[%d] offset %u len %u\n", i, c->sacks[i].offset, c->sacks[i].len); + debug(c, "SACK[%d] offset %u len %u\n", i, c->sacks[i].offset, c->sacks[i].len); } } static void handle_out_of_order(struct utcp_connection *c, uint32_t offset, const void *data, size_t len) { - debug("out of order packet, offset %u\n", offset); + debug(c, "out of order packet, offset %u\n", offset); // Packet loss or reordering occured. Store the data in the buffer. ssize_t rxd = buffer_put_at(&c->rcvbuf, offset, data, len); - if(rxd < 0 || (size_t)rxd < len) { - abort(); + if(rxd <= 0) { + debug(c, "packet outside receive buffer, dropping\n"); + return; + } + + if((size_t)rxd < len) { + debug(c, "packet partially outside receive buffer\n"); + len = rxd; } // Make note of where we put it. for(int i = 0; i < NSACKS; i++) { if(!c->sacks[i].len) { // nothing to merge, add new entry - debug("New SACK entry %d\n", i); + debug(c, "new SACK entry %d\n", i); c->sacks[i].offset = offset; c->sacks[i].len = rxd; break; } else if(offset < c->sacks[i].offset) { if(offset + rxd < c->sacks[i].offset) { // insert before if(!c->sacks[NSACKS - 1].len) { // only if room left - debug("Insert SACK entry at %d\n", i); + debug(c, "insert SACK entry at %d\n", i); memmove(&c->sacks[i + 1], &c->sacks[i], (NSACKS - i - 1) * sizeof(c->sacks)[i]); c->sacks[i].offset = offset; c->sacks[i].len = rxd; } else { - debug("SACK entries full, dropping packet\n"); + debug(c, "SACK entries full, dropping packet\n"); } break; } else { // merge - debug("Merge with start of SACK entry at %d\n", i); + debug(c, "merge with start of SACK entry at %d\n", i); c->sacks[i].offset = offset; break; } } else if(offset <= c->sacks[i].offset + c->sacks[i].len) { if(offset + rxd > c->sacks[i].offset + c->sacks[i].len) { // merge - debug("Merge with end of SACK entry at %d\n", i); + debug(c, "merge with end of SACK entry at %d\n", i); c->sacks[i].len = offset + rxd - c->sacks[i].offset; // TODO: handle potential merge with next entry } @@ -872,28 +1096,38 @@ static void handle_out_of_order(struct utcp_connection *c, uint32_t offset, cons } for(int i = 0; i < NSACKS && c->sacks[i].len; i++) { - debug("SACK[%d] offset %u len %u\n", i, c->sacks[i].offset, c->sacks[i].len); + debug(c, "SACK[%d] offset %u len %u\n", i, c->sacks[i].offset, c->sacks[i].len); } } static void handle_in_order(struct utcp_connection *c, const void *data, size_t len) { - // Check if we can process out-of-order data now. - if(c->sacks[0].len && len >= c->sacks[0].offset) { // TODO: handle overlap with second SACK - debug("incoming packet len %lu connected with SACK at %u\n", (unsigned long)len, c->sacks[0].offset); - buffer_put_at(&c->rcvbuf, 0, data, len); // TODO: handle return value - len = max(len, c->sacks[0].offset + c->sacks[0].len); - data = c->rcvbuf.data; - } - if(c->recv) { ssize_t rxd = c->recv(c, data, len); - if(rxd < 0 || (size_t)rxd != len) { + if(rxd != (ssize_t)len) { // TODO: handle the application not accepting all data. abort(); } } + // Check if we can process out-of-order data now. + if(c->sacks[0].len && len >= c->sacks[0].offset) { + debug(c, "incoming packet len %lu connected with SACK at %u\n", (unsigned long)len, c->sacks[0].offset); + + if(len < c->sacks[0].offset + c->sacks[0].len) { + size_t offset = len; + len = c->sacks[0].offset + c->sacks[0].len; + size_t remainder = len - offset; + + ssize_t rxd = buffer_call(c, &c->rcvbuf, offset, remainder); + + if(rxd != (ssize_t)remainder) { + // TODO: handle the application not accepting all data. + abort(); + } + } + } + if(c->rcvbuf.used) { sack_consume(c, len); } @@ -901,20 +1135,54 @@ static void handle_in_order(struct utcp_connection *c, const void *data, size_t c->rcv.nxt += len; } +static void handle_unreliable(struct utcp_connection *c, const struct hdr *hdr, const void *data, size_t len) { + // Fast path for unfragmented packets + if(!hdr->wnd && !(hdr->ctl & MF)) { + if(c->recv) { + c->recv(c, data, len); + } -static void handle_incoming_data(struct utcp_connection *c, uint32_t seq, const void *data, size_t len) { - if(!is_reliable(c)) { - c->recv(c, data, len); - c->rcv.nxt = seq + len; + c->rcv.nxt = hdr->seq + len; return; } - uint32_t offset = seqdiff(seq, c->rcv.nxt); + // Ensure reassembled packet are not larger than 64 kiB + if(hdr->wnd >= MAX_UNRELIABLE_SIZE || hdr->wnd + len > MAX_UNRELIABLE_SIZE) { + return; + } - if(offset + len > c->rcvbuf.maxsize) { - abort(); + // Don't accept out of order fragments + if(hdr->wnd && hdr->seq != c->rcv.nxt) { + return; } + // Reset the receive buffer for the first fragment + if(!hdr->wnd) { + buffer_clear(&c->rcvbuf); + } + + ssize_t rxd = buffer_put_at(&c->rcvbuf, hdr->wnd, data, len); + + if(rxd != (ssize_t)len) { + return; + } + + // Send the packet if it's the final fragment + if(!(hdr->ctl & MF)) { + buffer_call(c, &c->rcvbuf, 0, hdr->wnd + len); + } + + c->rcv.nxt = hdr->seq + len; +} + +static void handle_incoming_data(struct utcp_connection *c, const struct hdr *hdr, const void *data, size_t len) { + if(!is_reliable(c)) { + handle_unreliable(c, hdr, data, len); + return; + } + + uint32_t offset = seqdiff(hdr->seq, c->rcv.nxt); + if(offset) { handle_out_of_order(c, offset, data, len); } else { @@ -940,13 +1208,12 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { return -1; } - print_packet(utcp, "recv", data, len); - // Drop packets smaller than the header struct hdr hdr; if(len < sizeof(hdr)) { + print_packet(NULL, "recv", data, len); errno = EBADMSG; return -1; } @@ -954,12 +1221,21 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { // Make a copy from the potentially unaligned data to a struct hdr memcpy(&hdr, ptr, sizeof(hdr)); + + // Try to match the packet to an existing connection + + struct utcp_connection *c = find_connection(utcp, hdr.dst, hdr.src); + print_packet(c, "recv", data, len); + + // Process the header + ptr += sizeof(hdr); len -= sizeof(hdr); // Drop packets with an unknown CTL flag - if(hdr.ctl & ~(SYN | ACK | RST | FIN)) { + if(hdr.ctl & ~(SYN | ACK | RST | FIN | MF)) { + print_packet(NULL, "recv", data, len); errno = EBADMSG; return -1; } @@ -1013,10 +1289,6 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { bool has_data = len || (hdr.ctl & (SYN | FIN)); - // Try to match the packet to an existing connection - - struct utcp_connection *c = find_connection(utcp, hdr.dst, hdr.src); - // Is it for a new connection? if(!c) { @@ -1055,6 +1327,7 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { c->flags = UTCP_TCP; } +synack: // Return SYN+ACK, go to SYN_RECEIVED state c->snd.wnd = hdr.wnd; c->rcv.irs = hdr.seq; @@ -1070,7 +1343,7 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { pkt.hdr.dst = c->dst; pkt.hdr.ack = c->rcv.irs + 1; pkt.hdr.seq = c->snd.iss; - pkt.hdr.wnd = c->rcv.wnd; + pkt.hdr.wnd = c->rcvbuf.maxsize; pkt.hdr.ctl = SYN | ACK; if(init) { @@ -1079,13 +1352,15 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { pkt.data[1] = 0; pkt.data[2] = 0; pkt.data[3] = c->flags & 0x7; - print_packet(c->utcp, "send", &pkt, sizeof(hdr) + 4); + print_packet(c, "send", &pkt, sizeof(hdr) + 4); utcp->send(utcp, &pkt, sizeof(hdr) + 4); } else { pkt.hdr.aux = 0; - print_packet(c->utcp, "send", &pkt, sizeof(hdr)); + print_packet(c, "send", &pkt, sizeof(hdr)); utcp->send(utcp, &pkt, sizeof(hdr)); } + + start_retransmit_timer(c); } else { // No, we don't want your packets, send a RST back len = 1; @@ -1095,13 +1370,13 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { return 0; } - debug("%p state %s\n", c->utcp, strstate[c->state]); + debug(c, "state %s\n", strstate[c->state]); // In case this is for a CLOSED connection, ignore the packet. // TODO: make it so incoming packets can never match a CLOSED connection. if(c->state == CLOSED) { - debug("Got packet for closed connection\n"); + debug(c, "got packet for closed connection\n"); return 0; } @@ -1157,7 +1432,7 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { } if(!acceptable) { - debug("Packet not acceptable, %u <= %u + %lu < %u\n", c->rcv.nxt, hdr.seq, (unsigned long)len, c->rcv.nxt + c->rcvbuf.maxsize); + debug(c, "packet not acceptable, %u <= %u + %lu < %u\n", c->rcv.nxt, hdr.seq, (unsigned long)len, c->rcv.nxt + c->rcvbuf.maxsize); // Ignore unacceptable RST packets. if(hdr.ctl & RST) { @@ -1167,6 +1442,15 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { // Otherwise, continue processing. len = 0; } + } else { +#if UTCP_DEBUG + int32_t rcv_offset = seqdiff(hdr.seq, c->rcv.nxt); + + if(rcv_offset) { + debug(c, "packet out of order, offset %u bytes", rcv_offset); + } + +#endif } c->snd.wnd = hdr.wnd; // TODO: move below @@ -1182,7 +1466,7 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { } if(hdr.ctl & ACK && (seqdiff(hdr.ack, c->snd.last) > 0 || seqdiff(hdr.ack, c->snd.una) < 0)) { - debug("Packet ack seqno out of range, %u <= %u < %u\n", c->snd.una, hdr.ack, c->snd.una + c->sndbuf.used); + debug(c, "packet ack seqno out of range, %u <= %u < %u\n", c->snd.una, hdr.ack, c->snd.una + c->sndbuf.used); // Ignore unacceptable RST packets. if(hdr.ctl & RST) { @@ -1287,13 +1571,13 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { // RTT measurement if(c->rtt_start.tv_sec) { if(c->rtt_seq == hdr.ack) { - struct timeval now, diff; - gettimeofday(&now, NULL); - timersub(&now, &c->rtt_start, &diff); - update_rtt(c, diff.tv_sec * 1000000 + diff.tv_usec); + struct timespec now; + clock_gettime(UTCP_CLOCK, &now); + int32_t diff = timespec_diff_usec(&now, &c->rtt_start); + update_rtt(c, diff); c->rtt_start.tv_sec = 0; } else if(c->rtt_seq < hdr.ack) { - debug("Cancelling RTT measurement: %u < %u\n", c->rtt_seq, hdr.ack); + debug(c, "cancelling RTT measurement: %u < %u\n", c->rtt_seq, hdr.ack); c->rtt_start.tv_sec = 0; } } @@ -1313,11 +1597,17 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { assert(data_acked >= 0); +#ifndef NDEBUG int32_t bufused = seqdiff(c->snd.last, c->snd.una); assert(data_acked <= bufused); +#endif if(data_acked) { - buffer_get(&c->sndbuf, NULL, data_acked); + buffer_discard(&c->sndbuf, data_acked); + + if(is_reliable(c)) { + c->do_poll = true; + } } // Also advance snd.nxt if possible @@ -1327,13 +1617,28 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { c->snd.una = hdr.ack; - c->dupack = 0; - c->snd.cwnd += utcp->mtu; + if(c->dupack) { + if(c->dupack >= 3) { + debug(c, "fast recovery ended\n"); + c->snd.cwnd = c->snd.ssthresh; + } + + c->dupack = 0; + } + + // Increase the congestion window according to RFC 5681 + if(c->snd.cwnd < c->snd.ssthresh) { + c->snd.cwnd += min(advanced, utcp->mss); // eq. 2 + } else { + c->snd.cwnd += max(1, (utcp->mss * utcp->mss) / c->snd.cwnd); // eq. 3 + } if(c->snd.cwnd > c->sndbuf.maxsize) { c->snd.cwnd = c->sndbuf.maxsize; } + debug_cwnd(c); + // Check if we have sent a FIN that is now ACKed. switch(c->state) { case FIN_WAIT_1: @@ -1345,8 +1650,8 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { case CLOSING: if(c->snd.una == c->snd.last) { - gettimeofday(&c->conn_timeout, NULL); - c->conn_timeout.tv_sec += 60; + clock_gettime(UTCP_CLOCK, &c->conn_timeout); + c->conn_timeout.tv_sec += utcp->timeout; set_state(c, TIME_WAIT); } @@ -1356,18 +1661,38 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { break; } } else { - if(!len && is_reliable(c)) { + if(!len && is_reliable(c) && c->snd.una != c->snd.last) { c->dupack++; + debug(c, "duplicate ACK %d\n", c->dupack); if(c->dupack == 3) { - debug("Triplicate ACK\n"); - //TODO: Resend one packet and go to fast recovery mode. See RFC 6582. - //We do a very simple variant here; reset the nxt pointer to the last acknowledged packet from the peer. - //Reset the congestion window so we wait for ACKs. - c->snd.nxt = c->snd.una; - c->snd.cwnd = utcp->mtu; - start_retransmit_timer(c); + // RFC 5681 fast recovery + debug(c, "fast recovery started\n", c->dupack); + uint32_t flightsize = seqdiff(c->snd.nxt, c->snd.una); + c->snd.ssthresh = max(flightsize / 2, utcp->mss * 2); // eq. 4 + c->snd.cwnd = min(c->snd.ssthresh + 3 * utcp->mss, c->sndbuf.maxsize); + + if(c->snd.cwnd > c->sndbuf.maxsize) { + c->snd.cwnd = c->sndbuf.maxsize; + } + + debug_cwnd(c); + + fast_retransmit(c); + } else if(c->dupack > 3) { + c->snd.cwnd += utcp->mss; + + if(c->snd.cwnd > c->sndbuf.maxsize) { + c->snd.cwnd = c->sndbuf.maxsize; + } + + debug_cwnd(c); } + + // We got an ACK which indicates the other side did get one of our packets. + // Reset the retransmission timer to avoid going to slow start, + // but don't touch the connection timeout. + start_retransmit_timer(c); } } @@ -1376,10 +1701,10 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { if(advanced) { if(c->snd.una == c->snd.last) { stop_retransmit_timer(c); - timerclear(&c->conn_timeout); + timespec_clear(&c->conn_timeout); } else if(is_reliable(c)) { start_retransmit_timer(c); - gettimeofday(&c->conn_timeout, NULL); + clock_gettime(UTCP_CLOCK, &c->conn_timeout); c->conn_timeout.tv_sec += utcp->timeout; } } @@ -1397,7 +1722,7 @@ skip_ack: } c->rcv.irs = hdr.seq; - c->rcv.nxt = hdr.seq; + c->rcv.nxt = hdr.seq + 1; if(c->shut_wr) { c->snd.last++; @@ -1406,10 +1731,12 @@ skip_ack: set_state(c, ESTABLISHED); } - // TODO: notify application of this somehow. break; case SYN_RECEIVED: + // This is a retransmit of a SYN, send back the SYNACK. + goto synack; + case ESTABLISHED: case FIN_WAIT_1: case FIN_WAIT_2: @@ -1417,8 +1744,8 @@ skip_ack: case CLOSING: case LAST_ACK: case TIME_WAIT: - // Ehm, no. We should never receive a second SYN. - return 0; + // This could be a retransmission. Ignore the SYN flag, but send an ACK back. + break; default: #ifdef UTCP_DEBUG @@ -1426,9 +1753,6 @@ skip_ack: #endif return 0; } - - // SYN counts as one sequence number - c->rcv.nxt++; } // 6. Process new data @@ -1480,7 +1804,7 @@ skip_ack: return 0; } - handle_incoming_data(c, hdr.seq, ptr, len); + handle_incoming_data(c, &hdr, ptr, len); } // 7. Process FIN stuff @@ -1504,8 +1828,8 @@ skip_ack: break; case FIN_WAIT_2: - gettimeofday(&c->conn_timeout, NULL); - c->conn_timeout.tv_sec += 60; + clock_gettime(UTCP_CLOCK, &c->conn_timeout); + c->conn_timeout.tv_sec += utcp->timeout; set_state(c, TIME_WAIT); break; @@ -1560,14 +1884,14 @@ reset: hdr.ctl = RST | ACK; } - print_packet(utcp, "send", &hdr, sizeof(hdr)); + print_packet(c, "send", &hdr, sizeof(hdr)); utcp->send(utcp, &hdr, sizeof(hdr)); return 0; } int utcp_shutdown(struct utcp_connection *c, int dir) { - debug("%p shutdown %d at %u\n", c ? c->utcp : NULL, dir, c ? c->snd.last : 0); + debug(c, "shutdown %d at %u\n", dir, c ? c->snd.last : 0); if(!c) { errno = EFAULT; @@ -1575,7 +1899,7 @@ int utcp_shutdown(struct utcp_connection *c, int dir) { } if(c->reapable) { - debug("Error: shutdown() called on closed connection %p\n", c); + debug(c, "shutdown() called on closed connection\n"); errno = EBADF; return -1; } @@ -1635,7 +1959,7 @@ int utcp_shutdown(struct utcp_connection *c, int dir) { ack(c, false); - if(!timerisset(&c->rtrx_timeout)) { + if(!timespec_isset(&c->rtrx_timeout)) { start_retransmit_timer(c); } @@ -1649,7 +1973,7 @@ static bool reset_connection(struct utcp_connection *c) { } if(c->reapable) { - debug("Error: abort() called on closed connection %p\n", c); + debug(c, "abort() called on closed connection\n"); errno = EBADF; return false; } @@ -1689,7 +2013,7 @@ static bool reset_connection(struct utcp_connection *c) { hdr.wnd = 0; hdr.ctl = RST; - print_packet(c->utcp, "send", &hdr, sizeof(hdr)); + print_packet(c, "send", &hdr, sizeof(hdr)); c->utcp->send(c->utcp, &hdr, sizeof(hdr)); return true; } @@ -1753,10 +2077,10 @@ int utcp_abort(struct utcp_connection *c) { * The return value is the time to the next timeout in milliseconds, * or maybe a negative value if the timeout is infinite. */ -struct timeval utcp_timeout(struct utcp *utcp) { - struct timeval now; - gettimeofday(&now, NULL); - struct timeval next = {now.tv_sec + 3600, now.tv_usec}; +struct timespec utcp_timeout(struct utcp *utcp) { + struct timespec now; + clock_gettime(UTCP_CLOCK, &now); + struct timespec next = {now.tv_sec + 3600, now.tv_nsec}; for(int i = 0; i < utcp->nconnections; i++) { struct utcp_connection *c = utcp->connections[i]; @@ -1768,7 +2092,7 @@ struct timeval utcp_timeout(struct utcp *utcp) { // delete connections that have been utcp_close()d. if(c->state == CLOSED) { if(c->reapable) { - debug("Reaping %p\n", c); + debug(c, "reaping\n"); free_connection(c); i--; } @@ -1776,7 +2100,7 @@ struct timeval utcp_timeout(struct utcp *utcp) { continue; } - if(timerisset(&c->conn_timeout) && timercmp(&c->conn_timeout, &now, <)) { + if(timespec_isset(&c->conn_timeout) && timespec_lt(&c->conn_timeout, &now)) { errno = ETIMEDOUT; c->state = CLOSED; @@ -1791,14 +2115,15 @@ struct timeval utcp_timeout(struct utcp *utcp) { continue; } - if(timerisset(&c->rtrx_timeout) && timercmp(&c->rtrx_timeout, &now, <)) { - debug("retransmit()\n"); + if(timespec_isset(&c->rtrx_timeout) && timespec_lt(&c->rtrx_timeout, &now)) { + debug(c, "retransmitting after timeout\n"); retransmit(c); } if(c->poll) { - if((c->state == ESTABLISHED || c->state == CLOSE_WAIT)) { - uint32_t len = buffer_free(&c->sndbuf); + if((c->state == ESTABLISHED || c->state == CLOSE_WAIT) && c->do_poll) { + c->do_poll = false; + uint32_t len = buffer_free(&c->sndbuf); if(len) { c->poll(c, len); @@ -1808,18 +2133,18 @@ struct timeval utcp_timeout(struct utcp *utcp) { } } - if(timerisset(&c->conn_timeout) && timercmp(&c->conn_timeout, &next, <)) { + if(timespec_isset(&c->conn_timeout) && timespec_lt(&c->conn_timeout, &next)) { next = c->conn_timeout; } - if(timerisset(&c->rtrx_timeout) && timercmp(&c->rtrx_timeout, &next, <)) { + if(timespec_isset(&c->rtrx_timeout) && timespec_lt(&c->rtrx_timeout, &next)) { next = c->rtrx_timeout; } } - struct timeval diff; + struct timespec diff; - timersub(&next, &now, &diff); + timespec_sub(&next, &now, &diff); return diff; } @@ -1849,13 +2174,24 @@ struct utcp *utcp_init(utcp_accept_t accept, utcp_pre_accept_t pre_accept, utcp_ return NULL; } + utcp_set_mtu(utcp, DEFAULT_MTU); + + if(!utcp->pkt) { + free(utcp); + return NULL; + } + + if(!CLOCK_GRANULARITY) { + struct timespec res; + clock_getres(UTCP_CLOCK, &res); + CLOCK_GRANULARITY = res.tv_sec * USEC_PER_SEC + res.tv_nsec / 1000; + } + utcp->accept = accept; utcp->pre_accept = pre_accept; utcp->send = send; utcp->priv = priv; - utcp->mtu = DEFAULT_MTU; utcp->timeout = DEFAULT_USER_TIMEOUT; // sec - utcp->rto = START_RTO; // usec return utcp; } @@ -1884,6 +2220,7 @@ void utcp_exit(struct utcp *utcp) { } free(utcp->connections); + free(utcp->pkt); free(utcp); } @@ -1891,11 +2228,31 @@ uint16_t utcp_get_mtu(struct utcp *utcp) { return utcp ? utcp->mtu : 0; } +uint16_t utcp_get_mss(struct utcp *utcp) { + return utcp ? utcp->mss : 0; +} + void utcp_set_mtu(struct utcp *utcp, uint16_t mtu) { - // TODO: handle overhead of the header - if(utcp) { - utcp->mtu = mtu; + if(!utcp) { + return; + } + + if(mtu <= sizeof(struct hdr)) { + return; } + + if(mtu > utcp->mtu) { + char *new = realloc(utcp->pkt, mtu + sizeof(struct hdr)); + + if(!new) { + return; + } + + utcp->pkt = new; + } + + utcp->mtu = mtu; + utcp->mss = mtu - sizeof(struct hdr); } void utcp_reset_timers(struct utcp *utcp) { @@ -1903,9 +2260,9 @@ void utcp_reset_timers(struct utcp *utcp) { return; } - struct timeval now, then; + struct timespec now, then; - gettimeofday(&now, NULL); + clock_gettime(UTCP_CLOCK, &now); then = now; @@ -1918,13 +2275,19 @@ void utcp_reset_timers(struct utcp *utcp) { continue; } - c->rtrx_timeout = now; - c->conn_timeout = then; + if(timespec_isset(&c->rtrx_timeout)) { + c->rtrx_timeout = now; + } + + if(timespec_isset(&c->conn_timeout)) { + c->conn_timeout = then; + } + c->rtt_start.tv_sec = 0; - } - if(utcp->rto > START_RTO) { - utcp->rto = START_RTO; + if(c->rto > START_RTO) { + c->rto = START_RTO; + } } } @@ -1969,6 +2332,8 @@ void utcp_set_sndbuf(struct utcp_connection *c, size_t size) { if(c->sndbuf.maxsize != size) { c->sndbuf.maxsize = -1; } + + c->do_poll = is_reliable(c) && buffer_free(&c->sndbuf); } size_t utcp_get_rcvbuf(struct utcp_connection *c) { @@ -2036,6 +2401,7 @@ void utcp_set_recv_cb(struct utcp_connection *c, utcp_recv_t recv) { void utcp_set_poll_cb(struct utcp_connection *c, utcp_poll_t poll) { if(c) { c->poll = poll; + c->do_poll = is_reliable(c) && buffer_free(&c->sndbuf); } } @@ -2057,36 +2423,49 @@ void utcp_expect_data(struct utcp_connection *c, bool expect) { if(expect) { // If we expect data, start the connection timer. - if(!timerisset(&c->conn_timeout)) { - gettimeofday(&c->conn_timeout, NULL); + if(!timespec_isset(&c->conn_timeout)) { + clock_gettime(UTCP_CLOCK, &c->conn_timeout); c->conn_timeout.tv_sec += c->utcp->timeout; } } else { // If we want to cancel expecting data, only clear the timer when there is no unACKed data. if(c->snd.una == c->snd.last) { - timerclear(&c->conn_timeout); + timespec_clear(&c->conn_timeout); } } } void utcp_offline(struct utcp *utcp, bool offline) { + struct timespec now; + clock_gettime(UTCP_CLOCK, &now); + for(int i = 0; i < utcp->nconnections; i++) { struct utcp_connection *c = utcp->connections[i]; - if(!c->reapable) { - utcp_expect_data(c, offline); - - // If we are online again, reset the retransmission timers, but keep the connection timeout as it is, - // to prevent peers toggling online/offline state frequently from keeping connections alive - // if there is no progress in sending actual data. - if(!offline) { - gettimeofday(&utcp->connections[i]->rtrx_timeout, NULL); - utcp->connections[i]->rtt_start.tv_sec = 0; + if(c->reapable) { + continue; + } + + utcp_expect_data(c, offline); + + if(!offline) { + if(timespec_isset(&c->rtrx_timeout)) { + c->rtrx_timeout = now; + } + + utcp->connections[i]->rtt_start.tv_sec = 0; + + if(c->rto > START_RTO) { + c->rto = START_RTO; } } } +} - if(!offline && utcp->rto > START_RTO) { - utcp->rto = START_RTO; - } +void utcp_set_retransmit_cb(struct utcp *utcp, utcp_retransmit_t retransmit) { + utcp->retransmit = retransmit; +} + +void utcp_set_clock_granularity(long granularity) { + CLOCK_GRANULARITY = granularity; }