X-Git-Url: http://git.meshlink.io/?p=utcp;a=blobdiff_plain;f=utcp.c;h=04d2b194c20f193aa44f2d4006b9afc1bd356b29;hp=ac598ddae28667fd9a40c28ace29255b7dd22f66;hb=HEAD;hpb=83f65f2eee2d5c7d58b27021770926d53f0f193e diff --git a/utcp.c b/utcp.c index ac598dd..04d2b19 100644 --- a/utcp.c +++ b/utcp.c @@ -27,8 +27,6 @@ #include #include #include -#include -#include #include #include "utcp_priv.h" @@ -45,16 +43,46 @@ #undef poll #endif -#ifndef timersub -#define timersub(a, b, r)\ - do {\ - (r)->tv_sec = (a)->tv_sec - (b)->tv_sec;\ - (r)->tv_usec = (a)->tv_usec - (b)->tv_usec;\ - if((r)->tv_usec < 0)\ - (r)->tv_sec--, (r)->tv_usec += USEC_PER_SEC;\ - } while (0) +#ifndef UTCP_CLOCK +#if defined(CLOCK_MONOTONIC_RAW) && defined(__x86_64__) +#define UTCP_CLOCK CLOCK_MONOTONIC_RAW +#else +#define UTCP_CLOCK CLOCK_MONOTONIC +#endif #endif +static void timespec_sub(const struct timespec *a, const struct timespec *b, struct timespec *r) { + r->tv_sec = a->tv_sec - b->tv_sec; + r->tv_nsec = a->tv_nsec - b->tv_nsec; + + if(r->tv_nsec < 0) { + r->tv_sec--, r->tv_nsec += NSEC_PER_SEC; + } +} + +static int32_t timespec_diff_usec(const struct timespec *a, const struct timespec *b) { + return (a->tv_sec - b->tv_sec) * 1000000 + (a->tv_nsec - b->tv_nsec) / 1000; +} + +static bool timespec_lt(const struct timespec *a, const struct timespec *b) { + if(a->tv_sec == b->tv_sec) { + return a->tv_nsec < b->tv_nsec; + } else { + return a->tv_sec < b->tv_sec; + } +} + +static void timespec_clear(struct timespec *a) { + a->tv_sec = 0; + a->tv_nsec = 0; +} + +static bool timespec_isset(const struct timespec *a) { + return a->tv_sec; +} + +static long CLOCK_GRANULARITY; // usec + static inline size_t min(size_t a, size_t b) { return a < b ? a : b; } @@ -117,12 +145,13 @@ static void print_packet(struct utcp_connection *c, const char *dir, const void *p = 0; - debug(c, "%s: len %lu src %u dst %u seq %u ack %u wnd %u aux %x ctl %s%s%s%s data %s\n", + debug(c, "%s: len %lu src %u dst %u seq %u ack %u wnd %u aux %x ctl %s%s%s%s%s data %s\n", dir, (unsigned long)len, hdr.src, hdr.dst, hdr.seq, hdr.ack, hdr.wnd, hdr.aux, hdr.ctl & SYN ? "SYN" : "", hdr.ctl & RST ? "RST" : "", hdr.ctl & FIN ? "FIN" : "", hdr.ctl & ACK ? "ACK" : "", + hdr.ctl & MF ? "MF" : "", str ); } @@ -140,7 +169,7 @@ static void set_state(struct utcp_connection *c, enum state state) { c->state = state; if(state == ESTABLISHED) { - timerclear(&c->conn_timeout); + timespec_clear(&c->conn_timeout); } debug(c, "state %s\n", strstate[state]); @@ -171,12 +200,40 @@ static int32_t seqdiff(uint32_t a, uint32_t b) { } // Buffer functions -// TODO: convert to ringbuffers to avoid memmove() operations. +static bool buffer_wraps(struct buffer *buf) { + return buf->size - buf->offset < buf->used; +} + +static bool buffer_resize(struct buffer *buf, uint32_t newsize) { + char *newdata = realloc(buf->data, newsize); + + if(!newdata) { + return false; + } + + buf->data = newdata; + + if(buffer_wraps(buf)) { + // Shift the right part of the buffer until it hits the end of the new buffer. + // Old situation: + // [345......012] + // New situation: + // [345.........|........012] + uint32_t tailsize = buf->size - buf->offset; + uint32_t newoffset = newsize - tailsize; + memmove(buf->data + newoffset, buf->data + buf->offset, tailsize); + buf->offset = newoffset; + } + + buf->size = newsize; + return true; +} // Store data into the buffer static ssize_t buffer_put_at(struct buffer *buf, size_t offset, const void *data, size_t len) { debug(NULL, "buffer_put_at %lu %lu %lu\n", (unsigned long)buf->used, (unsigned long)offset, (unsigned long)len); + // Ensure we don't store more than maxsize bytes in total size_t required = offset + len; if(required > buf->maxsize) { @@ -188,32 +245,41 @@ static ssize_t buffer_put_at(struct buffer *buf, size_t offset, const void *data required = buf->maxsize; } + // Check if we need to resize the buffer if(required > buf->size) { size_t newsize = buf->size; if(!newsize) { - newsize = required; - } else { - do { - newsize *= 2; - } while(newsize < required); + newsize = 4096; } + do { + newsize *= 2; + } while(newsize < required); + if(newsize > buf->maxsize) { newsize = buf->maxsize; } - char *newdata = realloc(buf->data, newsize); - - if(!newdata) { + if(!buffer_resize(buf, newsize)) { return -1; } + } - buf->data = newdata; - buf->size = newsize; + uint32_t realoffset = buf->offset + offset; + + if(buf->size - buf->offset <= offset) { + // The offset wrapped + realoffset -= buf->size; } - memcpy(buf->data + offset, data, len); + if(buf->size - realoffset < len) { + // The new chunk of data must be wrapped + memcpy(buf->data + realoffset, data, buf->size - realoffset); + memcpy(buf->data, (char *)data + buf->size - realoffset, len - (buf->size - realoffset)); + } else { + memcpy(buf->data + realoffset, data, len); + } if(required > buf->used) { buf->used = required; @@ -226,52 +292,116 @@ static ssize_t buffer_put(struct buffer *buf, const void *data, size_t len) { return buffer_put_at(buf, buf->used, data, len); } -// Get data from the buffer. data can be NULL. -static ssize_t buffer_get(struct buffer *buf, void *data, size_t len) { - if(len > buf->used) { - len = buf->used; +// Copy data from the buffer without removing it. +static ssize_t buffer_copy(struct buffer *buf, void *data, size_t offset, size_t len) { + // Ensure we don't copy more than is actually stored in the buffer + if(offset >= buf->used) { + return 0; } - if(data) { - memcpy(data, buf->data, len); + if(buf->used - offset < len) { + len = buf->used - offset; } - if(len < buf->used) { - memmove(buf->data, buf->data + len, buf->used - len); + uint32_t realoffset = buf->offset + offset; + + if(buf->size - buf->offset <= offset) { + // The offset wrapped + realoffset -= buf->size; + } + + if(buf->size - realoffset < len) { + // The data is wrapped + memcpy(data, buf->data + realoffset, buf->size - realoffset); + memcpy((char *)data + buf->size - realoffset, buf->data, len - (buf->size - realoffset)); + } else { + memcpy(data, buf->data + realoffset, len); } - buf->used -= len; return len; } // Copy data from the buffer without removing it. -static ssize_t buffer_copy(struct buffer *buf, void *data, size_t offset, size_t len) { +static ssize_t buffer_call(struct utcp_connection *c, struct buffer *buf, size_t offset, size_t len) { + if(!c->recv) { + return len; + } + + // Ensure we don't copy more than is actually stored in the buffer if(offset >= buf->used) { return 0; } - if(offset + len > buf->used) { + if(buf->used - offset < len) { len = buf->used - offset; } - memcpy(data, buf->data + offset, len); - return len; -} + uint32_t realoffset = buf->offset + offset; -static bool buffer_init(struct buffer *buf, uint32_t len, uint32_t maxlen) { - memset(buf, 0, sizeof(*buf)); + if(buf->size - buf->offset <= offset) { + // The offset wrapped + realoffset -= buf->size; + } - if(len) { - buf->data = malloc(len); + if(buf->size - realoffset < len) { + // The data is wrapped + ssize_t rx1 = c->recv(c, buf->data + realoffset, buf->size - realoffset); - if(!buf->data) { - return false; + if(rx1 < buf->size - realoffset) { + return rx1; } + + // The channel might have been closed by the previous callback + if(!c->recv) { + return len; + } + + ssize_t rx2 = c->recv(c, buf->data, len - (buf->size - realoffset)); + + if(rx2 < 0) { + return rx2; + } else { + return rx1 + rx2; + } + } else { + return c->recv(c, buf->data + realoffset, len); } +} - buf->size = len; - buf->maxsize = maxlen; - return true; +// Discard data from the buffer. +static ssize_t buffer_discard(struct buffer *buf, size_t len) { + if(buf->used < len) { + len = buf->used; + } + + if(buf->size - buf->offset <= len) { + buf->offset -= buf->size; + } + + if(buf->used == len) { + buf->offset = 0; + } else { + buf->offset += len; + } + + buf->used -= len; + + return len; +} + +static void buffer_clear(struct buffer *buf) { + buf->used = 0; + buf->offset = 0; +} + +static bool buffer_set_size(struct buffer *buf, uint32_t minsize, uint32_t maxsize) { + if(maxsize < minsize) { + maxsize = minsize; + } + + buf->maxsize = maxsize; + + return buf->size >= minsize || buffer_resize(buf, minsize); } static void buffer_exit(struct buffer *buf) { @@ -280,7 +410,7 @@ static void buffer_exit(struct buffer *buf) { } static uint32_t buffer_free(const struct buffer *buf) { - return buf->maxsize - buf->used; + return buf->maxsize > buf->used ? buf->maxsize - buf->used : 0; } // Connections are stored in a sorted list. @@ -378,12 +508,12 @@ static struct utcp_connection *allocate_connection(struct utcp *utcp, uint16_t s return NULL; } - if(!buffer_init(&c->sndbuf, DEFAULT_SNDBUFSIZE, DEFAULT_MAXSNDBUFSIZE)) { + if(!buffer_set_size(&c->sndbuf, DEFAULT_SNDBUFSIZE, DEFAULT_MAXSNDBUFSIZE)) { free(c); return NULL; } - if(!buffer_init(&c->rcvbuf, DEFAULT_RCVBUFSIZE, DEFAULT_MAXRCVBUFSIZE)) { + if(!buffer_set_size(&c->rcvbuf, DEFAULT_RCVBUFSIZE, DEFAULT_MAXRCVBUFSIZE)) { buffer_exit(&c->sndbuf); free(c); return NULL; @@ -401,9 +531,12 @@ static struct utcp_connection *allocate_connection(struct utcp *utcp, uint16_t s c->snd.una = c->snd.iss; c->snd.nxt = c->snd.iss + 1; c->snd.last = c->snd.nxt; - c->snd.cwnd = (utcp->mtu > 2190 ? 2 : utcp->mtu > 1095 ? 3 : 4) * utcp->mtu; + c->snd.cwnd = (utcp->mss > 2190 ? 2 : utcp->mss > 1095 ? 3 : 4) * utcp->mss; c->snd.ssthresh = ~0; debug_cwnd(c); + c->srtt = 0; + c->rttvar = 0; + c->rto = START_RTO; c->utcp = utcp; // Add it to the sorted list of connections @@ -429,39 +562,45 @@ static void update_rtt(struct utcp_connection *c, uint32_t rtt) { return; } - struct utcp *utcp = c->utcp; - - if(!utcp->srtt) { - utcp->srtt = rtt; - utcp->rttvar = rtt / 2; + if(!c->srtt) { + c->srtt = rtt; + c->rttvar = rtt / 2; } else { - utcp->rttvar = (utcp->rttvar * 3 + absdiff(utcp->srtt, rtt)) / 4; - utcp->srtt = (utcp->srtt * 7 + rtt) / 8; + c->rttvar = (c->rttvar * 3 + absdiff(c->srtt, rtt)) / 4; + c->srtt = (c->srtt * 7 + rtt) / 8; } - utcp->rto = utcp->srtt + max(4 * utcp->rttvar, CLOCK_GRANULARITY); + c->rto = c->srtt + max(4 * c->rttvar, CLOCK_GRANULARITY); - if(utcp->rto > MAX_RTO) { - utcp->rto = MAX_RTO; + if(c->rto > MAX_RTO) { + c->rto = MAX_RTO; } - debug(c, "rtt %u srtt %u rttvar %u rto %u\n", rtt, utcp->srtt, utcp->rttvar, utcp->rto); + debug(c, "rtt %u srtt %u rttvar %u rto %u\n", rtt, c->srtt, c->rttvar, c->rto); } static void start_retransmit_timer(struct utcp_connection *c) { - gettimeofday(&c->rtrx_timeout, NULL); - c->rtrx_timeout.tv_usec += c->utcp->rto; + clock_gettime(UTCP_CLOCK, &c->rtrx_timeout); + + uint32_t rto = c->rto; + + while(rto > USEC_PER_SEC) { + c->rtrx_timeout.tv_sec++; + rto -= USEC_PER_SEC; + } + + c->rtrx_timeout.tv_nsec += rto * 1000; - while(c->rtrx_timeout.tv_usec >= 1000000) { - c->rtrx_timeout.tv_usec -= 1000000; + if(c->rtrx_timeout.tv_nsec >= NSEC_PER_SEC) { + c->rtrx_timeout.tv_nsec -= NSEC_PER_SEC; c->rtrx_timeout.tv_sec++; } - debug(c, "rtrx_timeout %ld.%06lu\n", c->rtrx_timeout.tv_sec, c->rtrx_timeout.tv_usec); + debug(c, "rtrx_timeout %ld.%06lu\n", c->rtrx_timeout.tv_sec, c->rtrx_timeout.tv_nsec); } static void stop_retransmit_timer(struct utcp_connection *c) { - timerclear(&c->rtrx_timeout); + timespec_clear(&c->rtrx_timeout); debug(c, "rtrx_timeout cleared\n"); } @@ -500,7 +639,7 @@ struct utcp_connection *utcp_connect_ex(struct utcp *utcp, uint16_t dst, utcp_re print_packet(c, "send", &pkt, sizeof(pkt)); utcp->send(utcp, &pkt, sizeof(pkt)); - gettimeofday(&c->conn_timeout, NULL); + clock_gettime(UTCP_CLOCK, &c->conn_timeout); c->conn_timeout.tv_sec += utcp->timeout; start_retransmit_timer(c); @@ -526,7 +665,7 @@ void utcp_accept(struct utcp_connection *c, utcp_recv_t recv, void *priv) { static void ack(struct utcp_connection *c, bool sendatleastone) { int32_t left = seqdiff(c->snd.last, c->snd.nxt); - int32_t cwndleft = min(c->snd.cwnd, c->snd.wnd) - seqdiff(c->snd.nxt, c->snd.una); + int32_t cwndleft = is_reliable(c) ? min(c->snd.cwnd, c->snd.wnd) - seqdiff(c->snd.nxt, c->snd.una) : MAX_UNRELIABLE_SIZE; assert(left >= 0); @@ -535,8 +674,8 @@ static void ack(struct utcp_connection *c, bool sendatleastone) { } else if(cwndleft < left) { left = cwndleft; - if(!sendatleastone || cwndleft > c->utcp->mtu) { - left -= left % c->utcp->mtu; + if(!sendatleastone || cwndleft > c->utcp->mss) { + left -= left % c->utcp->mss; } } @@ -549,23 +688,17 @@ static void ack(struct utcp_connection *c, bool sendatleastone) { struct { struct hdr hdr; uint8_t data[]; - } *pkt; - - pkt = malloc(sizeof(pkt->hdr) + c->utcp->mtu); - - if(!pkt) { - return; - } + } *pkt = c->utcp->pkt; pkt->hdr.src = c->src; pkt->hdr.dst = c->dst; pkt->hdr.ack = c->rcv.nxt; - pkt->hdr.wnd = c->rcvbuf.maxsize; + pkt->hdr.wnd = is_reliable(c) ? c->rcvbuf.maxsize : 0; pkt->hdr.ctl = ACK; pkt->hdr.aux = 0; do { - uint32_t seglen = left > c->utcp->mtu ? c->utcp->mtu : left; + uint32_t seglen = left > c->utcp->mss ? c->utcp->mss : left; pkt->hdr.seq = c->snd.nxt; buffer_copy(&c->sndbuf, pkt->data, seqdiff(c->snd.nxt, c->snd.una), seglen); @@ -573,6 +706,14 @@ static void ack(struct utcp_connection *c, bool sendatleastone) { c->snd.nxt += seglen; left -= seglen; + if(!is_reliable(c)) { + if(left) { + pkt->hdr.ctl |= MF; + } else { + pkt->hdr.ctl &= ~MF; + } + } + if(seglen && fin_wanted(c, c->snd.nxt)) { seglen--; pkt->hdr.ctl |= FIN; @@ -580,16 +721,18 @@ static void ack(struct utcp_connection *c, bool sendatleastone) { if(!c->rtt_start.tv_sec) { // Start RTT measurement - gettimeofday(&c->rtt_start, NULL); + clock_gettime(UTCP_CLOCK, &c->rtt_start); c->rtt_seq = pkt->hdr.seq + seglen; debug(c, "starting RTT measurement, expecting ack %u\n", c->rtt_seq); } print_packet(c, "send", pkt, sizeof(pkt->hdr) + seglen); c->utcp->send(c->utcp, pkt, sizeof(pkt->hdr) + seglen); - } while(left); - free(pkt); + if(left && !is_reliable(c)) { + pkt->hdr.wnd += seglen; + } + } while(left); } ssize_t utcp_send(struct utcp_connection *c, const void *data, size_t len) { @@ -649,8 +792,13 @@ ssize_t utcp_send(struct utcp_connection *c, const void *data, size_t len) { // Add data to send buffer. - if(is_reliable(c) || (c->state != SYN_SENT && c->state != SYN_RECEIVED)) { + if(is_reliable(c)) { len = buffer_put(&c->sndbuf, data, len); + } else if(c->state != SYN_SENT && c->state != SYN_RECEIVED) { + if(len > MAX_UNRELIABLE_SIZE || buffer_put(&c->sndbuf, data, len) != (ssize_t)len) { + errno = EMSGSIZE; + return -1; + } } else { return 0; } @@ -676,15 +824,15 @@ ssize_t utcp_send(struct utcp_connection *c, const void *data, size_t len) { if(!is_reliable(c)) { c->snd.una = c->snd.nxt = c->snd.last; - buffer_get(&c->sndbuf, NULL, c->sndbuf.used); + buffer_discard(&c->sndbuf, c->sndbuf.used); } - if(is_reliable(c) && !timerisset(&c->rtrx_timeout)) { + if(is_reliable(c) && !timespec_isset(&c->rtrx_timeout)) { start_retransmit_timer(c); } - if(is_reliable(c) && !timerisset(&c->conn_timeout)) { - gettimeofday(&c->conn_timeout, NULL); + if(is_reliable(c) && !timespec_isset(&c->conn_timeout)) { + clock_gettime(UTCP_CLOCK, &c->conn_timeout); c->conn_timeout.tv_sec += c->utcp->timeout; } @@ -708,13 +856,7 @@ static void fast_retransmit(struct utcp_connection *c) { struct { struct hdr hdr; uint8_t data[]; - } *pkt; - - pkt = malloc(sizeof(pkt->hdr) + c->utcp->mtu); - - if(!pkt) { - return; - } + } *pkt = c->utcp->pkt; pkt->hdr.src = c->src; pkt->hdr.dst = c->dst; @@ -731,7 +873,7 @@ static void fast_retransmit(struct utcp_connection *c) { pkt->hdr.seq = c->snd.una; pkt->hdr.ack = c->rcv.nxt; pkt->hdr.ctl = ACK; - uint32_t len = min(seqdiff(c->snd.last, c->snd.una), utcp->mtu); + uint32_t len = min(seqdiff(c->snd.last, c->snd.una), utcp->mss); if(fin_wanted(c, c->snd.una + len)) { len--; @@ -746,8 +888,6 @@ static void fast_retransmit(struct utcp_connection *c) { default: break; } - - free(pkt); } static void retransmit(struct utcp_connection *c) { @@ -759,16 +899,14 @@ static void retransmit(struct utcp_connection *c) { struct utcp *utcp = c->utcp; + if(utcp->retransmit) { + utcp->retransmit(c); + } + struct { struct hdr hdr; uint8_t data[]; - } *pkt; - - pkt = malloc(sizeof(pkt->hdr) + c->utcp->mtu); - - if(!pkt) { - return; - } + } *pkt = c->utcp->pkt; pkt->hdr.src = c->src; pkt->hdr.dst = c->dst; @@ -808,7 +946,7 @@ static void retransmit(struct utcp_connection *c) { pkt->hdr.seq = c->snd.una; pkt->hdr.ack = c->rcv.nxt; pkt->hdr.ctl = ACK; - uint32_t len = min(seqdiff(c->snd.last, c->snd.una), utcp->mtu); + uint32_t len = min(seqdiff(c->snd.last, c->snd.una), utcp->mss); if(fin_wanted(c, c->snd.una + len)) { len--; @@ -817,8 +955,8 @@ static void retransmit(struct utcp_connection *c) { // RFC 5681 slow start after timeout uint32_t flightsize = seqdiff(c->snd.nxt, c->snd.una); - c->snd.ssthresh = max(flightsize / 2, utcp->mtu * 2); // eq. 4 - c->snd.cwnd = utcp->mtu; + c->snd.ssthresh = max(flightsize / 2, utcp->mss * 2); // eq. 4 + c->snd.cwnd = utcp->mss; debug_cwnd(c); buffer_copy(&c->sndbuf, pkt->data, 0, len); @@ -841,17 +979,17 @@ static void retransmit(struct utcp_connection *c) { } start_retransmit_timer(c); - utcp->rto *= 2; + c->rto *= 2; - if(utcp->rto > MAX_RTO) { - utcp->rto = MAX_RTO; + if(c->rto > MAX_RTO) { + c->rto = MAX_RTO; } c->rtt_start.tv_sec = 0; // invalidate RTT timer c->dupack = 0; // cancel any ongoing fast recovery cleanup: - free(pkt); + return; } /* Update receive buffer and SACK entries after consuming data. @@ -881,7 +1019,7 @@ static void sack_consume(struct utcp_connection *c, size_t len) { return; } - buffer_get(&c->rcvbuf, NULL, len); + buffer_discard(&c->rcvbuf, len); for(int i = 0; i < NSACKS && c->sacks[i].len;) { if(len < c->sacks[i].offset) { @@ -912,8 +1050,14 @@ static void handle_out_of_order(struct utcp_connection *c, uint32_t offset, cons // Packet loss or reordering occured. Store the data in the buffer. ssize_t rxd = buffer_put_at(&c->rcvbuf, offset, data, len); - if(rxd < 0 || (size_t)rxd < len) { - abort(); + if(rxd <= 0) { + debug(c, "packet outside receive buffer, dropping\n"); + return; + } + + if((size_t)rxd < len) { + debug(c, "packet partially outside receive buffer\n"); + len = rxd; } // Make note of where we put it. @@ -957,23 +1101,33 @@ static void handle_out_of_order(struct utcp_connection *c, uint32_t offset, cons } static void handle_in_order(struct utcp_connection *c, const void *data, size_t len) { - // Check if we can process out-of-order data now. - if(c->sacks[0].len && len >= c->sacks[0].offset) { // TODO: handle overlap with second SACK - debug(c, "incoming packet len %lu connected with SACK at %u\n", (unsigned long)len, c->sacks[0].offset); - buffer_put_at(&c->rcvbuf, 0, data, len); // TODO: handle return value - len = max(len, c->sacks[0].offset + c->sacks[0].len); - data = c->rcvbuf.data; - } - if(c->recv) { ssize_t rxd = c->recv(c, data, len); - if(rxd < 0 || (size_t)rxd != len) { + if(rxd != (ssize_t)len) { // TODO: handle the application not accepting all data. abort(); } } + // Check if we can process out-of-order data now. + if(c->sacks[0].len && len >= c->sacks[0].offset) { + debug(c, "incoming packet len %lu connected with SACK at %u\n", (unsigned long)len, c->sacks[0].offset); + + if(len < c->sacks[0].offset + c->sacks[0].len) { + size_t offset = len; + len = c->sacks[0].offset + c->sacks[0].len; + size_t remainder = len - offset; + + ssize_t rxd = buffer_call(c, &c->rcvbuf, offset, remainder); + + if(rxd != (ssize_t)remainder) { + // TODO: handle the application not accepting all data. + abort(); + } + } + } + if(c->rcvbuf.used) { sack_consume(c, len); } @@ -981,20 +1135,54 @@ static void handle_in_order(struct utcp_connection *c, const void *data, size_t c->rcv.nxt += len; } +static void handle_unreliable(struct utcp_connection *c, const struct hdr *hdr, const void *data, size_t len) { + // Fast path for unfragmented packets + if(!hdr->wnd && !(hdr->ctl & MF)) { + if(c->recv) { + c->recv(c, data, len); + } -static void handle_incoming_data(struct utcp_connection *c, uint32_t seq, const void *data, size_t len) { - if(!is_reliable(c)) { - c->recv(c, data, len); - c->rcv.nxt = seq + len; + c->rcv.nxt = hdr->seq + len; return; } - uint32_t offset = seqdiff(seq, c->rcv.nxt); + // Ensure reassembled packet are not larger than 64 kiB + if(hdr->wnd >= MAX_UNRELIABLE_SIZE || hdr->wnd + len > MAX_UNRELIABLE_SIZE) { + return; + } - if(offset + len > c->rcvbuf.maxsize) { - abort(); + // Don't accept out of order fragments + if(hdr->wnd && hdr->seq != c->rcv.nxt) { + return; + } + + // Reset the receive buffer for the first fragment + if(!hdr->wnd) { + buffer_clear(&c->rcvbuf); + } + + ssize_t rxd = buffer_put_at(&c->rcvbuf, hdr->wnd, data, len); + + if(rxd != (ssize_t)len) { + return; + } + + // Send the packet if it's the final fragment + if(!(hdr->ctl & MF)) { + buffer_call(c, &c->rcvbuf, 0, hdr->wnd + len); } + c->rcv.nxt = hdr->seq + len; +} + +static void handle_incoming_data(struct utcp_connection *c, const struct hdr *hdr, const void *data, size_t len) { + if(!is_reliable(c)) { + handle_unreliable(c, hdr, data, len); + return; + } + + uint32_t offset = seqdiff(hdr->seq, c->rcv.nxt); + if(offset) { handle_out_of_order(c, offset, data, len); } else { @@ -1046,7 +1234,7 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { // Drop packets with an unknown CTL flag - if(hdr.ctl & ~(SYN | ACK | RST | FIN)) { + if(hdr.ctl & ~(SYN | ACK | RST | FIN | MF)) { print_packet(NULL, "recv", data, len); errno = EBADMSG; return -1; @@ -1171,6 +1359,8 @@ synack: print_packet(c, "send", &pkt, sizeof(hdr)); utcp->send(utcp, &pkt, sizeof(hdr)); } + + start_retransmit_timer(c); } else { // No, we don't want your packets, send a RST back len = 1; @@ -1252,6 +1442,15 @@ synack: // Otherwise, continue processing. len = 0; } + } else { +#if UTCP_DEBUG + int32_t rcv_offset = seqdiff(hdr.seq, c->rcv.nxt); + + if(rcv_offset) { + debug(c, "packet out of order, offset %u bytes", rcv_offset); + } + +#endif } c->snd.wnd = hdr.wnd; // TODO: move below @@ -1372,10 +1571,10 @@ synack: // RTT measurement if(c->rtt_start.tv_sec) { if(c->rtt_seq == hdr.ack) { - struct timeval now, diff; - gettimeofday(&now, NULL); - timersub(&now, &c->rtt_start, &diff); - update_rtt(c, diff.tv_sec * 1000000 + diff.tv_usec); + struct timespec now; + clock_gettime(UTCP_CLOCK, &now); + int32_t diff = timespec_diff_usec(&now, &c->rtt_start); + update_rtt(c, diff); c->rtt_start.tv_sec = 0; } else if(c->rtt_seq < hdr.ack) { debug(c, "cancelling RTT measurement: %u < %u\n", c->rtt_seq, hdr.ack); @@ -1404,7 +1603,11 @@ synack: #endif if(data_acked) { - buffer_get(&c->sndbuf, NULL, data_acked); + buffer_discard(&c->sndbuf, data_acked); + + if(is_reliable(c)) { + c->do_poll = true; + } } // Also advance snd.nxt if possible @@ -1425,9 +1628,9 @@ synack: // Increase the congestion window according to RFC 5681 if(c->snd.cwnd < c->snd.ssthresh) { - c->snd.cwnd += min(advanced, utcp->mtu); // eq. 2 + c->snd.cwnd += min(advanced, utcp->mss); // eq. 2 } else { - c->snd.cwnd += max(1, (utcp->mtu * utcp->mtu) / c->snd.cwnd); // eq. 3 + c->snd.cwnd += max(1, (utcp->mss * utcp->mss) / c->snd.cwnd); // eq. 3 } if(c->snd.cwnd > c->sndbuf.maxsize) { @@ -1447,7 +1650,7 @@ synack: case CLOSING: if(c->snd.una == c->snd.last) { - gettimeofday(&c->conn_timeout, NULL); + clock_gettime(UTCP_CLOCK, &c->conn_timeout); c->conn_timeout.tv_sec += utcp->timeout; set_state(c, TIME_WAIT); } @@ -1466,8 +1669,8 @@ synack: // RFC 5681 fast recovery debug(c, "fast recovery started\n", c->dupack); uint32_t flightsize = seqdiff(c->snd.nxt, c->snd.una); - c->snd.ssthresh = max(flightsize / 2, utcp->mtu * 2); // eq. 4 - c->snd.cwnd = min(c->snd.ssthresh + 3 * utcp->mtu, c->sndbuf.maxsize); + c->snd.ssthresh = max(flightsize / 2, utcp->mss * 2); // eq. 4 + c->snd.cwnd = min(c->snd.ssthresh + 3 * utcp->mss, c->sndbuf.maxsize); if(c->snd.cwnd > c->sndbuf.maxsize) { c->snd.cwnd = c->sndbuf.maxsize; @@ -1477,7 +1680,7 @@ synack: fast_retransmit(c); } else if(c->dupack > 3) { - c->snd.cwnd += utcp->mtu; + c->snd.cwnd += utcp->mss; if(c->snd.cwnd > c->sndbuf.maxsize) { c->snd.cwnd = c->sndbuf.maxsize; @@ -1498,10 +1701,10 @@ synack: if(advanced) { if(c->snd.una == c->snd.last) { stop_retransmit_timer(c); - timerclear(&c->conn_timeout); + timespec_clear(&c->conn_timeout); } else if(is_reliable(c)) { start_retransmit_timer(c); - gettimeofday(&c->conn_timeout, NULL); + clock_gettime(UTCP_CLOCK, &c->conn_timeout); c->conn_timeout.tv_sec += utcp->timeout; } } @@ -1519,7 +1722,7 @@ skip_ack: } c->rcv.irs = hdr.seq; - c->rcv.nxt = hdr.seq; + c->rcv.nxt = hdr.seq + 1; if(c->shut_wr) { c->snd.last++; @@ -1528,7 +1731,6 @@ skip_ack: set_state(c, ESTABLISHED); } - // TODO: notify application of this somehow. break; case SYN_RECEIVED: @@ -1542,8 +1744,8 @@ skip_ack: case CLOSING: case LAST_ACK: case TIME_WAIT: - // Ehm, no. We should never receive a second SYN. - return 0; + // This could be a retransmission. Ignore the SYN flag, but send an ACK back. + break; default: #ifdef UTCP_DEBUG @@ -1551,9 +1753,6 @@ skip_ack: #endif return 0; } - - // SYN counts as one sequence number - c->rcv.nxt++; } // 6. Process new data @@ -1605,7 +1804,7 @@ skip_ack: return 0; } - handle_incoming_data(c, hdr.seq, ptr, len); + handle_incoming_data(c, &hdr, ptr, len); } // 7. Process FIN stuff @@ -1629,7 +1828,7 @@ skip_ack: break; case FIN_WAIT_2: - gettimeofday(&c->conn_timeout, NULL); + clock_gettime(UTCP_CLOCK, &c->conn_timeout); c->conn_timeout.tv_sec += utcp->timeout; set_state(c, TIME_WAIT); break; @@ -1760,7 +1959,7 @@ int utcp_shutdown(struct utcp_connection *c, int dir) { ack(c, false); - if(!timerisset(&c->rtrx_timeout)) { + if(!timespec_isset(&c->rtrx_timeout)) { start_retransmit_timer(c); } @@ -1878,10 +2077,10 @@ int utcp_abort(struct utcp_connection *c) { * The return value is the time to the next timeout in milliseconds, * or maybe a negative value if the timeout is infinite. */ -struct timeval utcp_timeout(struct utcp *utcp) { - struct timeval now; - gettimeofday(&now, NULL); - struct timeval next = {now.tv_sec + 3600, now.tv_usec}; +struct timespec utcp_timeout(struct utcp *utcp) { + struct timespec now; + clock_gettime(UTCP_CLOCK, &now); + struct timespec next = {now.tv_sec + 3600, now.tv_nsec}; for(int i = 0; i < utcp->nconnections; i++) { struct utcp_connection *c = utcp->connections[i]; @@ -1901,7 +2100,7 @@ struct timeval utcp_timeout(struct utcp *utcp) { continue; } - if(timerisset(&c->conn_timeout) && timercmp(&c->conn_timeout, &now, <)) { + if(timespec_isset(&c->conn_timeout) && timespec_lt(&c->conn_timeout, &now)) { errno = ETIMEDOUT; c->state = CLOSED; @@ -1916,14 +2115,15 @@ struct timeval utcp_timeout(struct utcp *utcp) { continue; } - if(timerisset(&c->rtrx_timeout) && timercmp(&c->rtrx_timeout, &now, <)) { + if(timespec_isset(&c->rtrx_timeout) && timespec_lt(&c->rtrx_timeout, &now)) { debug(c, "retransmitting after timeout\n"); retransmit(c); } if(c->poll) { - if((c->state == ESTABLISHED || c->state == CLOSE_WAIT)) { - uint32_t len = buffer_free(&c->sndbuf); + if((c->state == ESTABLISHED || c->state == CLOSE_WAIT) && c->do_poll) { + c->do_poll = false; + uint32_t len = buffer_free(&c->sndbuf); if(len) { c->poll(c, len); @@ -1933,18 +2133,18 @@ struct timeval utcp_timeout(struct utcp *utcp) { } } - if(timerisset(&c->conn_timeout) && timercmp(&c->conn_timeout, &next, <)) { + if(timespec_isset(&c->conn_timeout) && timespec_lt(&c->conn_timeout, &next)) { next = c->conn_timeout; } - if(timerisset(&c->rtrx_timeout) && timercmp(&c->rtrx_timeout, &next, <)) { + if(timespec_isset(&c->rtrx_timeout) && timespec_lt(&c->rtrx_timeout, &next)) { next = c->rtrx_timeout; } } - struct timeval diff; + struct timespec diff; - timersub(&next, &now, &diff); + timespec_sub(&next, &now, &diff); return diff; } @@ -1974,13 +2174,24 @@ struct utcp *utcp_init(utcp_accept_t accept, utcp_pre_accept_t pre_accept, utcp_ return NULL; } + utcp_set_mtu(utcp, DEFAULT_MTU); + + if(!utcp->pkt) { + free(utcp); + return NULL; + } + + if(!CLOCK_GRANULARITY) { + struct timespec res; + clock_getres(UTCP_CLOCK, &res); + CLOCK_GRANULARITY = res.tv_sec * USEC_PER_SEC + res.tv_nsec / 1000; + } + utcp->accept = accept; utcp->pre_accept = pre_accept; utcp->send = send; utcp->priv = priv; - utcp->mtu = DEFAULT_MTU; utcp->timeout = DEFAULT_USER_TIMEOUT; // sec - utcp->rto = START_RTO; // usec return utcp; } @@ -2009,6 +2220,7 @@ void utcp_exit(struct utcp *utcp) { } free(utcp->connections); + free(utcp->pkt); free(utcp); } @@ -2016,11 +2228,31 @@ uint16_t utcp_get_mtu(struct utcp *utcp) { return utcp ? utcp->mtu : 0; } +uint16_t utcp_get_mss(struct utcp *utcp) { + return utcp ? utcp->mss : 0; +} + void utcp_set_mtu(struct utcp *utcp, uint16_t mtu) { - // TODO: handle overhead of the header - if(utcp) { - utcp->mtu = mtu; + if(!utcp) { + return; + } + + if(mtu <= sizeof(struct hdr)) { + return; + } + + if(mtu > utcp->mtu) { + char *new = realloc(utcp->pkt, mtu + sizeof(struct hdr)); + + if(!new) { + return; + } + + utcp->pkt = new; } + + utcp->mtu = mtu; + utcp->mss = mtu - sizeof(struct hdr); } void utcp_reset_timers(struct utcp *utcp) { @@ -2028,9 +2260,9 @@ void utcp_reset_timers(struct utcp *utcp) { return; } - struct timeval now, then; + struct timespec now, then; - gettimeofday(&now, NULL); + clock_gettime(UTCP_CLOCK, &now); then = now; @@ -2043,19 +2275,19 @@ void utcp_reset_timers(struct utcp *utcp) { continue; } - if(timerisset(&c->rtrx_timeout)) { + if(timespec_isset(&c->rtrx_timeout)) { c->rtrx_timeout = now; } - if(timerisset(&c->conn_timeout)) { + if(timespec_isset(&c->conn_timeout)) { c->conn_timeout = then; } c->rtt_start.tv_sec = 0; - } - if(utcp->rto > START_RTO) { - utcp->rto = START_RTO; + if(c->rto > START_RTO) { + c->rto = START_RTO; + } } } @@ -2100,6 +2332,8 @@ void utcp_set_sndbuf(struct utcp_connection *c, size_t size) { if(c->sndbuf.maxsize != size) { c->sndbuf.maxsize = -1; } + + c->do_poll = is_reliable(c) && buffer_free(&c->sndbuf); } size_t utcp_get_rcvbuf(struct utcp_connection *c) { @@ -2167,6 +2401,7 @@ void utcp_set_recv_cb(struct utcp_connection *c, utcp_recv_t recv) { void utcp_set_poll_cb(struct utcp_connection *c, utcp_poll_t poll) { if(c) { c->poll = poll; + c->do_poll = is_reliable(c) && buffer_free(&c->sndbuf); } } @@ -2188,21 +2423,21 @@ void utcp_expect_data(struct utcp_connection *c, bool expect) { if(expect) { // If we expect data, start the connection timer. - if(!timerisset(&c->conn_timeout)) { - gettimeofday(&c->conn_timeout, NULL); + if(!timespec_isset(&c->conn_timeout)) { + clock_gettime(UTCP_CLOCK, &c->conn_timeout); c->conn_timeout.tv_sec += c->utcp->timeout; } } else { // If we want to cancel expecting data, only clear the timer when there is no unACKed data. if(c->snd.una == c->snd.last) { - timerclear(&c->conn_timeout); + timespec_clear(&c->conn_timeout); } } } void utcp_offline(struct utcp *utcp, bool offline) { - struct timeval now; - gettimeofday(&now, NULL); + struct timespec now; + clock_gettime(UTCP_CLOCK, &now); for(int i = 0; i < utcp->nconnections; i++) { struct utcp_connection *c = utcp->connections[i]; @@ -2214,15 +2449,23 @@ void utcp_offline(struct utcp *utcp, bool offline) { utcp_expect_data(c, offline); if(!offline) { - if(timerisset(&c->rtrx_timeout)) { + if(timespec_isset(&c->rtrx_timeout)) { c->rtrx_timeout = now; } utcp->connections[i]->rtt_start.tv_sec = 0; + + if(c->rto > START_RTO) { + c->rto = START_RTO; + } } } +} - if(!offline && utcp->rto > START_RTO) { - utcp->rto = START_RTO; - } +void utcp_set_retransmit_cb(struct utcp *utcp, utcp_retransmit_t retransmit) { + utcp->retransmit = retransmit; +} + +void utcp_set_clock_granularity(long granularity) { + CLOCK_GRANULARITY = granularity; }