X-Git-Url: http://git.meshlink.io/?p=utcp;a=blobdiff_plain;f=utcp.c;h=04d2b194c20f193aa44f2d4006b9afc1bd356b29;hp=0b36de15263b8b005c0625f866e901054432b5ba;hb=HEAD;hpb=bf55733700d946f678947a6caaa782b56669a3f1 diff --git a/utcp.c b/utcp.c index 0b36de1..04d2b19 100644 --- a/utcp.c +++ b/utcp.c @@ -43,6 +43,14 @@ #undef poll #endif +#ifndef UTCP_CLOCK +#if defined(CLOCK_MONOTONIC_RAW) && defined(__x86_64__) +#define UTCP_CLOCK CLOCK_MONOTONIC_RAW +#else +#define UTCP_CLOCK CLOCK_MONOTONIC +#endif +#endif + static void timespec_sub(const struct timespec *a, const struct timespec *b, struct timespec *r) { r->tv_sec = a->tv_sec - b->tv_sec; r->tv_nsec = a->tv_nsec - b->tv_nsec; @@ -53,8 +61,7 @@ static void timespec_sub(const struct timespec *a, const struct timespec *b, str } static int32_t timespec_diff_usec(const struct timespec *a, const struct timespec *b) { - int64_t diff = (a->tv_sec - b->tv_sec) * 1000000000 + a->tv_sec - b->tv_sec; - return diff / 1000; + return (a->tv_sec - b->tv_sec) * 1000000 + (a->tv_nsec - b->tv_nsec) / 1000; } static bool timespec_lt(const struct timespec *a, const struct timespec *b) { @@ -67,6 +74,7 @@ static bool timespec_lt(const struct timespec *a, const struct timespec *b) { static void timespec_clear(struct timespec *a) { a->tv_sec = 0; + a->tv_nsec = 0; } static bool timespec_isset(const struct timespec *a) { @@ -90,14 +98,6 @@ static inline size_t max(size_t a, size_t b) { #define UTCP_DEBUG_DATALEN 20 #endif -#ifndef UTCP_CLOCK -#if defined(CLOCK_MONOTONIC_RAW) && defined(__x86_64__) -#define UTCP_CLOCK CLOCK_MONOTONIC_RAW -#else -#define UTCP_CLOCK CLOCK_MONOTONIC -#endif -#endif - static void debug(struct utcp_connection *c, const char *format, ...) { struct timespec tv; char buf[1024]; @@ -145,12 +145,13 @@ static void print_packet(struct utcp_connection *c, const char *dir, const void *p = 0; - debug(c, "%s: len %lu src %u dst %u seq %u ack %u wnd %u aux %x ctl %s%s%s%s data %s\n", + debug(c, "%s: len %lu src %u dst %u seq %u ack %u wnd %u aux %x ctl %s%s%s%s%s data %s\n", dir, (unsigned long)len, hdr.src, hdr.dst, hdr.seq, hdr.ack, hdr.wnd, hdr.aux, hdr.ctl & SYN ? "SYN" : "", hdr.ctl & RST ? "RST" : "", hdr.ctl & FIN ? "FIN" : "", hdr.ctl & ACK ? "ACK" : "", + hdr.ctl & MF ? "MF" : "", str ); } @@ -220,7 +221,7 @@ static bool buffer_resize(struct buffer *buf, uint32_t newsize) { // [345.........|........012] uint32_t tailsize = buf->size - buf->offset; uint32_t newoffset = newsize - tailsize; - memmove(buf + newoffset, buf + buf->offset, tailsize); + memmove(buf->data + newoffset, buf->data + buf->offset, tailsize); buf->offset = newoffset; } @@ -267,7 +268,7 @@ static ssize_t buffer_put_at(struct buffer *buf, size_t offset, const void *data uint32_t realoffset = buf->offset + offset; - if(buf->size - buf->offset < offset) { + if(buf->size - buf->offset <= offset) { // The offset wrapped realoffset -= buf->size; } @@ -304,7 +305,7 @@ static ssize_t buffer_copy(struct buffer *buf, void *data, size_t offset, size_t uint32_t realoffset = buf->offset + offset; - if(buf->size - buf->offset < offset) { + if(buf->size - buf->offset <= offset) { // The offset wrapped realoffset -= buf->size; } @@ -320,22 +321,79 @@ static ssize_t buffer_copy(struct buffer *buf, void *data, size_t offset, size_t return len; } +// Copy data from the buffer without removing it. +static ssize_t buffer_call(struct utcp_connection *c, struct buffer *buf, size_t offset, size_t len) { + if(!c->recv) { + return len; + } + + // Ensure we don't copy more than is actually stored in the buffer + if(offset >= buf->used) { + return 0; + } + + if(buf->used - offset < len) { + len = buf->used - offset; + } + + uint32_t realoffset = buf->offset + offset; + + if(buf->size - buf->offset <= offset) { + // The offset wrapped + realoffset -= buf->size; + } + + if(buf->size - realoffset < len) { + // The data is wrapped + ssize_t rx1 = c->recv(c, buf->data + realoffset, buf->size - realoffset); + + if(rx1 < buf->size - realoffset) { + return rx1; + } + + // The channel might have been closed by the previous callback + if(!c->recv) { + return len; + } + + ssize_t rx2 = c->recv(c, buf->data, len - (buf->size - realoffset)); + + if(rx2 < 0) { + return rx2; + } else { + return rx1 + rx2; + } + } else { + return c->recv(c, buf->data + realoffset, len); + } +} + // Discard data from the buffer. static ssize_t buffer_discard(struct buffer *buf, size_t len) { if(buf->used < len) { len = buf->used; } - if(buf->size - buf->offset < len) { + if(buf->size - buf->offset <= len) { buf->offset -= buf->size; } - buf->offset += len; + if(buf->used == len) { + buf->offset = 0; + } else { + buf->offset += len; + } + buf->used -= len; return len; } +static void buffer_clear(struct buffer *buf) { + buf->used = 0; + buf->offset = 0; +} + static bool buffer_set_size(struct buffer *buf, uint32_t minsize, uint32_t maxsize) { if(maxsize < minsize) { maxsize = minsize; @@ -352,7 +410,7 @@ static void buffer_exit(struct buffer *buf) { } static uint32_t buffer_free(const struct buffer *buf) { - return buf->maxsize - buf->used; + return buf->maxsize > buf->used ? buf->maxsize - buf->used : 0; } // Connections are stored in a sorted list. @@ -476,6 +534,9 @@ static struct utcp_connection *allocate_connection(struct utcp *utcp, uint16_t s c->snd.cwnd = (utcp->mss > 2190 ? 2 : utcp->mss > 1095 ? 3 : 4) * utcp->mss; c->snd.ssthresh = ~0; debug_cwnd(c); + c->srtt = 0; + c->rttvar = 0; + c->rto = START_RTO; c->utcp = utcp; // Add it to the sorted list of connections @@ -501,36 +562,34 @@ static void update_rtt(struct utcp_connection *c, uint32_t rtt) { return; } - struct utcp *utcp = c->utcp; - - if(!utcp->srtt) { - utcp->srtt = rtt; - utcp->rttvar = rtt / 2; + if(!c->srtt) { + c->srtt = rtt; + c->rttvar = rtt / 2; } else { - utcp->rttvar = (utcp->rttvar * 3 + absdiff(utcp->srtt, rtt)) / 4; - utcp->srtt = (utcp->srtt * 7 + rtt) / 8; + c->rttvar = (c->rttvar * 3 + absdiff(c->srtt, rtt)) / 4; + c->srtt = (c->srtt * 7 + rtt) / 8; } - utcp->rto = utcp->srtt + max(4 * utcp->rttvar, CLOCK_GRANULARITY); + c->rto = c->srtt + max(4 * c->rttvar, CLOCK_GRANULARITY); - if(utcp->rto > MAX_RTO) { - utcp->rto = MAX_RTO; + if(c->rto > MAX_RTO) { + c->rto = MAX_RTO; } - debug(c, "rtt %u srtt %u rttvar %u rto %u\n", rtt, utcp->srtt, utcp->rttvar, utcp->rto); + debug(c, "rtt %u srtt %u rttvar %u rto %u\n", rtt, c->srtt, c->rttvar, c->rto); } static void start_retransmit_timer(struct utcp_connection *c) { clock_gettime(UTCP_CLOCK, &c->rtrx_timeout); - uint32_t rto = c->utcp->rto; + uint32_t rto = c->rto; while(rto > USEC_PER_SEC) { c->rtrx_timeout.tv_sec++; rto -= USEC_PER_SEC; } - c->rtrx_timeout.tv_nsec += c->utcp->rto * 1000; + c->rtrx_timeout.tv_nsec += rto * 1000; if(c->rtrx_timeout.tv_nsec >= NSEC_PER_SEC) { c->rtrx_timeout.tv_nsec -= NSEC_PER_SEC; @@ -606,7 +665,7 @@ void utcp_accept(struct utcp_connection *c, utcp_recv_t recv, void *priv) { static void ack(struct utcp_connection *c, bool sendatleastone) { int32_t left = seqdiff(c->snd.last, c->snd.nxt); - int32_t cwndleft = min(c->snd.cwnd, c->snd.wnd) - seqdiff(c->snd.nxt, c->snd.una); + int32_t cwndleft = is_reliable(c) ? min(c->snd.cwnd, c->snd.wnd) - seqdiff(c->snd.nxt, c->snd.una) : MAX_UNRELIABLE_SIZE; assert(left >= 0); @@ -634,7 +693,7 @@ static void ack(struct utcp_connection *c, bool sendatleastone) { pkt->hdr.src = c->src; pkt->hdr.dst = c->dst; pkt->hdr.ack = c->rcv.nxt; - pkt->hdr.wnd = c->rcvbuf.maxsize; + pkt->hdr.wnd = is_reliable(c) ? c->rcvbuf.maxsize : 0; pkt->hdr.ctl = ACK; pkt->hdr.aux = 0; @@ -647,6 +706,14 @@ static void ack(struct utcp_connection *c, bool sendatleastone) { c->snd.nxt += seglen; left -= seglen; + if(!is_reliable(c)) { + if(left) { + pkt->hdr.ctl |= MF; + } else { + pkt->hdr.ctl &= ~MF; + } + } + if(seglen && fin_wanted(c, c->snd.nxt)) { seglen--; pkt->hdr.ctl |= FIN; @@ -661,6 +728,10 @@ static void ack(struct utcp_connection *c, bool sendatleastone) { print_packet(c, "send", pkt, sizeof(pkt->hdr) + seglen); c->utcp->send(c->utcp, pkt, sizeof(pkt->hdr) + seglen); + + if(left && !is_reliable(c)) { + pkt->hdr.wnd += seglen; + } } while(left); } @@ -721,8 +792,13 @@ ssize_t utcp_send(struct utcp_connection *c, const void *data, size_t len) { // Add data to send buffer. - if(is_reliable(c) || (c->state != SYN_SENT && c->state != SYN_RECEIVED)) { + if(is_reliable(c)) { len = buffer_put(&c->sndbuf, data, len); + } else if(c->state != SYN_SENT && c->state != SYN_RECEIVED) { + if(len > MAX_UNRELIABLE_SIZE || buffer_put(&c->sndbuf, data, len) != (ssize_t)len) { + errno = EMSGSIZE; + return -1; + } } else { return 0; } @@ -780,13 +856,7 @@ static void fast_retransmit(struct utcp_connection *c) { struct { struct hdr hdr; uint8_t data[]; - } *pkt; - - pkt = malloc(c->utcp->mtu); - - if(!pkt) { - return; - } + } *pkt = c->utcp->pkt; pkt->hdr.src = c->src; pkt->hdr.dst = c->dst; @@ -818,8 +888,6 @@ static void fast_retransmit(struct utcp_connection *c) { default: break; } - - free(pkt); } static void retransmit(struct utcp_connection *c) { @@ -831,6 +899,10 @@ static void retransmit(struct utcp_connection *c) { struct utcp *utcp = c->utcp; + if(utcp->retransmit) { + utcp->retransmit(c); + } + struct { struct hdr hdr; uint8_t data[]; @@ -907,10 +979,10 @@ static void retransmit(struct utcp_connection *c) { } start_retransmit_timer(c); - utcp->rto *= 2; + c->rto *= 2; - if(utcp->rto > MAX_RTO) { - utcp->rto = MAX_RTO; + if(c->rto > MAX_RTO) { + c->rto = MAX_RTO; } c->rtt_start.tv_sec = 0; // invalidate RTT timer @@ -978,8 +1050,14 @@ static void handle_out_of_order(struct utcp_connection *c, uint32_t offset, cons // Packet loss or reordering occured. Store the data in the buffer. ssize_t rxd = buffer_put_at(&c->rcvbuf, offset, data, len); - if(rxd < 0 || (size_t)rxd < len) { - abort(); + if(rxd <= 0) { + debug(c, "packet outside receive buffer, dropping\n"); + return; + } + + if((size_t)rxd < len) { + debug(c, "packet partially outside receive buffer\n"); + len = rxd; } // Make note of where we put it. @@ -1023,23 +1101,33 @@ static void handle_out_of_order(struct utcp_connection *c, uint32_t offset, cons } static void handle_in_order(struct utcp_connection *c, const void *data, size_t len) { - // Check if we can process out-of-order data now. - if(c->sacks[0].len && len >= c->sacks[0].offset) { // TODO: handle overlap with second SACK - debug(c, "incoming packet len %lu connected with SACK at %u\n", (unsigned long)len, c->sacks[0].offset); - buffer_put_at(&c->rcvbuf, 0, data, len); // TODO: handle return value - len = max(len, c->sacks[0].offset + c->sacks[0].len); - data = c->rcvbuf.data; - } - if(c->recv) { ssize_t rxd = c->recv(c, data, len); - if(rxd < 0 || (size_t)rxd != len) { + if(rxd != (ssize_t)len) { // TODO: handle the application not accepting all data. abort(); } } + // Check if we can process out-of-order data now. + if(c->sacks[0].len && len >= c->sacks[0].offset) { + debug(c, "incoming packet len %lu connected with SACK at %u\n", (unsigned long)len, c->sacks[0].offset); + + if(len < c->sacks[0].offset + c->sacks[0].len) { + size_t offset = len; + len = c->sacks[0].offset + c->sacks[0].len; + size_t remainder = len - offset; + + ssize_t rxd = buffer_call(c, &c->rcvbuf, offset, remainder); + + if(rxd != (ssize_t)remainder) { + // TODO: handle the application not accepting all data. + abort(); + } + } + } + if(c->rcvbuf.used) { sack_consume(c, len); } @@ -1047,20 +1135,54 @@ static void handle_in_order(struct utcp_connection *c, const void *data, size_t c->rcv.nxt += len; } +static void handle_unreliable(struct utcp_connection *c, const struct hdr *hdr, const void *data, size_t len) { + // Fast path for unfragmented packets + if(!hdr->wnd && !(hdr->ctl & MF)) { + if(c->recv) { + c->recv(c, data, len); + } -static void handle_incoming_data(struct utcp_connection *c, uint32_t seq, const void *data, size_t len) { - if(!is_reliable(c)) { - c->recv(c, data, len); - c->rcv.nxt = seq + len; + c->rcv.nxt = hdr->seq + len; return; } - uint32_t offset = seqdiff(seq, c->rcv.nxt); + // Ensure reassembled packet are not larger than 64 kiB + if(hdr->wnd >= MAX_UNRELIABLE_SIZE || hdr->wnd + len > MAX_UNRELIABLE_SIZE) { + return; + } - if(offset + len > c->rcvbuf.maxsize) { - abort(); + // Don't accept out of order fragments + if(hdr->wnd && hdr->seq != c->rcv.nxt) { + return; + } + + // Reset the receive buffer for the first fragment + if(!hdr->wnd) { + buffer_clear(&c->rcvbuf); + } + + ssize_t rxd = buffer_put_at(&c->rcvbuf, hdr->wnd, data, len); + + if(rxd != (ssize_t)len) { + return; } + // Send the packet if it's the final fragment + if(!(hdr->ctl & MF)) { + buffer_call(c, &c->rcvbuf, 0, hdr->wnd + len); + } + + c->rcv.nxt = hdr->seq + len; +} + +static void handle_incoming_data(struct utcp_connection *c, const struct hdr *hdr, const void *data, size_t len) { + if(!is_reliable(c)) { + handle_unreliable(c, hdr, data, len); + return; + } + + uint32_t offset = seqdiff(hdr->seq, c->rcv.nxt); + if(offset) { handle_out_of_order(c, offset, data, len); } else { @@ -1112,7 +1234,7 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { // Drop packets with an unknown CTL flag - if(hdr.ctl & ~(SYN | ACK | RST | FIN)) { + if(hdr.ctl & ~(SYN | ACK | RST | FIN | MF)) { print_packet(NULL, "recv", data, len); errno = EBADMSG; return -1; @@ -1237,6 +1359,8 @@ synack: print_packet(c, "send", &pkt, sizeof(hdr)); utcp->send(utcp, &pkt, sizeof(hdr)); } + + start_retransmit_timer(c); } else { // No, we don't want your packets, send a RST back len = 1; @@ -1326,10 +1450,6 @@ synack: debug(c, "packet out of order, offset %u bytes", rcv_offset); } - if(rcv_offset >= 0) { - c->rcv.nxt = hdr.seq + len; - } - #endif } @@ -1484,6 +1604,10 @@ synack: if(data_acked) { buffer_discard(&c->sndbuf, data_acked); + + if(is_reliable(c)) { + c->do_poll = true; + } } // Also advance snd.nxt if possible @@ -1598,7 +1722,7 @@ skip_ack: } c->rcv.irs = hdr.seq; - c->rcv.nxt = hdr.seq; + c->rcv.nxt = hdr.seq + 1; if(c->shut_wr) { c->snd.last++; @@ -1607,7 +1731,6 @@ skip_ack: set_state(c, ESTABLISHED); } - // TODO: notify application of this somehow. break; case SYN_RECEIVED: @@ -1621,8 +1744,8 @@ skip_ack: case CLOSING: case LAST_ACK: case TIME_WAIT: - // Ehm, no. We should never receive a second SYN. - return 0; + // This could be a retransmission. Ignore the SYN flag, but send an ACK back. + break; default: #ifdef UTCP_DEBUG @@ -1630,9 +1753,6 @@ skip_ack: #endif return 0; } - - // SYN counts as one sequence number - c->rcv.nxt++; } // 6. Process new data @@ -1684,7 +1804,7 @@ skip_ack: return 0; } - handle_incoming_data(c, hdr.seq, ptr, len); + handle_incoming_data(c, &hdr, ptr, len); } // 7. Process FIN stuff @@ -2001,8 +2121,9 @@ struct timespec utcp_timeout(struct utcp *utcp) { } if(c->poll) { - if((c->state == ESTABLISHED || c->state == CLOSE_WAIT)) { - uint32_t len = buffer_free(&c->sndbuf); + if((c->state == ESTABLISHED || c->state == CLOSE_WAIT) && c->do_poll) { + c->do_poll = false; + uint32_t len = buffer_free(&c->sndbuf); if(len) { c->poll(c, len); @@ -2053,6 +2174,13 @@ struct utcp *utcp_init(utcp_accept_t accept, utcp_pre_accept_t pre_accept, utcp_ return NULL; } + utcp_set_mtu(utcp, DEFAULT_MTU); + + if(!utcp->pkt) { + free(utcp); + return NULL; + } + if(!CLOCK_GRANULARITY) { struct timespec res; clock_getres(UTCP_CLOCK, &res); @@ -2063,9 +2191,7 @@ struct utcp *utcp_init(utcp_accept_t accept, utcp_pre_accept_t pre_accept, utcp_ utcp->pre_accept = pre_accept; utcp->send = send; utcp->priv = priv; - utcp_set_mtu(utcp, DEFAULT_MTU); utcp->timeout = DEFAULT_USER_TIMEOUT; // sec - utcp->rto = START_RTO; // usec return utcp; } @@ -2094,6 +2220,7 @@ void utcp_exit(struct utcp *utcp) { } free(utcp->connections); + free(utcp->pkt); free(utcp); } @@ -2157,10 +2284,10 @@ void utcp_reset_timers(struct utcp *utcp) { } c->rtt_start.tv_sec = 0; - } - if(utcp->rto > START_RTO) { - utcp->rto = START_RTO; + if(c->rto > START_RTO) { + c->rto = START_RTO; + } } } @@ -2205,6 +2332,8 @@ void utcp_set_sndbuf(struct utcp_connection *c, size_t size) { if(c->sndbuf.maxsize != size) { c->sndbuf.maxsize = -1; } + + c->do_poll = is_reliable(c) && buffer_free(&c->sndbuf); } size_t utcp_get_rcvbuf(struct utcp_connection *c) { @@ -2272,6 +2401,7 @@ void utcp_set_recv_cb(struct utcp_connection *c, utcp_recv_t recv) { void utcp_set_poll_cb(struct utcp_connection *c, utcp_poll_t poll) { if(c) { c->poll = poll; + c->do_poll = is_reliable(c) && buffer_free(&c->sndbuf); } } @@ -2324,12 +2454,16 @@ void utcp_offline(struct utcp *utcp, bool offline) { } utcp->connections[i]->rtt_start.tv_sec = 0; + + if(c->rto > START_RTO) { + c->rto = START_RTO; + } } } +} - if(!offline && utcp->rto > START_RTO) { - utcp->rto = START_RTO; - } +void utcp_set_retransmit_cb(struct utcp *utcp, utcp_retransmit_t retransmit) { + utcp->retransmit = retransmit; } void utcp_set_clock_granularity(long granularity) {