X-Git-Url: http://git.meshlink.io/?p=utcp;a=blobdiff_plain;f=utcp.c;h=b53541d5e3e6bab3400daf70ed8141e9b018b01c;hp=d09b9e7d526e80c4ce39590c19c718904fe32590;hb=488a66580c31a873b93ed541505e1525607bbed8;hpb=f1035e971bb894203bdfba6cafbaf0bb30f197eb diff --git a/utcp.c b/utcp.c index d09b9e7..b53541d 100644 --- a/utcp.c +++ b/utcp.c @@ -29,6 +29,7 @@ #include #include #include +#include #include "utcp_priv.h" @@ -54,6 +55,10 @@ } while (0) #endif +static inline size_t min(size_t a, size_t b) { + return a < b ? a : b; +} + static inline size_t max(size_t a, size_t b) { return a > b ? a : b; } @@ -61,61 +66,74 @@ static inline size_t max(size_t a, size_t b) { #ifdef UTCP_DEBUG #include -static void debug(const char *format, ...) { +#ifndef UTCP_DEBUG_DATALEN +#define UTCP_DEBUG_DATALEN 20 +#endif + +static void debug(struct utcp_connection *c, const char *format, ...) { + struct timespec tv; + char buf[1024]; + int len; + + clock_gettime(CLOCK_REALTIME, &tv); + len = snprintf(buf, sizeof(buf), "%ld.%06lu %u:%u ", (long)tv.tv_sec, tv.tv_nsec / 1000, c ? c->src : 0, c ? c->dst : 0); va_list ap; va_start(ap, format); - vfprintf(stderr, format, ap); + len += vsnprintf(buf + len, sizeof(buf) - len, format, ap); va_end(ap); + + if(len > 0 && (size_t)len < sizeof(buf)) { + fwrite(buf, len, 1, stderr); + } } -static void print_packet(struct utcp *utcp, const char *dir, const void *pkt, size_t len) { +static void print_packet(struct utcp_connection *c, const char *dir, const void *pkt, size_t len) { struct hdr hdr; if(len < sizeof(hdr)) { - debug("%p %s: short packet (%lu bytes)\n", utcp, dir, (unsigned long)len); + debug(c, "%s: short packet (%lu bytes)\n", dir, (unsigned long)len); return; } memcpy(&hdr, pkt, sizeof(hdr)); - debug("%p %s: len=%lu, src=%u dst=%u seq=%u ack=%u wnd=%u aux=%x ctl=", utcp, dir, (unsigned long)len, hdr.src, hdr.dst, hdr.seq, hdr.ack, hdr.wnd, hdr.aux); - if(hdr.ctl & SYN) { - debug("SYN"); - } - - if(hdr.ctl & RST) { - debug("RST"); - } + uint32_t datalen; - if(hdr.ctl & FIN) { - debug("FIN"); + if(len > sizeof(hdr)) { + datalen = min(len - sizeof(hdr), UTCP_DEBUG_DATALEN); + } else { + datalen = 0; } - if(hdr.ctl & ACK) { - debug("ACK"); - } - if(len > sizeof(hdr)) { - uint32_t datalen = len - sizeof(hdr); - const uint8_t *data = (uint8_t *)pkt + sizeof(hdr); - char str[datalen * 2 + 1]; - char *p = str; + const uint8_t *data = (uint8_t *)pkt + sizeof(hdr); + char str[datalen * 2 + 1]; + char *p = str; - for(uint32_t i = 0; i < datalen; i++) { - *p++ = "0123456789ABCDEF"[data[i] >> 4]; - *p++ = "0123456789ABCDEF"[data[i] & 15]; - } + for(uint32_t i = 0; i < datalen; i++) { + *p++ = "0123456789ABCDEF"[data[i] >> 4]; + *p++ = "0123456789ABCDEF"[data[i] & 15]; + } - *p = 0; + *p = 0; - debug(" data=%s", str); - } + debug(c, "%s: len %lu src %u dst %u seq %u ack %u wnd %u aux %x ctl %s%s%s%s data %s\n", + dir, (unsigned long)len, hdr.src, hdr.dst, hdr.seq, hdr.ack, hdr.wnd, hdr.aux, + hdr.ctl & SYN ? "SYN" : "", + hdr.ctl & RST ? "RST" : "", + hdr.ctl & FIN ? "FIN" : "", + hdr.ctl & ACK ? "ACK" : "", + str + ); +} - debug("\n"); +static void debug_cwnd(struct utcp_connection *c) { + debug(c, "snd.cwnd %u snd.ssthresh %u\n", c->snd.cwnd, ~c->snd.ssthresh ? c->snd.ssthresh : 0); } #else #define debug(...) do {} while(0) #define print_packet(...) do {} while(0) +#define debug_cwnd(...) do {} while(0) #endif static void set_state(struct utcp_connection *c, enum state state) { @@ -125,7 +143,7 @@ static void set_state(struct utcp_connection *c, enum state state) { timerclear(&c->conn_timeout); } - debug("%p new state: %s\n", c->utcp, strstate[state]); + debug(c, "state %s\n", strstate[state]); } static bool fin_wanted(struct utcp_connection *c, uint32_t seq) { @@ -157,7 +175,7 @@ static int32_t seqdiff(uint32_t a, uint32_t b) { // Store data into the buffer static ssize_t buffer_put_at(struct buffer *buf, size_t offset, const void *data, size_t len) { - debug("buffer_put_at %lu %lu %lu\n", (unsigned long)buf->used, (unsigned long)offset, (unsigned long)len); + debug(NULL, "buffer_put_at %lu %lu %lu\n", (unsigned long)buf->used, (unsigned long)offset, (unsigned long)len); size_t required = offset + len; @@ -382,9 +400,10 @@ static struct utcp_connection *allocate_connection(struct utcp *utcp, uint16_t s #endif c->snd.una = c->snd.iss; c->snd.nxt = c->snd.iss + 1; - c->rcv.wnd = utcp->mtu; c->snd.last = c->snd.nxt; - c->snd.cwnd = utcp->mtu; + c->snd.cwnd = (utcp->mtu > 2190 ? 2 : utcp->mtu > 1095 ? 3 : 4) * utcp->mtu; + c->snd.ssthresh = ~0; + debug_cwnd(c); c->utcp = utcp; // Add it to the sorted list of connections @@ -406,7 +425,7 @@ static inline uint32_t absdiff(uint32_t a, uint32_t b) { // Update RTT variables. See RFC 6298. static void update_rtt(struct utcp_connection *c, uint32_t rtt) { if(!rtt) { - debug("invalid rtt\n"); + debug(c, "invalid rtt\n"); return; } @@ -415,18 +434,18 @@ static void update_rtt(struct utcp_connection *c, uint32_t rtt) { if(!utcp->srtt) { utcp->srtt = rtt; utcp->rttvar = rtt / 2; - utcp->rto = rtt + max(2 * rtt, CLOCK_GRANULARITY); } else { utcp->rttvar = (utcp->rttvar * 3 + absdiff(utcp->srtt, rtt)) / 4; utcp->srtt = (utcp->srtt * 7 + rtt) / 8; - utcp->rto = utcp->srtt + max(utcp->rttvar, CLOCK_GRANULARITY); } + utcp->rto = utcp->srtt + max(4 * utcp->rttvar, CLOCK_GRANULARITY); + if(utcp->rto > MAX_RTO) { utcp->rto = MAX_RTO; } - debug("rtt %u srtt %u rttvar %u rto %u\n", rtt, utcp->srtt, utcp->rttvar, utcp->rto); + debug(c, "rtt %u srtt %u rttvar %u rto %u\n", rtt, utcp->srtt, utcp->rttvar, utcp->rto); } static void start_retransmit_timer(struct utcp_connection *c) { @@ -438,12 +457,12 @@ static void start_retransmit_timer(struct utcp_connection *c) { c->rtrx_timeout.tv_sec++; } - debug("timeout set to %lu.%06lu (%u)\n", c->rtrx_timeout.tv_sec, c->rtrx_timeout.tv_usec, c->utcp->rto); + debug(c, "rtrx_timeout %ld.%06lu\n", c->rtrx_timeout.tv_sec, c->rtrx_timeout.tv_usec); } static void stop_retransmit_timer(struct utcp_connection *c) { timerclear(&c->rtrx_timeout); - debug("timeout cleared\n"); + debug(c, "rtrx_timeout cleared\n"); } struct utcp_connection *utcp_connect_ex(struct utcp *utcp, uint16_t dst, utcp_recv_t recv, void *priv, uint32_t flags) { @@ -453,7 +472,7 @@ struct utcp_connection *utcp_connect_ex(struct utcp *utcp, uint16_t dst, utcp_re return NULL; } - assert((flags & ~0xf) == 0); + assert((flags & ~0x1f) == 0); c->flags = flags; c->recv = recv; @@ -468,7 +487,7 @@ struct utcp_connection *utcp_connect_ex(struct utcp *utcp, uint16_t dst, utcp_re pkt.hdr.dst = c->dst; pkt.hdr.seq = c->snd.iss; pkt.hdr.ack = 0; - pkt.hdr.wnd = c->rcv.wnd; + pkt.hdr.wnd = c->rcvbuf.maxsize; pkt.hdr.ctl = SYN; pkt.hdr.aux = 0x0101; pkt.init[0] = 1; @@ -478,7 +497,7 @@ struct utcp_connection *utcp_connect_ex(struct utcp *utcp, uint16_t dst, utcp_re set_state(c, SYN_SENT); - print_packet(utcp, "send", &pkt, sizeof(pkt)); + print_packet(c, "send", &pkt, sizeof(pkt)); utcp->send(utcp, &pkt, sizeof(pkt)); gettimeofday(&c->conn_timeout, NULL); @@ -495,11 +514,11 @@ struct utcp_connection *utcp_connect(struct utcp *utcp, uint16_t dst, utcp_recv_ void utcp_accept(struct utcp_connection *c, utcp_recv_t recv, void *priv) { if(c->reapable || c->state != SYN_RECEIVED) { - debug("Error: accept() called on invalid connection %p in state %s\n", c, strstate[c->state]); + debug(c, "accept() called on invalid connection in state %s\n", c, strstate[c->state]); return; } - debug("%p accepted, %p %p\n", c, recv, priv); + debug(c, "accepted %p %p\n", c, recv, priv); c->recv = recv; c->priv = priv; set_state(c, ESTABLISHED); @@ -507,19 +526,22 @@ void utcp_accept(struct utcp_connection *c, utcp_recv_t recv, void *priv) { static void ack(struct utcp_connection *c, bool sendatleastone) { int32_t left = seqdiff(c->snd.last, c->snd.nxt); - int32_t cwndleft = c->snd.cwnd - seqdiff(c->snd.nxt, c->snd.una); - debug("cwndleft = %d\n", cwndleft); + int32_t cwndleft = min(c->snd.cwnd, c->snd.wnd) - seqdiff(c->snd.nxt, c->snd.una); assert(left >= 0); if(cwndleft <= 0) { - cwndleft = 0; - } - - if(cwndleft < left) { + left = 0; + } else if(cwndleft < left) { left = cwndleft; + + if(!sendatleastone || cwndleft > c->utcp->mtu) { + left -= left % c->utcp->mtu; + } } + debug(c, "cwndleft %d left %d\n", cwndleft, left); + if(!left && !sendatleastone) { return; } @@ -538,7 +560,7 @@ static void ack(struct utcp_connection *c, bool sendatleastone) { pkt->hdr.src = c->src; pkt->hdr.dst = c->dst; pkt->hdr.ack = c->rcv.nxt; - pkt->hdr.wnd = c->snd.wnd; + pkt->hdr.wnd = c->rcvbuf.maxsize; pkt->hdr.ctl = ACK; pkt->hdr.aux = 0; @@ -560,10 +582,10 @@ static void ack(struct utcp_connection *c, bool sendatleastone) { // Start RTT measurement gettimeofday(&c->rtt_start, NULL); c->rtt_seq = pkt->hdr.seq + seglen; - debug("Starting RTT measurement, expecting ack %u\n", c->rtt_seq); + debug(c, "starting RTT measurement, expecting ack %u\n", c->rtt_seq); } - print_packet(c->utcp, "send", pkt, sizeof(pkt->hdr) + seglen); + print_packet(c, "send", pkt, sizeof(pkt->hdr) + seglen); c->utcp->send(c->utcp, pkt, sizeof(pkt->hdr) + seglen); } while(left); @@ -572,7 +594,7 @@ static void ack(struct utcp_connection *c, bool sendatleastone) { ssize_t utcp_send(struct utcp_connection *c, const void *data, size_t len) { if(c->reapable) { - debug("Error: send() called on closed connection %p\n", c); + debug(c, "send() called on closed connection\n"); errno = EBADF; return -1; } @@ -580,7 +602,7 @@ ssize_t utcp_send(struct utcp_connection *c, const void *data, size_t len) { switch(c->state) { case CLOSED: case LISTEN: - debug("Error: send() called on unconnected connection %p\n", c); + debug(c, "send() called on unconnected connection\n"); errno = ENOTCONN; return -1; @@ -595,7 +617,7 @@ ssize_t utcp_send(struct utcp_connection *c, const void *data, size_t len) { case CLOSING: case LAST_ACK: case TIME_WAIT: - debug("Error: send() called on closing connection %p\n", c); + debug(c, "send() called on closed connection\n"); errno = EPIPE; return -1; } @@ -611,13 +633,35 @@ ssize_t utcp_send(struct utcp_connection *c, const void *data, size_t len) { return -1; } + // Check if we need to be able to buffer all data + + if(c->flags & UTCP_NO_PARTIAL) { + if(len > buffer_free(&c->sndbuf)) { + if(len > c->sndbuf.maxsize) { + errno = EMSGSIZE; + return -1; + } else { + errno = EWOULDBLOCK; + return 0; + } + } + } + // Add data to send buffer. - len = buffer_put(&c->sndbuf, data, len); + if(is_reliable(c) || (c->state != SYN_SENT && c->state != SYN_RECEIVED)) { + len = buffer_put(&c->sndbuf, data, len); + } else { + return 0; + } if(len <= 0) { - errno = EWOULDBLOCK; - return 0; + if(is_reliable(c)) { + errno = EWOULDBLOCK; + return 0; + } else { + return len; + } } c->snd.last += len; @@ -639,6 +683,11 @@ ssize_t utcp_send(struct utcp_connection *c, const void *data, size_t len) { start_retransmit_timer(c); } + if(is_reliable(c) && !timerisset(&c->conn_timeout)) { + gettimeofday(&c->conn_timeout, NULL); + c->conn_timeout.tv_sec += c->utcp->timeout; + } + return len; } @@ -648,9 +697,62 @@ static void swap_ports(struct hdr *hdr) { hdr->dst = tmp; } +static void fast_retransmit(struct utcp_connection *c) { + if(c->state == CLOSED || c->snd.last == c->snd.una) { + debug(c, "fast_retransmit() called but nothing to retransmit!\n"); + return; + } + + struct utcp *utcp = c->utcp; + + struct { + struct hdr hdr; + uint8_t data[]; + } *pkt; + + pkt = malloc(sizeof(pkt->hdr) + c->utcp->mtu); + + if(!pkt) { + return; + } + + pkt->hdr.src = c->src; + pkt->hdr.dst = c->dst; + pkt->hdr.wnd = c->rcvbuf.maxsize; + pkt->hdr.aux = 0; + + switch(c->state) { + case ESTABLISHED: + case FIN_WAIT_1: + case CLOSE_WAIT: + case CLOSING: + case LAST_ACK: + // Send unacked data again. + pkt->hdr.seq = c->snd.una; + pkt->hdr.ack = c->rcv.nxt; + pkt->hdr.ctl = ACK; + uint32_t len = min(seqdiff(c->snd.last, c->snd.una), utcp->mtu); + + if(fin_wanted(c, c->snd.una + len)) { + len--; + pkt->hdr.ctl |= FIN; + } + + buffer_copy(&c->sndbuf, pkt->data, 0, len); + print_packet(c, "rtrx", pkt, sizeof(pkt->hdr) + len); + utcp->send(utcp, pkt, sizeof(pkt->hdr) + len); + break; + + default: + break; + } + + free(pkt); +} + static void retransmit(struct utcp_connection *c) { if(c->state == CLOSED || c->snd.last == c->snd.una) { - debug("Retransmit() called but nothing to retransmit!\n"); + debug(c, "retransmit() called but nothing to retransmit!\n"); stop_retransmit_timer(c); return; } @@ -670,7 +772,7 @@ static void retransmit(struct utcp_connection *c) { pkt->hdr.src = c->src; pkt->hdr.dst = c->dst; - pkt->hdr.wnd = c->rcv.wnd; + pkt->hdr.wnd = c->rcvbuf.maxsize; pkt->hdr.aux = 0; switch(c->state) { @@ -684,7 +786,7 @@ static void retransmit(struct utcp_connection *c) { pkt->data[1] = 0; pkt->data[2] = 0; pkt->data[3] = c->flags & 0x7; - print_packet(c->utcp, "rtrx", pkt, sizeof(pkt->hdr) + 4); + print_packet(c, "rtrx", pkt, sizeof(pkt->hdr) + 4); utcp->send(utcp, pkt, sizeof(pkt->hdr) + 4); break; @@ -693,7 +795,7 @@ static void retransmit(struct utcp_connection *c) { pkt->hdr.seq = c->snd.nxt; pkt->hdr.ack = c->rcv.nxt; pkt->hdr.ctl = SYN | ACK; - print_packet(c->utcp, "rtrx", pkt, sizeof(pkt->hdr)); + print_packet(c, "rtrx", pkt, sizeof(pkt->hdr)); utcp->send(utcp, pkt, sizeof(pkt->hdr)); break; @@ -718,9 +820,14 @@ static void retransmit(struct utcp_connection *c) { } c->snd.nxt = c->snd.una + len; - c->snd.cwnd = utcp->mtu; // reduce cwnd on retransmit + + // RFC 5681 slow start after timeout + c->snd.ssthresh = max(c->snd.cwnd / 2, utcp->mtu * 2); // eq. 4 + c->snd.cwnd = utcp->mtu; + debug_cwnd(c); + buffer_copy(&c->sndbuf, pkt->data, 0, len); - print_packet(c->utcp, "rtrx", pkt, sizeof(pkt->hdr) + len); + print_packet(c, "rtrx", pkt, sizeof(pkt->hdr) + len); utcp->send(utcp, pkt, sizeof(pkt->hdr) + len); break; @@ -768,10 +875,10 @@ cleanup: * - the SACK entry is completely before ^, in that case delete it. */ static void sack_consume(struct utcp_connection *c, size_t len) { - debug("sack_consume %lu\n", (unsigned long)len); + debug(c, "sack_consume %lu\n", (unsigned long)len); if(len > c->rcvbuf.used) { - debug("All SACK entries consumed"); + debug(c, "all SACK entries consumed\n"); c->sacks[0].len = 0; return; } @@ -798,12 +905,12 @@ static void sack_consume(struct utcp_connection *c, size_t len) { } for(int i = 0; i < NSACKS && c->sacks[i].len; i++) { - debug("SACK[%d] offset %u len %u\n", i, c->sacks[i].offset, c->sacks[i].len); + debug(c, "SACK[%d] offset %u len %u\n", i, c->sacks[i].offset, c->sacks[i].len); } } static void handle_out_of_order(struct utcp_connection *c, uint32_t offset, const void *data, size_t len) { - debug("out of order packet, offset %u\n", offset); + debug(c, "out of order packet, offset %u\n", offset); // Packet loss or reordering occured. Store the data in the buffer. ssize_t rxd = buffer_put_at(&c->rcvbuf, offset, data, len); @@ -814,30 +921,30 @@ static void handle_out_of_order(struct utcp_connection *c, uint32_t offset, cons // Make note of where we put it. for(int i = 0; i < NSACKS; i++) { if(!c->sacks[i].len) { // nothing to merge, add new entry - debug("New SACK entry %d\n", i); + debug(c, "new SACK entry %d\n", i); c->sacks[i].offset = offset; c->sacks[i].len = rxd; break; } else if(offset < c->sacks[i].offset) { if(offset + rxd < c->sacks[i].offset) { // insert before if(!c->sacks[NSACKS - 1].len) { // only if room left - debug("Insert SACK entry at %d\n", i); + debug(c, "insert SACK entry at %d\n", i); memmove(&c->sacks[i + 1], &c->sacks[i], (NSACKS - i - 1) * sizeof(c->sacks)[i]); c->sacks[i].offset = offset; c->sacks[i].len = rxd; } else { - debug("SACK entries full, dropping packet\n"); + debug(c, "SACK entries full, dropping packet\n"); } break; } else { // merge - debug("Merge with start of SACK entry at %d\n", i); + debug(c, "merge with start of SACK entry at %d\n", i); c->sacks[i].offset = offset; break; } } else if(offset <= c->sacks[i].offset + c->sacks[i].len) { if(offset + rxd > c->sacks[i].offset + c->sacks[i].len) { // merge - debug("Merge with end of SACK entry at %d\n", i); + debug(c, "merge with end of SACK entry at %d\n", i); c->sacks[i].len = offset + rxd - c->sacks[i].offset; // TODO: handle potential merge with next entry } @@ -847,14 +954,14 @@ static void handle_out_of_order(struct utcp_connection *c, uint32_t offset, cons } for(int i = 0; i < NSACKS && c->sacks[i].len; i++) { - debug("SACK[%d] offset %u len %u\n", i, c->sacks[i].offset, c->sacks[i].len); + debug(c, "SACK[%d] offset %u len %u\n", i, c->sacks[i].offset, c->sacks[i].len); } } static void handle_in_order(struct utcp_connection *c, const void *data, size_t len) { // Check if we can process out-of-order data now. if(c->sacks[0].len && len >= c->sacks[0].offset) { // TODO: handle overlap with second SACK - debug("incoming packet len %lu connected with SACK at %u\n", (unsigned long)len, c->sacks[0].offset); + debug(c, "incoming packet len %lu connected with SACK at %u\n", (unsigned long)len, c->sacks[0].offset); buffer_put_at(&c->rcvbuf, 0, data, len); // TODO: handle return value len = max(len, c->sacks[0].offset + c->sacks[0].len); data = c->rcvbuf.data; @@ -915,13 +1022,12 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { return -1; } - print_packet(utcp, "recv", data, len); - // Drop packets smaller than the header struct hdr hdr; if(len < sizeof(hdr)) { + print_packet(NULL, "recv", data, len); errno = EBADMSG; return -1; } @@ -929,12 +1035,21 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { // Make a copy from the potentially unaligned data to a struct hdr memcpy(&hdr, ptr, sizeof(hdr)); + + // Try to match the packet to an existing connection + + struct utcp_connection *c = find_connection(utcp, hdr.dst, hdr.src); + print_packet(c, "recv", data, len); + + // Process the header + ptr += sizeof(hdr); len -= sizeof(hdr); // Drop packets with an unknown CTL flag if(hdr.ctl & ~(SYN | ACK | RST | FIN)) { + print_packet(NULL, "recv", data, len); errno = EBADMSG; return -1; } @@ -986,9 +1101,7 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { ptr += 2; } - // Try to match the packet to an existing connection - - struct utcp_connection *c = find_connection(utcp, hdr.dst, hdr.src); + bool has_data = len || (hdr.ctl & (SYN | FIN)); // Is it for a new connection? @@ -1028,6 +1141,7 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { c->flags = UTCP_TCP; } +synack: // Return SYN+ACK, go to SYN_RECEIVED state c->snd.wnd = hdr.wnd; c->rcv.irs = hdr.seq; @@ -1043,7 +1157,7 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { pkt.hdr.dst = c->dst; pkt.hdr.ack = c->rcv.irs + 1; pkt.hdr.seq = c->snd.iss; - pkt.hdr.wnd = c->rcv.wnd; + pkt.hdr.wnd = c->rcvbuf.maxsize; pkt.hdr.ctl = SYN | ACK; if(init) { @@ -1052,11 +1166,11 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { pkt.data[1] = 0; pkt.data[2] = 0; pkt.data[3] = c->flags & 0x7; - print_packet(c->utcp, "send", &pkt, sizeof(hdr) + 4); + print_packet(c, "send", &pkt, sizeof(hdr) + 4); utcp->send(utcp, &pkt, sizeof(hdr) + 4); } else { pkt.hdr.aux = 0; - print_packet(c->utcp, "send", &pkt, sizeof(hdr)); + print_packet(c, "send", &pkt, sizeof(hdr)); utcp->send(utcp, &pkt, sizeof(hdr)); } } else { @@ -1068,20 +1182,18 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { return 0; } - debug("%p state %s\n", c->utcp, strstate[c->state]); + debug(c, "state %s\n", strstate[c->state]); // In case this is for a CLOSED connection, ignore the packet. // TODO: make it so incoming packets can never match a CLOSED connection. if(c->state == CLOSED) { - debug("Got packet for closed connection\n"); + debug(c, "got packet for closed connection\n"); return 0; } // It is for an existing connection. - uint32_t prevrcvnxt = c->rcv.nxt; - // 1. Drop invalid packets. // 1a. Drop packets that should not happen in our current state. @@ -1105,41 +1217,43 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { break; } - // 1b. Drop packets with a sequence number not in our receive window. + // 1b. Discard data that is not in our receive window. - bool acceptable; + if(is_reliable(c)) { + bool acceptable; - if(c->state == SYN_SENT) { - acceptable = true; - } else if(len == 0) { - acceptable = seqdiff(hdr.seq, c->rcv.nxt) >= 0; - } else { - int32_t rcv_offset = seqdiff(hdr.seq, c->rcv.nxt); + if(c->state == SYN_SENT) { + acceptable = true; + } else if(len == 0) { + acceptable = seqdiff(hdr.seq, c->rcv.nxt) >= 0; + } else { + int32_t rcv_offset = seqdiff(hdr.seq, c->rcv.nxt); - // cut already accepted front overlapping - if(rcv_offset < 0) { - acceptable = len > (size_t) - rcv_offset; + // cut already accepted front overlapping + if(rcv_offset < 0) { + acceptable = len > (size_t) - rcv_offset; - if(acceptable) { - ptr -= rcv_offset; - len += rcv_offset; - hdr.seq -= rcv_offset; + if(acceptable) { + ptr -= rcv_offset; + len += rcv_offset; + hdr.seq -= rcv_offset; + } + } else { + acceptable = seqdiff(hdr.seq, c->rcv.nxt) >= 0 && seqdiff(hdr.seq, c->rcv.nxt) + len <= c->rcvbuf.maxsize; } - } else { - acceptable = seqdiff(hdr.seq, c->rcv.nxt) >= 0 && seqdiff(hdr.seq, c->rcv.nxt) + len <= c->rcvbuf.maxsize; } - } - if(!acceptable) { - debug("Packet not acceptable, %u <= %u + %lu < %u\n", c->rcv.nxt, hdr.seq, (unsigned long)len, c->rcv.nxt + c->rcvbuf.maxsize); + if(!acceptable) { + debug(c, "packet not acceptable, %u <= %u + %lu < %u\n", c->rcv.nxt, hdr.seq, (unsigned long)len, c->rcv.nxt + c->rcvbuf.maxsize); - // Ignore unacceptable RST packets. - if(hdr.ctl & RST) { - return 0; - } + // Ignore unacceptable RST packets. + if(hdr.ctl & RST) { + return 0; + } - // Otherwise, continue processing. - len = 0; + // Otherwise, continue processing. + len = 0; + } } c->snd.wnd = hdr.wnd; // TODO: move below @@ -1148,8 +1262,14 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { // ackno should not roll back, and it should also not be bigger than what we ever could have sent // (= snd.una + c->sndbuf.used). + if(!is_reliable(c)) { + if(hdr.ack != c->snd.last && c->state >= ESTABLISHED) { + hdr.ack = c->snd.una; + } + } + if(hdr.ctl & ACK && (seqdiff(hdr.ack, c->snd.last) > 0 || seqdiff(hdr.ack, c->snd.una) < 0)) { - debug("Packet ack seqno out of range, %u <= %u < %u\n", c->snd.una, hdr.ack, c->snd.una + c->sndbuf.used); + debug(c, "packet ack seqno out of range, %u <= %u < %u\n", c->snd.una, hdr.ack, c->snd.una + c->sndbuf.used); // Ignore unacceptable RST packets. if(hdr.ctl & RST) { @@ -1176,6 +1296,10 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { c->recv(c, NULL, 0); } + if(c->poll && !c->reapable) { + c->poll(c, 0); + } + return 0; case SYN_RECEIVED: @@ -1203,6 +1327,10 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { c->recv(c, NULL, 0); } + if(c->poll && !c->reapable) { + c->poll(c, 0); + } + return 0; case CLOSING: @@ -1241,7 +1369,6 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { // 3. Advance snd.una advanced = seqdiff(hdr.ack, c->snd.una); - prevrcvnxt = c->rcv.nxt; if(advanced) { // RTT measurement @@ -1253,7 +1380,7 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { update_rtt(c, diff.tv_sec * 1000000 + diff.tv_usec); c->rtt_start.tv_sec = 0; } else if(c->rtt_seq < hdr.ack) { - debug("Cancelling RTT measurement: %u < %u\n", c->rtt_seq, hdr.ack); + debug(c, "cancelling RTT measurement: %u < %u\n", c->rtt_seq, hdr.ack); c->rtt_start.tv_sec = 0; } } @@ -1273,8 +1400,10 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { assert(data_acked >= 0); +#ifndef NDEBUG int32_t bufused = seqdiff(c->snd.last, c->snd.una); assert(data_acked <= bufused); +#endif if(data_acked) { buffer_get(&c->sndbuf, NULL, data_acked); @@ -1287,13 +1416,28 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { c->snd.una = hdr.ack; - c->dupack = 0; - c->snd.cwnd += utcp->mtu; + if(c->dupack) { + if(c->dupack >= 3) { + debug(c, "fast recovery ended\n"); + c->snd.cwnd = c->snd.ssthresh; + } + + c->dupack = 0; + } + + // Increase the congestion window according to RFC 5681 + if(c->snd.cwnd < c->snd.ssthresh) { + c->snd.cwnd += min(advanced, utcp->mtu); // eq. 2 + } else { + c->snd.cwnd += max(1, (utcp->mtu * utcp->mtu) / c->snd.cwnd); // eq. 3 + } if(c->snd.cwnd > c->sndbuf.maxsize) { c->snd.cwnd = c->sndbuf.maxsize; } + debug_cwnd(c); + // Check if we have sent a FIN that is now ACKed. switch(c->state) { case FIN_WAIT_1: @@ -1306,7 +1450,7 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { case CLOSING: if(c->snd.una == c->snd.last) { gettimeofday(&c->conn_timeout, NULL); - c->conn_timeout.tv_sec += 60; + c->conn_timeout.tv_sec += utcp->timeout; set_state(c, TIME_WAIT); } @@ -1318,15 +1462,29 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { } else { if(!len && is_reliable(c)) { c->dupack++; + debug(c, "duplicate ACK %d\n", c->dupack); if(c->dupack == 3) { - debug("Triplicate ACK\n"); - //TODO: Resend one packet and go to fast recovery mode. See RFC 6582. - //We do a very simple variant here; reset the nxt pointer to the last acknowledged packet from the peer. - //Reset the congestion window so we wait for ACKs. - c->snd.nxt = c->snd.una; - c->snd.cwnd = utcp->mtu; - start_retransmit_timer(c); + // RFC 5681 fast recovery + debug(c, "fast recovery started\n", c->dupack); + c->snd.ssthresh = max(c->snd.cwnd / 2, utcp->mtu * 2); // eq. 4 + c->snd.cwnd = min(c->snd.ssthresh + 3 * utcp->mtu, c->sndbuf.maxsize); + + if(c->snd.cwnd > c->sndbuf.maxsize) { + c->snd.cwnd = c->sndbuf.maxsize; + } + + debug_cwnd(c); + + fast_retransmit(c); + } else if(c->dupack > 3) { + c->snd.cwnd += utcp->mtu; + + if(c->snd.cwnd > c->sndbuf.maxsize) { + c->snd.cwnd = c->sndbuf.maxsize; + } + + debug_cwnd(c); } } } @@ -1334,12 +1492,13 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { // 4. Update timers if(advanced) { - timerclear(&c->conn_timeout); // It will be set anew in utcp_timeout() if c->snd.una != c->snd.nxt. - if(c->snd.una == c->snd.last) { stop_retransmit_timer(c); + timerclear(&c->conn_timeout); } else if(is_reliable(c)) { start_retransmit_timer(c); + gettimeofday(&c->conn_timeout, NULL); + c->conn_timeout.tv_sec += utcp->timeout; } } @@ -1369,6 +1528,9 @@ skip_ack: break; case SYN_RECEIVED: + // This is a retransmit of a SYN, send back the SYNACK. + goto synack; + case ESTABLISHED: case FIN_WAIT_1: case FIN_WAIT_2: @@ -1444,7 +1606,7 @@ skip_ack: // 7. Process FIN stuff - if((hdr.ctl & FIN) && hdr.seq + len == c->rcv.nxt) { + if((hdr.ctl & FIN) && (!is_reliable(c) || hdr.seq + len == c->rcv.nxt)) { switch(c->state) { case SYN_SENT: case SYN_RECEIVED: @@ -1464,7 +1626,7 @@ skip_ack: case FIN_WAIT_2: gettimeofday(&c->conn_timeout, NULL); - c->conn_timeout.tv_sec += 60; + c->conn_timeout.tv_sec += utcp->timeout; set_state(c, TIME_WAIT); break; @@ -1486,7 +1648,7 @@ skip_ack: c->rcv.nxt++; len++; - // Inform the application that the peer closed the connection. + // Inform the application that the peer closed its end of the connection. if(c->recv) { errno = 0; c->recv(c, NULL, 0); @@ -1494,12 +1656,15 @@ skip_ack: } // Now we send something back if: - // - we advanced rcv.nxt (ie, we got some data that needs to be ACKed) + // - we received data, so we have to send back an ACK // -> sendatleastone = true // - or we got an ack, so we should maybe send a bit more data // -> sendatleastone = false - ack(c, len || prevrcvnxt != c->rcv.nxt); + if(is_reliable(c) || hdr.ctl & SYN || hdr.ctl & FIN) { + ack(c, has_data); + } + return 0; reset: @@ -1516,14 +1681,14 @@ reset: hdr.ctl = RST | ACK; } - print_packet(utcp, "send", &hdr, sizeof(hdr)); + print_packet(c, "send", &hdr, sizeof(hdr)); utcp->send(utcp, &hdr, sizeof(hdr)); return 0; } int utcp_shutdown(struct utcp_connection *c, int dir) { - debug("%p shutdown %d at %u\n", c ? c->utcp : NULL, dir, c ? c->snd.last : 0); + debug(c, "shutdown %d at %u\n", dir, c ? c->snd.last : 0); if(!c) { errno = EFAULT; @@ -1531,7 +1696,7 @@ int utcp_shutdown(struct utcp_connection *c, int dir) { } if(c->reapable) { - debug("Error: shutdown() called on closed connection %p\n", c); + debug(c, "shutdown() called on closed connection\n"); errno = EBADF; return -1; } @@ -1605,7 +1770,7 @@ static bool reset_connection(struct utcp_connection *c) { } if(c->reapable) { - debug("Error: abort() called on closed connection %p\n", c); + debug(c, "abort() called on closed connection\n"); errno = EBADF; return false; } @@ -1645,7 +1810,7 @@ static bool reset_connection(struct utcp_connection *c) { hdr.wnd = 0; hdr.ctl = RST; - print_packet(c->utcp, "send", &hdr, sizeof(hdr)); + print_packet(c, "send", &hdr, sizeof(hdr)); c->utcp->send(c->utcp, &hdr, sizeof(hdr)); return true; } @@ -1664,11 +1829,19 @@ void utcp_abort_all_connections(struct utcp *utcp) { continue; } + utcp_recv_t old_recv = c->recv; + utcp_poll_t old_poll = c->poll; + reset_connection(c); - if(c->recv) { + if(old_recv) { errno = 0; - c->recv(c, NULL, 0); + old_recv(c, NULL, 0); + } + + if(old_poll && !c->reapable) { + errno = 0; + old_poll(c, 0); } } @@ -1716,7 +1889,7 @@ struct timeval utcp_timeout(struct utcp *utcp) { // delete connections that have been utcp_close()d. if(c->state == CLOSED) { if(c->reapable) { - debug("Reaping %p\n", c); + debug(c, "reaping\n"); free_connection(c); i--; } @@ -1732,11 +1905,15 @@ struct timeval utcp_timeout(struct utcp *utcp) { c->recv(c, NULL, 0); } + if(c->poll && !c->reapable) { + c->poll(c, 0); + } + continue; } if(timerisset(&c->rtrx_timeout) && timercmp(&c->rtrx_timeout, &now, <)) { - debug("retransmit()\n"); + debug(c, "retransmitting after timeout\n"); retransmit(c); } @@ -1812,11 +1989,16 @@ void utcp_exit(struct utcp *utcp) { for(int i = 0; i < utcp->nconnections; i++) { struct utcp_connection *c = utcp->connections[i]; - if(!c->reapable) + if(!c->reapable) { if(c->recv) { c->recv(c, NULL, 0); } + if(c->poll && !c->reapable) { + c->poll(c, 0); + } + } + buffer_exit(&c->rcvbuf); buffer_exit(&c->sndbuf); free(c); @@ -1851,9 +2033,21 @@ void utcp_reset_timers(struct utcp *utcp) { then.tv_sec += utcp->timeout; for(int i = 0; i < utcp->nconnections; i++) { - utcp->connections[i]->rtrx_timeout = now; - utcp->connections[i]->conn_timeout = then; - utcp->connections[i]->rtt_start.tv_sec = 0; + struct utcp_connection *c = utcp->connections[i]; + + if(c->reapable) { + continue; + } + + if(timerisset(&c->rtrx_timeout)) { + c->rtrx_timeout = now; + } + + if(timerisset(&c->conn_timeout)) { + c->conn_timeout = then; + } + + c->rtt_start.tv_sec = 0; } if(utcp->rto > START_RTO) { @@ -1928,6 +2122,14 @@ void utcp_set_rcvbuf(struct utcp_connection *c, size_t size) { } } +size_t utcp_get_sendq(struct utcp_connection *c) { + return c->sndbuf.used; +} + +size_t utcp_get_recvq(struct utcp_connection *c) { + return c->rcvbuf.used; +} + bool utcp_get_nodelay(struct utcp_connection *c) { return c ? c->nodelay : false; } @@ -1970,3 +2172,53 @@ void utcp_set_accept_cb(struct utcp *utcp, utcp_accept_t accept, utcp_pre_accept utcp->pre_accept = pre_accept; } } + +void utcp_expect_data(struct utcp_connection *c, bool expect) { + if(!c || c->reapable) { + return; + } + + if(!(c->state == ESTABLISHED || c->state == FIN_WAIT_1 || c->state == FIN_WAIT_2)) { + return; + } + + if(expect) { + // If we expect data, start the connection timer. + if(!timerisset(&c->conn_timeout)) { + gettimeofday(&c->conn_timeout, NULL); + c->conn_timeout.tv_sec += c->utcp->timeout; + } + } else { + // If we want to cancel expecting data, only clear the timer when there is no unACKed data. + if(c->snd.una == c->snd.last) { + timerclear(&c->conn_timeout); + } + } +} + +void utcp_offline(struct utcp *utcp, bool offline) { + struct timeval now; + gettimeofday(&now, NULL); + + for(int i = 0; i < utcp->nconnections; i++) { + struct utcp_connection *c = utcp->connections[i]; + + if(c->reapable) { + continue; + } + + utcp_expect_data(c, offline); + + if(!offline) { + if(timerisset(&c->rtrx_timeout)) { + c->rtrx_timeout = now; + } + + utcp->connections[i]->rtt_start.tv_sec = 0; + } + } + + if(!offline && utcp->rto > START_RTO) { + utcp->rto = START_RTO; + } +}