X-Git-Url: http://git.meshlink.io/?p=utcp;a=blobdiff_plain;f=utcp.c;h=c650ab700ad150fa876b0944a6872ea39298f352;hp=f062319fa33a1daa665faaa96f3c567eb488f245;hb=5ded633782709710c42648c71154d6062e83b7a6;hpb=2ccd5cc7da5f6157beca8cfe9dbf115e95a99776 diff --git a/utcp.c b/utcp.c index f062319..c650ab7 100644 --- a/utcp.c +++ b/utcp.c @@ -49,10 +49,14 @@ (r)->tv_sec = (a)->tv_sec - (b)->tv_sec;\ (r)->tv_usec = (a)->tv_usec - (b)->tv_usec;\ if((r)->tv_usec < 0)\ - (r)->tv_sec--, (r)->tv_usec += 1000000;\ + (r)->tv_sec--, (r)->tv_usec += USEC_PER_SEC;\ } while (0) #endif +static inline size_t max(size_t a, size_t b) { + return a > b ? a : b; +} + #ifdef UTCP_DEBUG #include @@ -66,12 +70,12 @@ static void debug(const char *format, ...) { static void print_packet(struct utcp *utcp, const char *dir, const void *pkt, size_t len) { struct hdr hdr; if(len < sizeof hdr) { - debug("%p %s: short packet (%zu bytes)\n", utcp, dir, len); + debug("%p %s: short packet (%lu bytes)\n", utcp, dir, (unsigned long)len); return; } memcpy(&hdr, pkt, sizeof hdr); - fprintf (stderr, "%p %s: len=%zu, src=%u dst=%u seq=%u ack=%u wnd=%u ctl=", utcp, dir, len, hdr.src, hdr.dst, hdr.seq, hdr.ack, hdr.wnd); + debug("%p %s: len=%lu, src=%u dst=%u seq=%u ack=%u wnd=%u ctl=", utcp, dir, (unsigned long)len, hdr.src, hdr.dst, hdr.seq, hdr.ack, hdr.wnd); if(hdr.ctl & SYN) debug("SYN"); if(hdr.ctl & RST) @@ -82,11 +86,18 @@ static void print_packet(struct utcp *utcp, const char *dir, const void *pkt, si debug("ACK"); if(len > sizeof hdr) { - debug(" data="); - for(int i = sizeof hdr; i < len; i++) { - const char *data = pkt; - debug("%c", data[i] >= 32 ? data[i] : '.'); + uint32_t datalen = len - sizeof hdr; + const uint8_t *data = (uint8_t *)pkt + sizeof hdr; + char str[datalen * 2 + 1]; + char *p = str; + + for(uint32_t i = 0; i < datalen; i++) { + *p++ = "0123456789ABCDEF"[data[i] >> 4]; + *p++ = "0123456789ABCDEF"[data[i] & 15]; } + *p = 0; + + debug(" data=%s", str); } debug("\n"); @@ -116,6 +127,10 @@ static bool fin_wanted(struct utcp_connection *c, uint32_t seq) { } } +static bool is_reliable(struct utcp_connection *c) { + return c->flags & UTCP_RELIABLE; +} + static inline void list_connections(struct utcp *utcp) { debug("%p has %d connections:\n", utcp, utcp->nconnections); for(int i = 0; i < utcp->nconnections; i++) @@ -130,16 +145,26 @@ static int32_t seqdiff(uint32_t a, uint32_t b) { // TODO: convert to ringbuffers to avoid memmove() operations. // Store data into the buffer -static ssize_t buffer_put(struct buffer *buf, const void *data, size_t len) { - if(buf->maxsize <= buf->used) - return 0; - if(len > buf->maxsize - buf->used) - len = buf->maxsize - buf->used; - if(len > buf->size - buf->used) { +static ssize_t buffer_put_at(struct buffer *buf, size_t offset, const void *data, size_t len) { + debug("buffer_put_at %lu %lu %lu\n", (unsigned long)buf->used, (unsigned long)offset, (unsigned long)len); + + size_t required = offset + len; + if(required > buf->maxsize) { + if(offset >= buf->maxsize) + return 0; + len = buf->maxsize - offset; + required = buf->maxsize; + } + + if(required > buf->size) { size_t newsize = buf->size; - do { - newsize *= 2; - } while(newsize < buf->used + len); + if(!newsize) { + newsize = required; + } else { + do { + newsize *= 2; + } while(newsize < required); + } if(newsize > buf->maxsize) newsize = buf->maxsize; char *newdata = realloc(buf->data, newsize); @@ -148,11 +173,17 @@ static ssize_t buffer_put(struct buffer *buf, const void *data, size_t len) { buf->data = newdata; buf->size = newsize; } - memcpy(buf->data + buf->used, data, len); - buf->used += len; + + memcpy(buf->data + offset, data, len); + if(required > buf->used) + buf->used = required; return len; } +static ssize_t buffer_put(struct buffer *buf, const void *data, size_t len) { + return buffer_put_at(buf, buf->used, data, len); +} + // Get data from the buffer. data can be NULL. static ssize_t buffer_get(struct buffer *buf, void *data, size_t len) { if(len > buf->used) @@ -177,9 +208,11 @@ static ssize_t buffer_copy(struct buffer *buf, void *data, size_t offset, size_t static bool buffer_init(struct buffer *buf, uint32_t len, uint32_t maxlen) { memset(buf, 0, sizeof *buf); - buf->data = malloc(len); - if(!len) - return false; + if(len) { + buf->data = malloc(len); + if(!buf->data) + return false; + } buf->size = len; buf->maxsize = maxlen; return true; @@ -234,6 +267,7 @@ static void free_connection(struct utcp_connection *c) { memmove(cp, cp + 1, (utcp->nconnections - i - 1) * sizeof *cp); utcp->nconnections--; + buffer_exit(&c->rcvbuf); buffer_exit(&c->sndbuf); free(c); } @@ -278,11 +312,21 @@ static struct utcp_connection *allocate_connection(struct utcp *utcp, uint16_t s return NULL; } + if(!buffer_init(&c->rcvbuf, DEFAULT_RCVBUFSIZE, DEFAULT_MAXRCVBUFSIZE)) { + buffer_exit(&c->sndbuf); + free(c); + return NULL; + } + // Fill in the details c->src = src; c->dst = dst; +#ifdef UTCP_DEBUG + c->snd.iss = 0; +#else c->snd.iss = rand(); +#endif c->snd.una = c->snd.iss; c->snd.nxt = c->snd.iss + 1; c->rcv.wnd = utcp->mtu; @@ -298,28 +342,78 @@ static struct utcp_connection *allocate_connection(struct utcp *utcp, uint16_t s return c; } -struct utcp_connection *utcp_connect(struct utcp *utcp, uint16_t dst, utcp_recv_t recv, void *priv) { +// Update RTT variables. See RFC 6298. +static void update_rtt(struct utcp_connection *c, uint32_t rtt) { + if(!rtt) { + debug("invalid rtt\n"); + return; + } + + struct utcp *utcp = c->utcp; + + if(!utcp->srtt) { + utcp->srtt = rtt; + utcp->rttvar = rtt / 2; + utcp->rto = rtt + max(2 * rtt, CLOCK_GRANULARITY); + } else { + utcp->rttvar = (utcp->rttvar * 3 + abs(utcp->srtt - rtt)) / 4; + utcp->srtt = (utcp->srtt * 7 + rtt) / 8; + utcp->rto = utcp->srtt + max(utcp->rttvar, CLOCK_GRANULARITY); + } + + if(utcp->rto > MAX_RTO) + utcp->rto = MAX_RTO; + + debug("rtt %u srtt %u rttvar %u rto %u\n", rtt, utcp->srtt, utcp->rttvar, utcp->rto); +} + +static void start_retransmit_timer(struct utcp_connection *c) { + gettimeofday(&c->rtrx_timeout, NULL); + c->rtrx_timeout.tv_usec += c->utcp->rto; + while(c->rtrx_timeout.tv_usec >= 1000000) { + c->rtrx_timeout.tv_usec -= 1000000; + c->rtrx_timeout.tv_sec++; + } + debug("timeout set to %lu.%06lu (%u)\n", c->rtrx_timeout.tv_sec, c->rtrx_timeout.tv_usec, c->utcp->rto); +} + +static void stop_retransmit_timer(struct utcp_connection *c) { + timerclear(&c->rtrx_timeout); + debug("timeout cleared\n"); +} + +struct utcp_connection *utcp_connect_ex(struct utcp *utcp, uint16_t dst, utcp_recv_t recv, void *priv, uint32_t flags) { struct utcp_connection *c = allocate_connection(utcp, 0, dst); if(!c) return NULL; + assert((flags & ~0xf) == 0); + + c->flags = flags; c->recv = recv; c->priv = priv; - struct hdr hdr; - - hdr.src = c->src; - hdr.dst = c->dst; - hdr.seq = c->snd.iss; - hdr.ack = 0; - hdr.wnd = c->rcv.wnd; - hdr.ctl = SYN; - hdr.aux = 0; + struct { + struct hdr hdr; + uint8_t init[4]; + } pkt; + + pkt.hdr.src = c->src; + pkt.hdr.dst = c->dst; + pkt.hdr.seq = c->snd.iss; + pkt.hdr.ack = 0; + pkt.hdr.wnd = c->rcv.wnd; + pkt.hdr.ctl = SYN; + pkt.hdr.aux = 0x0101; + pkt.init[0] = 1; + pkt.init[1] = 0; + pkt.init[2] = 0; + pkt.init[3] = flags & 0x7; set_state(c, SYN_SENT); - print_packet(utcp, "send", &hdr, sizeof hdr); - utcp->send(utcp, &hdr, sizeof hdr); + print_packet(utcp, "send", &pkt, sizeof pkt); + utcp->send(utcp, &pkt, sizeof pkt); gettimeofday(&c->conn_timeout, NULL); c->conn_timeout.tv_sec += utcp->timeout; @@ -327,6 +421,10 @@ struct utcp_connection *utcp_connect(struct utcp *utcp, uint16_t dst, utcp_recv_ return c; } +struct utcp_connection *utcp_connect(struct utcp *utcp, uint16_t dst, utcp_recv_t recv, void *priv) { + return utcp_connect_ex(utcp, dst, recv, priv, UTCP_TCP); +} + void utcp_accept(struct utcp_connection *c, utcp_recv_t recv, void *priv) { if(c->reapable || c->state != SYN_RECEIVED) { debug("Error: accept() called on invalid connection %p in state %s\n", c, strstate[c->state]); @@ -342,6 +440,7 @@ void utcp_accept(struct utcp_connection *c, utcp_recv_t recv, void *priv) { static void ack(struct utcp_connection *c, bool sendatleastone) { int32_t left = seqdiff(c->snd.last, c->snd.nxt); int32_t cwndleft = c->snd.cwnd - seqdiff(c->snd.nxt, c->snd.una); + debug("cwndleft = %d\n", cwndleft); assert(left >= 0); @@ -360,7 +459,7 @@ static void ack(struct utcp_connection *c, bool sendatleastone) { } *pkt; pkt = malloc(sizeof pkt->hdr + c->utcp->mtu); - if(!pkt->data) + if(!pkt) return; pkt->hdr.src = c->src; @@ -384,6 +483,13 @@ static void ack(struct utcp_connection *c, bool sendatleastone) { pkt->hdr.ctl |= FIN; } + if(!c->rtt_start.tv_sec) { + // Start RTT measurement + gettimeofday(&c->rtt_start, NULL); + c->rtt_seq = pkt->hdr.seq + seglen; + debug("Starting RTT measurement, expecting ack %u\n", c->rtt_seq); + } + print_packet(c->utcp, "send", pkt, sizeof pkt->hdr + seglen); c->utcp->send(c->utcp, pkt, sizeof pkt->hdr + seglen); } while(left); @@ -419,7 +525,7 @@ ssize_t utcp_send(struct utcp_connection *c, const void *data, size_t len) { return -1; } - // Add data to send buffer + // Exit early if we have nothing to send. if(!len) return 0; @@ -429,6 +535,8 @@ ssize_t utcp_send(struct utcp_connection *c, const void *data, size_t len) { return -1; } + // Add data to send buffer. + len = buffer_put(&c->sndbuf, data, len); if(len <= 0) { errno = EWOULDBLOCK; @@ -437,6 +545,12 @@ ssize_t utcp_send(struct utcp_connection *c, const void *data, size_t len) { c->snd.last += len; ack(c, false); + if(!is_reliable(c)) { + c->snd.una = c->snd.nxt = c->snd.last; + buffer_get(&c->sndbuf, NULL, c->sndbuf.used); + } + if(is_reliable(c) && !timerisset(&c->rtrx_timeout)) + start_retransmit_timer(c); return len; } @@ -447,8 +561,11 @@ static void swap_ports(struct hdr *hdr) { } static void retransmit(struct utcp_connection *c) { - if(c->state == CLOSED || c->snd.nxt == c->snd.una) + if(c->state == CLOSED || c->snd.last == c->snd.una) { + debug("Retransmit() called but nothing to retransmit!\n"); + stop_retransmit_timer(c); return; + } struct utcp *utcp = c->utcp; @@ -463,22 +580,21 @@ static void retransmit(struct utcp_connection *c) { pkt->hdr.src = c->src; pkt->hdr.dst = c->dst; + pkt->hdr.wnd = c->rcv.wnd; + pkt->hdr.aux = 0; switch(c->state) { - case LISTEN: - // TODO: this should not happen - break; - case SYN_SENT: + // Send our SYN again pkt->hdr.seq = c->snd.iss; pkt->hdr.ack = 0; - pkt->hdr.wnd = c->rcv.wnd; pkt->hdr.ctl = SYN; print_packet(c->utcp, "rtrx", pkt, sizeof pkt->hdr); utcp->send(utcp, pkt, sizeof pkt->hdr); break; case SYN_RECEIVED: + // Send SYNACK again pkt->hdr.seq = c->snd.nxt; pkt->hdr.ack = c->rcv.nxt; pkt->hdr.ctl = SYN | ACK; @@ -488,34 +604,185 @@ static void retransmit(struct utcp_connection *c) { case ESTABLISHED: case FIN_WAIT_1: + case CLOSE_WAIT: + case CLOSING: + case LAST_ACK: + // Send unacked data again. pkt->hdr.seq = c->snd.una; pkt->hdr.ack = c->rcv.nxt; pkt->hdr.ctl = ACK; - uint32_t len = seqdiff(c->snd.nxt, c->snd.una); - if(c->state == FIN_WAIT_1) - len--; + uint32_t len = seqdiff(c->snd.last, c->snd.una); if(len > utcp->mtu) len = utcp->mtu; - else { - if(c->state == FIN_WAIT_1) - pkt->hdr.ctl |= FIN; + if(fin_wanted(c, c->snd.una + len)) { + len--; + pkt->hdr.ctl |= FIN; } + c->snd.nxt = c->snd.una + len; + c->snd.cwnd = utcp->mtu; // reduce cwnd on retransmit buffer_copy(&c->sndbuf, pkt->data, 0, len); print_packet(c->utcp, "rtrx", pkt, sizeof pkt->hdr + len); utcp->send(utcp, pkt, sizeof pkt->hdr + len); break; - default: - // TODO: implement + case CLOSED: + case LISTEN: + case TIME_WAIT: + case FIN_WAIT_2: + // We shouldn't need to retransmit anything in this state. #ifdef UTCP_DEBUG abort(); #endif - break; + stop_retransmit_timer(c); + goto cleanup; } + start_retransmit_timer(c); + utcp->rto *= 2; + if(utcp->rto > MAX_RTO) + utcp->rto = MAX_RTO; + c->rtt_start.tv_sec = 0; // invalidate RTT timer + +cleanup: free(pkt); } +/* Update receive buffer and SACK entries after consuming data. + * + * Situation: + * + * |.....0000..1111111111.....22222......3333| + * |---------------^ + * + * 0..3 represent the SACK entries. The ^ indicates up to which point we want + * to remove data from the receive buffer. The idea is to substract "len" + * from the offset of all the SACK entries, and then remove/cut down entries + * that are shifted to before the start of the receive buffer. + * + * There are three cases: + * - the SACK entry is after ^, in that case just change the offset. + * - the SACK entry starts before and ends after ^, so we have to + * change both its offset and size. + * - the SACK entry is completely before ^, in that case delete it. + */ +static void sack_consume(struct utcp_connection *c, size_t len) { + debug("sack_consume %lu\n", (unsigned long)len); + if(len > c->rcvbuf.used) { + debug("All SACK entries consumed"); + c->sacks[0].len = 0; + return; + } + + buffer_get(&c->rcvbuf, NULL, len); + + for(int i = 0; i < NSACKS && c->sacks[i].len; ) { + if(len < c->sacks[i].offset) { + c->sacks[i].offset -= len; + i++; + } else if(len < c->sacks[i].offset + c->sacks[i].len) { + c->sacks[i].len -= len - c->sacks[i].offset; + c->sacks[i].offset = 0; + i++; + } else { + if(i < NSACKS - 1) { + memmove(&c->sacks[i], &c->sacks[i + 1], (NSACKS - 1 - i) * sizeof c->sacks[i]); + c->sacks[NSACKS - 1].len = 0; + } else { + c->sacks[i].len = 0; + break; + } + } + } + + for(int i = 0; i < NSACKS && c->sacks[i].len; i++) + debug("SACK[%d] offset %u len %u\n", i, c->sacks[i].offset, c->sacks[i].len); +} + +static void handle_out_of_order(struct utcp_connection *c, uint32_t offset, const void *data, size_t len) { + debug("out of order packet, offset %u\n", offset); + // Packet loss or reordering occured. Store the data in the buffer. + ssize_t rxd = buffer_put_at(&c->rcvbuf, offset, data, len); + if(rxd < len) + abort(); + + // Make note of where we put it. + for(int i = 0; i < NSACKS; i++) { + if(!c->sacks[i].len) { // nothing to merge, add new entry + debug("New SACK entry %d\n", i); + c->sacks[i].offset = offset; + c->sacks[i].len = rxd; + break; + } else if(offset < c->sacks[i].offset) { + if(offset + rxd < c->sacks[i].offset) { // insert before + if(!c->sacks[NSACKS - 1].len) { // only if room left + debug("Insert SACK entry at %d\n", i); + memmove(&c->sacks[i + 1], &c->sacks[i], (NSACKS - i - 1) * sizeof c->sacks[i]); + c->sacks[i].offset = offset; + c->sacks[i].len = rxd; + } else { + debug("SACK entries full, dropping packet\n"); + } + break; + } else { // merge + debug("Merge with start of SACK entry at %d\n", i); + c->sacks[i].offset = offset; + break; + } + } else if(offset <= c->sacks[i].offset + c->sacks[i].len) { + if(offset + rxd > c->sacks[i].offset + c->sacks[i].len) { // merge + debug("Merge with end of SACK entry at %d\n", i); + c->sacks[i].len = offset + rxd - c->sacks[i].offset; + // TODO: handle potential merge with next entry + } + break; + } + } + + for(int i = 0; i < NSACKS && c->sacks[i].len; i++) + debug("SACK[%d] offset %u len %u\n", i, c->sacks[i].offset, c->sacks[i].len); +} + +static void handle_in_order(struct utcp_connection *c, const void *data, size_t len) { + // Check if we can process out-of-order data now. + if(c->sacks[0].len && len >= c->sacks[0].offset) { // TODO: handle overlap with second SACK + debug("incoming packet len %lu connected with SACK at %u\n", (unsigned long)len, c->sacks[0].offset); + buffer_put_at(&c->rcvbuf, 0, data, len); // TODO: handle return value + len = max(len, c->sacks[0].offset + c->sacks[0].len); + data = c->rcvbuf.data; + } + + if(c->recv) { + ssize_t rxd = c->recv(c, data, len); + if(rxd != len) { + // TODO: handle the application not accepting all data. + abort(); + } + } + + if(c->rcvbuf.used) + sack_consume(c, len); + + c->rcv.nxt += len; +} + + +static void handle_incoming_data(struct utcp_connection *c, uint32_t seq, const void *data, size_t len) { + if(!is_reliable(c)) { + c->recv(c, data, len); + c->rcv.nxt = seq + len; + return; + } + + uint32_t offset = seqdiff(seq, c->rcv.nxt); + if(offset + len > c->rcvbuf.maxsize) + abort(); + + if(offset) + handle_out_of_order(c, offset, data, len); + else + handle_in_order(c, data, len); +} + ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { if(!utcp) { @@ -582,6 +849,19 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { goto reset; } + // Parse auxilliary information + if(hdr.aux) { + if(hdr.aux != 0x0101 || len < 4 || ((uint8_t *)data)[0] != 1) { + len = 1; + goto reset; + } + c->flags = ((uint8_t *)data)[3] & 0x7; + data += 4; + len -= 4; + } else { + c->flags = UTCP_TCP; + } + // Return SYN+ACK, go to SYN_RECEIVED state c->snd.wnd = hdr.wnd; c->rcv.irs = hdr.seq; @@ -609,8 +889,10 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { // In case this is for a CLOSED connection, ignore the packet. // TODO: make it so incoming packets can never match a CLOSED connection. - if(c->state == CLOSED) + if(c->state == CLOSED) { + debug("Got packet for closed connection\n"); return 0; + } // It is for an existing connection. @@ -644,45 +926,41 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { if(c->state == SYN_SENT) acceptable = true; - - // TODO: handle packets overlapping c->rcv.nxt. -#if 0 - // Only use this when accepting out-of-order packets. else if(len == 0) - if(c->rcv.wnd == 0) - acceptable = hdr.seq == c->rcv.nxt; - else - acceptable = (seqdiff(hdr.seq, c->rcv.nxt) >= 0 && seqdiff(hdr.seq, c->rcv.nxt + c->rcv.wnd) < 0); - else - if(c->rcv.wnd == 0) - // We don't accept data when the receive window is zero. - acceptable = false; - else - // Both start and end of packet must be within the receive window - acceptable = (seqdiff(hdr.seq, c->rcv.nxt) >= 0 && seqdiff(hdr.seq, c->rcv.nxt + c->rcv.wnd) < 0) - || (seqdiff(hdr.seq + len + 1, c->rcv.nxt) >= 0 && seqdiff(hdr.seq + len - 1, c->rcv.nxt + c->rcv.wnd) < 0); -#else - if(c->state != SYN_SENT) - acceptable = hdr.seq == c->rcv.nxt; -#endif + acceptable = seqdiff(hdr.seq, c->rcv.nxt) >= 0; + else { + int32_t rcv_offset = seqdiff(hdr.seq, c->rcv.nxt); + + // cut already accepted front overlapping + if(rcv_offset < 0) { + acceptable = len > -rcv_offset; + if(acceptable) { + data -= rcv_offset; + len += rcv_offset; + hdr.seq -= rcv_offset; + } + } else { + acceptable = seqdiff(hdr.seq, c->rcv.nxt) >= 0 && seqdiff(hdr.seq, c->rcv.nxt) + len <= c->rcvbuf.maxsize; + } + } if(!acceptable) { - debug("Packet not acceptable, %u <= %u + %zu < %u\n", c->rcv.nxt, hdr.seq, len, c->rcv.nxt + c->rcv.wnd); + debug("Packet not acceptable, %u <= %u + %lu < %u\n", c->rcv.nxt, hdr.seq, (unsigned long)len, c->rcv.nxt + c->rcvbuf.maxsize); // Ignore unacceptable RST packets. if(hdr.ctl & RST) return 0; - // Otherwise, send an ACK back in the hope things improve. - ack(c, true); - return 0; + // Otherwise, continue processing. + len = 0; } c->snd.wnd = hdr.wnd; // TODO: move below // 1c. Drop packets with an invalid ACK. - // ackno should not roll back, and it should also not be bigger than snd.nxt. + // ackno should not roll back, and it should also not be bigger than what we ever could have sent + // (= snd.una + c->sndbuf.used). - if(hdr.ctl & ACK && (seqdiff(hdr.ack, c->snd.nxt) > 0 || seqdiff(hdr.ack, c->snd.una) < 0)) { - debug("Packet ack seqno out of range, %u %u %u\n", hdr.ack, c->snd.una, c->snd.nxt); + if(hdr.ctl & ACK && (seqdiff(hdr.ack, c->snd.last) > 0 || seqdiff(hdr.ack, c->snd.una) < 0)) { + debug("Packet ack seqno out of range, %u <= %u < %u\n", c->snd.una, hdr.ack, c->snd.una + c->sndbuf.used); // Ignore unacceptable RST packets. if(hdr.ctl & RST) return 0; @@ -748,6 +1026,20 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { prevrcvnxt = c->rcv.nxt; if(advanced) { + // RTT measurement + if(c->rtt_start.tv_sec) { + if(c->rtt_seq == hdr.ack) { + struct timeval now, diff; + gettimeofday(&now, NULL); + timersub(&now, &c->rtt_start, &diff); + update_rtt(c, diff.tv_sec * 1000000 + diff.tv_usec); + c->rtt_start.tv_sec = 0; + } else if(c->rtt_seq < hdr.ack) { + debug("Cancelling RTT measurement: %u < %u\n", c->rtt_seq, hdr.ack); + c->rtt_start.tv_sec = 0; + } + } + int32_t data_acked = advanced; switch(c->state) { @@ -768,6 +1060,10 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { if(data_acked) buffer_get(&c->sndbuf, NULL, data_acked); + // Also advance snd.nxt if possible + if(seqdiff(c->snd.nxt, hdr.ack) < 0) + c->snd.nxt = hdr.ack; + c->snd.una = hdr.ack; c->dupack = 0; @@ -792,15 +1088,16 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { break; } } else { - if(!len) { + if(!len && is_reliable(c)) { c->dupack++; if(c->dupack == 3) { debug("Triplicate ACK\n"); //TODO: Resend one packet and go to fast recovery mode. See RFC 6582. //We do a very simple variant here; reset the nxt pointer to the last acknowledged packet from the peer. - //This will cause us to start retransmitting, but at the same speed as the incoming ACKs arrive, - //thus preventing a drop in speed. + //Reset the congestion window so we wait for ACKs. c->snd.nxt = c->snd.una; + c->snd.cwnd = utcp->mtu; + start_retransmit_timer(c); } } } @@ -809,8 +1106,10 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { if(advanced) { timerclear(&c->conn_timeout); // It will be set anew in utcp_timeout() if c->snd.una != c->snd.nxt. - if(c->snd.una == c->snd.nxt) - timerclear(&c->rtrx_timeout); + if(c->snd.una == c->snd.last) + stop_retransmit_timer(c); + else if(is_reliable(c)) + start_retransmit_timer(c); } // 5. Process SYN stuff @@ -891,28 +1190,12 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { return 0; } - ssize_t rxd; - - if(c->recv) { - rxd = c->recv(c, data, len); - if(rxd != len) { - // TODO: once we have a receive buffer, handle the application not accepting all data. - abort(); - } - if(rxd < 0) - rxd = 0; - else if(rxd > len) - rxd = len; // Bad application, bad! - } else { - rxd = len; - } - - c->rcv.nxt += len; + handle_incoming_data(c, hdr.seq, data, len); } // 7. Process FIN stuff - if(hdr.ctl & FIN) { + if((hdr.ctl & FIN) && hdr.seq + len == c->rcv.nxt) { switch(c->state) { case SYN_SENT: case SYN_RECEIVED: @@ -962,8 +1245,7 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { // - or we got an ack, so we should maybe send a bit more data // -> sendatleastone = false -ack: - ack(c, prevrcvnxt != c->rcv.nxt); + ack(c, len || prevrcvnxt != c->rcv.nxt); return 0; reset: @@ -984,7 +1266,7 @@ reset: } int utcp_shutdown(struct utcp_connection *c, int dir) { - debug("%p shutdown %d at %u\n", c ? c->utcp : NULL, dir, c->snd.last); + debug("%p shutdown %d at %u\n", c ? c->utcp : NULL, dir, c ? c->snd.last : 0); if(!c) { errno = EFAULT; return -1; @@ -996,13 +1278,26 @@ int utcp_shutdown(struct utcp_connection *c, int dir) { return -1; } - // TODO: handle dir - // TODO: check that repeated calls with the same parameters should have no effect + if(!(dir == UTCP_SHUT_RD || dir == UTCP_SHUT_WR || dir == UTCP_SHUT_RDWR)) { + errno = EINVAL; + return -1; + } + + // TCP does not have a provision for stopping incoming packets. + // The best we can do is to just ignore them. + if(dir == UTCP_SHUT_RD || dir == UTCP_SHUT_RDWR) + c->recv = NULL; + + // The rest of the code deals with shutting down writes. + if(dir == UTCP_SHUT_RD) + return 0; switch(c->state) { case CLOSED: - return 0; case LISTEN: + errno = ENOTCONN; + return -1; + case SYN_SENT: set_state(c, CLOSED); return 0; @@ -1027,11 +1322,13 @@ int utcp_shutdown(struct utcp_connection *c, int dir) { c->snd.last++; ack(c, false); + if(!timerisset(&c->rtrx_timeout)) + start_retransmit_timer(c); return 0; } int utcp_close(struct utcp_connection *c) { - if(utcp_shutdown(c, SHUT_RDWR)) + if(utcp_shutdown(c, SHUT_RDWR) && errno != ENOTCONN) return -1; c->recv = NULL; c->poll = NULL; @@ -1107,6 +1404,7 @@ struct timeval utcp_timeout(struct utcp *utcp) { if(!c) continue; + // delete connections that have been utcp_close()d. if(c->state == CLOSED) { if(c->reapable) { debug("Reaping %p\n", c); @@ -1125,6 +1423,7 @@ struct timeval utcp_timeout(struct utcp *utcp) { } if(timerisset(&c->rtrx_timeout) && timercmp(&c->rtrx_timeout, &now, <)) { + debug("retransmit()\n"); retransmit(c); } @@ -1134,13 +1433,6 @@ struct timeval utcp_timeout(struct utcp *utcp) { if(timerisset(&c->conn_timeout) && timercmp(&c->conn_timeout, &next, <)) next = c->conn_timeout; - if(c->snd.nxt != c->snd.una) { - c->rtrx_timeout = now; - c->rtrx_timeout.tv_sec++; - } else { - timerclear(&c->rtrx_timeout); - } - if(timerisset(&c->rtrx_timeout) && timercmp(&c->rtrx_timeout, &next, <)) next = c->rtrx_timeout; } @@ -1150,22 +1442,34 @@ struct timeval utcp_timeout(struct utcp *utcp) { return diff; } -struct utcp *utcp_init(utcp_accept_t accept, utcp_pre_accept_t pre_accept, utcp_send_t send, void *priv) { - struct utcp *utcp = calloc(1, sizeof *utcp); +bool utcp_is_active(struct utcp *utcp) { if(!utcp) - return NULL; + return false; + for(int i = 0; i < utcp->nconnections; i++) + if(utcp->connections[i]->state != CLOSED && utcp->connections[i]->state != TIME_WAIT) + return true; + + return false; +} + +struct utcp *utcp_init(utcp_accept_t accept, utcp_pre_accept_t pre_accept, utcp_send_t send, void *priv) { if(!send) { errno = EFAULT; return NULL; } + struct utcp *utcp = calloc(1, sizeof *utcp); + if(!utcp) + return NULL; + utcp->accept = accept; utcp->pre_accept = pre_accept; utcp->send = send; utcp->priv = priv; - utcp->mtu = 1000; - utcp->timeout = 60; + utcp->mtu = DEFAULT_MTU; + utcp->timeout = DEFAULT_USER_TIMEOUT; // sec + utcp->rto = START_RTO; // usec return utcp; } @@ -1176,6 +1480,7 @@ void utcp_exit(struct utcp *utcp) { for(int i = 0; i < utcp->nconnections; i++) { if(!utcp->connections[i]->reapable) debug("Warning, freeing unclosed connection %p\n", utcp->connections[i]); + buffer_exit(&utcp->connections[i]->rcvbuf); buffer_exit(&utcp->connections[i]->sndbuf); free(utcp->connections[i]); } @@ -1221,6 +1526,25 @@ void utcp_set_sndbuf(struct utcp_connection *c, size_t size) { c->sndbuf.maxsize = -1; } +size_t utcp_get_rcvbuf(struct utcp_connection *c) { + return c ? c->rcvbuf.maxsize : 0; +} + +size_t utcp_get_rcvbuf_free(struct utcp_connection *c) { + if(c && (c->state == ESTABLISHED || c->state == CLOSE_WAIT)) + return buffer_free(&c->rcvbuf); + else + return 0; +} + +void utcp_set_rcvbuf(struct utcp_connection *c, size_t size) { + if(!c) + return; + c->rcvbuf.maxsize = size; + if(c->rcvbuf.maxsize != size) + c->rcvbuf.maxsize = -1; +} + bool utcp_get_nodelay(struct utcp_connection *c) { return c ? c->nodelay : false; } @@ -1252,3 +1576,10 @@ void utcp_set_poll_cb(struct utcp_connection *c, utcp_poll_t poll) { if(c) c->poll = poll; } + +void utcp_set_accept_cb(struct utcp *utcp, utcp_accept_t accept, utcp_pre_accept_t pre_accept) { + if(utcp) { + utcp->accept = accept; + utcp->pre_accept = pre_accept; + } +}