X-Git-Url: http://git.meshlink.io/?a=blobdiff_plain;f=utcp.c;h=3a6c506062d70a26171f67c653ecea62a44c168c;hb=9461a0793fb6d6a67345cc4666f78d766856985d;hp=5dc7501a20d805e0d57520196bae31ec41657e78;hpb=275129c9d08b2d29529b2f9d039198168aeb52a7;p=utcp diff --git a/utcp.c b/utcp.c index 5dc7501..3a6c506 100644 --- a/utcp.c +++ b/utcp.c @@ -54,6 +54,10 @@ } while (0) #endif +static inline size_t min(size_t a, size_t b) { + return a < b ? a : b; +} + static inline size_t max(size_t a, size_t b) { return a > b ? a : b; } @@ -113,9 +117,14 @@ static void print_packet(struct utcp *utcp, const char *dir, const void *pkt, si debug("\n"); } + +static void debug_cwnd(struct utcp_connection *c) { + debug("snd.cwnd = %u\n", c->snd.cwnd); +} #else #define debug(...) do {} while(0) #define print_packet(...) do {} while(0) +#define debug_cwnd(...) do {} while(0) #endif static void set_state(struct utcp_connection *c, enum state state) { @@ -382,9 +391,10 @@ static struct utcp_connection *allocate_connection(struct utcp *utcp, uint16_t s #endif c->snd.una = c->snd.iss; c->snd.nxt = c->snd.iss + 1; - c->rcv.wnd = utcp->mtu; c->snd.last = c->snd.nxt; - c->snd.cwnd = utcp->mtu; + c->snd.cwnd = (utcp->mtu > 2190 ? 2 : utcp->mtu > 1095 ? 
3 : 4) * utcp->mtu; + c->snd.ssthresh = ~0; + debug_cwnd(c); c->utcp = utcp; // Add it to the sorted list of connections @@ -415,13 +425,13 @@ static void update_rtt(struct utcp_connection *c, uint32_t rtt) { if(!utcp->srtt) { utcp->srtt = rtt; utcp->rttvar = rtt / 2; - utcp->rto = rtt + max(2 * rtt, CLOCK_GRANULARITY); } else { utcp->rttvar = (utcp->rttvar * 3 + absdiff(utcp->srtt, rtt)) / 4; utcp->srtt = (utcp->srtt * 7 + rtt) / 8; - utcp->rto = utcp->srtt + max(utcp->rttvar, CLOCK_GRANULARITY); } + utcp->rto = utcp->srtt + max(4 * utcp->rttvar, CLOCK_GRANULARITY); + if(utcp->rto > MAX_RTO) { utcp->rto = MAX_RTO; } @@ -468,7 +478,7 @@ struct utcp_connection *utcp_connect_ex(struct utcp *utcp, uint16_t dst, utcp_re pkt.hdr.dst = c->dst; pkt.hdr.seq = c->snd.iss; pkt.hdr.ack = 0; - pkt.hdr.wnd = c->rcv.wnd; + pkt.hdr.wnd = c->rcvbuf.maxsize; pkt.hdr.ctl = SYN; pkt.hdr.aux = 0x0101; pkt.init[0] = 1; @@ -507,19 +517,22 @@ void utcp_accept(struct utcp_connection *c, utcp_recv_t recv, void *priv) { static void ack(struct utcp_connection *c, bool sendatleastone) { int32_t left = seqdiff(c->snd.last, c->snd.nxt); - int32_t cwndleft = c->snd.cwnd - seqdiff(c->snd.nxt, c->snd.una); - debug("cwndleft = %d\n", cwndleft); + int32_t cwndleft = min(c->snd.cwnd, c->snd.wnd) - seqdiff(c->snd.nxt, c->snd.una); assert(left >= 0); if(cwndleft <= 0) { - cwndleft = 0; - } - - if(cwndleft < left) { + left = 0; + } else if(cwndleft < left) { left = cwndleft; + + if(!sendatleastone || cwndleft > c->utcp->mtu) { + left -= left % c->utcp->mtu; + } } + debug("cwndleft = %d, left = %d\n", cwndleft, left); + if(!left && !sendatleastone) { return; } @@ -538,7 +551,7 @@ static void ack(struct utcp_connection *c, bool sendatleastone) { pkt->hdr.src = c->src; pkt->hdr.dst = c->dst; pkt->hdr.ack = c->rcv.nxt; - pkt->hdr.wnd = c->snd.wnd; + pkt->hdr.wnd = c->rcvbuf.maxsize; pkt->hdr.ctl = ACK; pkt->hdr.aux = 0; @@ -627,11 +640,19 @@ ssize_t utcp_send(struct utcp_connection *c, const void *data, 
size_t len) { // Add data to send buffer. - len = buffer_put(&c->sndbuf, data, len); + if(is_reliable(c) || (c->state != SYN_SENT && c->state != SYN_RECEIVED)) { + len = buffer_put(&c->sndbuf, data, len); + } else { + return 0; + } if(len <= 0) { - errno = EWOULDBLOCK; - return 0; + if(is_reliable(c)) { + errno = EWOULDBLOCK; + return 0; + } else { + return len; + } } c->snd.last += len; @@ -667,6 +688,59 @@ static void swap_ports(struct hdr *hdr) { hdr->dst = tmp; } +static void fast_retransmit(struct utcp_connection *c) { + if(c->state == CLOSED || c->snd.last == c->snd.una) { + debug("fast_retransmit() called but nothing to retransmit!\n"); + return; + } + + struct utcp *utcp = c->utcp; + + struct { + struct hdr hdr; + uint8_t data[]; + } *pkt; + + pkt = malloc(sizeof(pkt->hdr) + c->utcp->mtu); + + if(!pkt) { + return; + } + + pkt->hdr.src = c->src; + pkt->hdr.dst = c->dst; + pkt->hdr.wnd = c->rcvbuf.maxsize; + pkt->hdr.aux = 0; + + switch(c->state) { + case ESTABLISHED: + case FIN_WAIT_1: + case CLOSE_WAIT: + case CLOSING: + case LAST_ACK: + // Send unacked data again. 
+ pkt->hdr.seq = c->snd.una; + pkt->hdr.ack = c->rcv.nxt; + pkt->hdr.ctl = ACK; + uint32_t len = min(seqdiff(c->snd.last, c->snd.una), utcp->mtu); + + if(fin_wanted(c, c->snd.una + len)) { + len--; + pkt->hdr.ctl |= FIN; + } + + buffer_copy(&c->sndbuf, pkt->data, 0, len); + print_packet(c->utcp, "rtrx", pkt, sizeof(pkt->hdr) + len); + utcp->send(utcp, pkt, sizeof(pkt->hdr) + len); + break; + + default: + break; + } + + free(pkt); +} + static void retransmit(struct utcp_connection *c) { if(c->state == CLOSED || c->snd.last == c->snd.una) { debug("Retransmit() called but nothing to retransmit!\n"); @@ -689,7 +763,7 @@ static void retransmit(struct utcp_connection *c) { pkt->hdr.src = c->src; pkt->hdr.dst = c->dst; - pkt->hdr.wnd = c->rcv.wnd; + pkt->hdr.wnd = c->rcvbuf.maxsize; pkt->hdr.aux = 0; switch(c->state) { @@ -737,7 +811,12 @@ static void retransmit(struct utcp_connection *c) { } c->snd.nxt = c->snd.una + len; - c->snd.cwnd = utcp->mtu; // reduce cwnd on retransmit + + // RFC 5681 slow start after timeout + c->snd.ssthresh = max(c->snd.cwnd / 2, utcp->mtu * 2); // eq. 
4 + c->snd.cwnd = utcp->mtu; + debug_cwnd(c); + buffer_copy(&c->sndbuf, pkt->data, 0, len); print_packet(c->utcp, "rtrx", pkt, sizeof(pkt->hdr) + len); utcp->send(utcp, pkt, sizeof(pkt->hdr) + len); @@ -1005,6 +1084,8 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { ptr += 2; } + bool has_data = len || (hdr.ctl & (SYN | FIN)); + // Try to match the packet to an existing connection struct utcp_connection *c = find_connection(utcp, hdr.dst, hdr.src); @@ -1047,6 +1128,7 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { c->flags = UTCP_TCP; } +synack: // Return SYN+ACK, go to SYN_RECEIVED state c->snd.wnd = hdr.wnd; c->rcv.irs = hdr.seq; @@ -1062,7 +1144,7 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { pkt.hdr.dst = c->dst; pkt.hdr.ack = c->rcv.irs + 1; pkt.hdr.seq = c->snd.iss; - pkt.hdr.wnd = c->rcv.wnd; + pkt.hdr.wnd = c->rcvbuf.maxsize; pkt.hdr.ctl = SYN | ACK; if(init) { @@ -1099,8 +1181,6 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { // It is for an existing connection. - uint32_t prevrcvnxt = c->rcv.nxt; - // 1. Drop invalid packets. // 1a. Drop packets that should not happen in our current state. @@ -1124,41 +1204,43 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { break; } - // 1b. Drop packets with a sequence number not in our receive window. + // 1b. Discard data that is not in our receive window. 
- bool acceptable; + if(is_reliable(c)) { + bool acceptable; - if(c->state == SYN_SENT) { - acceptable = true; - } else if(len == 0) { - acceptable = seqdiff(hdr.seq, c->rcv.nxt) >= 0; - } else { - int32_t rcv_offset = seqdiff(hdr.seq, c->rcv.nxt); + if(c->state == SYN_SENT) { + acceptable = true; + } else if(len == 0) { + acceptable = seqdiff(hdr.seq, c->rcv.nxt) >= 0; + } else { + int32_t rcv_offset = seqdiff(hdr.seq, c->rcv.nxt); - // cut already accepted front overlapping - if(rcv_offset < 0) { - acceptable = len > (size_t) - rcv_offset; + // cut already accepted front overlapping + if(rcv_offset < 0) { + acceptable = len > (size_t) - rcv_offset; - if(acceptable) { - ptr -= rcv_offset; - len += rcv_offset; - hdr.seq -= rcv_offset; + if(acceptable) { + ptr -= rcv_offset; + len += rcv_offset; + hdr.seq -= rcv_offset; + } + } else { + acceptable = seqdiff(hdr.seq, c->rcv.nxt) >= 0 && seqdiff(hdr.seq, c->rcv.nxt) + len <= c->rcvbuf.maxsize; } - } else { - acceptable = seqdiff(hdr.seq, c->rcv.nxt) >= 0 && seqdiff(hdr.seq, c->rcv.nxt) + len <= c->rcvbuf.maxsize; } - } - if(!acceptable) { - debug("Packet not acceptable, %u <= %u + %lu < %u\n", c->rcv.nxt, hdr.seq, (unsigned long)len, c->rcv.nxt + c->rcvbuf.maxsize); + if(!acceptable) { + debug("Packet not acceptable, %u <= %u + %lu < %u\n", c->rcv.nxt, hdr.seq, (unsigned long)len, c->rcv.nxt + c->rcvbuf.maxsize); - // Ignore unacceptable RST packets. - if(hdr.ctl & RST) { - return 0; - } + // Ignore unacceptable RST packets. + if(hdr.ctl & RST) { + return 0; + } - // Otherwise, continue processing. - len = 0; + // Otherwise, continue processing. + len = 0; + } } c->snd.wnd = hdr.wnd; // TODO: move below @@ -1167,6 +1249,12 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { // ackno should not roll back, and it should also not be bigger than what we ever could have sent // (= snd.una + c->sndbuf.used). 
+ if(!is_reliable(c)) { + if(hdr.ack != c->snd.last && c->state >= ESTABLISHED) { + hdr.ack = c->snd.una; + } + } + if(hdr.ctl & ACK && (seqdiff(hdr.ack, c->snd.last) > 0 || seqdiff(hdr.ack, c->snd.una) < 0)) { debug("Packet ack seqno out of range, %u <= %u < %u\n", c->snd.una, hdr.ack, c->snd.una + c->sndbuf.used); @@ -1268,7 +1356,6 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { // 3. Advance snd.una advanced = seqdiff(hdr.ack, c->snd.una); - prevrcvnxt = c->rcv.nxt; if(advanced) { // RTT measurement @@ -1300,8 +1387,10 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { assert(data_acked >= 0); +#ifndef NDEBUG int32_t bufused = seqdiff(c->snd.last, c->snd.una); assert(data_acked <= bufused); +#endif if(data_acked) { buffer_get(&c->sndbuf, NULL, data_acked); @@ -1314,13 +1403,27 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { c->snd.una = hdr.ack; - c->dupack = 0; - c->snd.cwnd += utcp->mtu; + if(c->dupack) { + if(c->dupack >= 3) { + c->snd.cwnd = c->snd.ssthresh; + } + + c->dupack = 0; + } + + // Increase the congestion window according to RFC 5681 + if(c->snd.cwnd < c->snd.ssthresh) { + c->snd.cwnd += min(advanced, utcp->mtu); // eq. 2 + } else { + c->snd.cwnd += max(1, (utcp->mtu * utcp->mtu) / c->snd.cwnd); // eq. 3 + } if(c->snd.cwnd > c->sndbuf.maxsize) { c->snd.cwnd = c->sndbuf.maxsize; } + debug_cwnd(c); + // Check if we have sent a FIN that is now ACKed. switch(c->state) { case FIN_WAIT_1: @@ -1333,7 +1436,7 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { case CLOSING: if(c->snd.una == c->snd.last) { gettimeofday(&c->conn_timeout, NULL); - c->conn_timeout.tv_sec += 60; + c->conn_timeout.tv_sec += utcp->timeout; set_state(c, TIME_WAIT); } @@ -1348,12 +1451,26 @@ ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) { if(c->dupack == 3) { debug("Triplicate ACK\n"); - //TODO: Resend one packet and go to fast recovery mode. See RFC 6582. 
- //We do a very simple variant here; reset the nxt pointer to the last acknowledged packet from the peer. - //Reset the congestion window so we wait for ACKs. - c->snd.nxt = c->snd.una; - c->snd.cwnd = utcp->mtu; - start_retransmit_timer(c); + + // RFC 5681 fast recovery + c->snd.ssthresh = max(c->snd.cwnd / 2, utcp->mtu * 2); // eq. 4 + c->snd.cwnd = min(c->snd.ssthresh + 3 * utcp->mtu, c->sndbuf.maxsize); // inflate by 3 segments, capped at the send buffer size + + if(c->snd.cwnd > c->sndbuf.maxsize) { + c->snd.cwnd = c->sndbuf.maxsize; + } + + debug_cwnd(c); + + fast_retransmit(c); + } else if(c->dupack > 3) { + c->snd.cwnd += utcp->mtu; + + if(c->snd.cwnd > c->sndbuf.maxsize) { + c->snd.cwnd = c->sndbuf.maxsize; + } + + debug_cwnd(c); } } } @@ -1397,6 +1514,9 @@ skip_ack: break; case SYN_RECEIVED: + // This is a retransmit of a SYN, send back the SYNACK. + goto synack; + case ESTABLISHED: case FIN_WAIT_1: case FIN_WAIT_2: @@ -1472,7 +1592,7 @@ skip_ack: // 7. Process FIN stuff - if((hdr.ctl & FIN) && hdr.seq + len == c->rcv.nxt) { + if((hdr.ctl & FIN) && (!is_reliable(c) || hdr.seq + len == c->rcv.nxt)) { switch(c->state) { case SYN_SENT: case SYN_RECEIVED: @@ -1492,7 +1612,7 @@ skip_ack: case FIN_WAIT_2: gettimeofday(&c->conn_timeout, NULL); - c->conn_timeout.tv_sec += 60; + c->conn_timeout.tv_sec += utcp->timeout; set_state(c, TIME_WAIT); break; @@ -1522,12 +1642,15 @@ skip_ack: } // Now we send something back if: - // - we advanced rcv.nxt (ie, we got some data that needs to be ACKed) + // - we received data, so we have to send back an ACK // -> sendatleastone = true // - or we got an ack, so we should maybe send a bit more data // -> sendatleastone = false - ack(c, len || prevrcvnxt != c->rcv.nxt); + if(is_reliable(c) || hdr.ctl & SYN || hdr.ctl & FIN) { + ack(c, has_data); + } + return 0; reset: @@ -1902,8 +2025,14 @@ void utcp_reset_timers(struct utcp *utcp) { continue; } - c->rtrx_timeout = now; - c->conn_timeout = then; + if(timerisset(&c->rtrx_timeout)) { + c->rtrx_timeout = now; + } + + 
if(timerisset(&c->conn_timeout)) { + c->conn_timeout = then; + } + c->rtt_start.tv_sec = 0; } @@ -2054,19 +2183,24 @@ void utcp_expect_data(struct utcp_connection *c, bool expect) { } void utcp_offline(struct utcp *utcp, bool offline) { + struct timeval now; + gettimeofday(&now, NULL); + for(int i = 0; i < utcp->nconnections; i++) { struct utcp_connection *c = utcp->connections[i]; - if(!c->reapable) { - utcp_expect_data(c, offline); - - // If we are online again, reset the retransmission timers, but keep the connection timeout as it is, - // to prevent peers toggling online/offline state frequently from keeping connections alive - // if there is no progress in sending actual data. - if(!offline) { - gettimeofday(&utcp->connections[i]->rtrx_timeout, NULL); - utcp->connections[i]->rtt_start.tv_sec = 0; + if(c->reapable) { + continue; + } + + utcp_expect_data(c, offline); + + if(!offline) { + if(timerisset(&c->rtrx_timeout)) { + c->rtrx_timeout = now; } + + utcp->connections[i]->rtt_start.tv_sec = 0; } }