- Process state changes due to SYN
- Send new data to application
- Process state changes due to FIN
+
+CONGESTION AVOIDANCE
+--------------------
+
+We want to send as much packets as possible that won't cause any packets to be
+dropped. So we should not send more than the available bandwidth, and not more
+in one go than buffers along the path can handle.
+
+To start, we use "self-clocking". We send one packet, and wait for an ACK
+before sending another packet. On a network with a finite bandwidth but zero
+delay (latency), this will send packets as efficiently as possible. We don't
+need any timers to control the outgoing packet rate, that's why we call this
+self-clocked. However, latency is non-zero, and this means a number of packets
+is always on the way between the sender and receiver. The amount of packets
+"inbetween" is in principle the bandwidth times the delay (bandwidth-delay
+product, or BDP).
+
+Delay is fairly easy to measure (equal to half the round-trip time of a packet,
+which in TCP is easily obtained from the SYN and SYNACK pair, or the ACK in
+response of a segment), however bandwidth is more difficult and might change
+more rapidly than the latency.
+
+Back to the "inbetween" packets: ideally we would like to fill the available
+inbetween space completely. It should be easy to see that in that case,
+self-clocking will still work as intended. Our estimate of the amount of
+packets in the inbetween space is called the congestion window (CWND). If we
+know the BDP, we can set the CWND to it, however if we don't know it, we can
+start with a small CWND and gradually increase it (for example, every time we
+receive an ACK, send the next 2 segments). At some point, we will start sending
+at a higher rate than the available bandwidth, in which case packets will
+inevitably be lost. We detect that because we do not receive an ACK for our
+data, and then we have to reduce the CWND (for example, by half).
+
+The trick is to choose an algorithm that best keeps the CWND to the effective
+BDP.
+
+A nice introduction is RFC 2001.
+
+snd.cwnd: size of the congestion window.
+snd.nxt - snd.una: number of unacknowledged bytes, = number of bytes in flight.
+snd.cwnd - (snd.nxt - snd.una): unused size of congestion window
c->snd.una = c->snd.iss;
c->snd.nxt = c->snd.iss + 1;
c->rcv.wnd = utcp->mtu;
+ c->snd.last = c->snd.nxt;
+ c->snd.cwnd = utcp->mtu;
c->utcp = utcp;
// Add it to the sorted list of connections
set_state(c, ESTABLISHED);
}
+static void ack(struct utcp_connection *c, bool sendatleastone) {
+ uint32_t left = seqdiff(c->snd.last, c->snd.nxt);
+ int32_t cwndleft = c->snd.cwnd - seqdiff(c->snd.nxt, c->snd.una);
+ char *data = c->sndbuf + seqdiff(c->snd.nxt, c->snd.una);
+
+ if(cwndleft <= 0)
+ cwndleft = 0;
+
+ if(cwndleft < left)
+ left = cwndleft;
+
+ if(!left && !sendatleastone)
+ return;
+
+ struct {
+ struct hdr hdr;
+ char data[c->utcp->mtu];
+ } pkt;
+
+ pkt.hdr.src = c->src;
+ pkt.hdr.dst = c->dst;
+ pkt.hdr.ack = c->rcv.nxt;
+ pkt.hdr.wnd = c->snd.wnd;
+ pkt.hdr.ctl = ACK;
+
+ do {
+ uint32_t seglen = left > c->utcp->mtu ? c->utcp->mtu : left;
+ pkt.hdr.seq = c->snd.nxt;
+
+ memcpy(pkt.data, data, seglen);
+
+ c->snd.nxt += seglen;
+ data += seglen;
+ left -= seglen;
+
+ print_packet(c->utcp, "send", &pkt, sizeof pkt.hdr + seglen);
+ c->utcp->send(c->utcp, &pkt, sizeof pkt.hdr + seglen);
+ } while(left);
+}
+
ssize_t utcp_send(struct utcp_connection *c, const void *data, size_t len) {
if(c->reapable) {
debug("Error: send() called on closed connection %p\n", c);
*/
if(len > c->sndbufsize - bufused && c->sndbufsize < c->maxsndbufsize) {
+ uint32_t newbufsize;
if(c->sndbufsize > c->maxsndbufsize / 2)
- c->sndbufsize = c->maxsndbufsize;
+ newbufsize = c->maxsndbufsize;
else
- c->sndbufsize *= 2;
- c->sndbuf = realloc(c->sndbuf, c->sndbufsize);
+ newbufsize = c->sndbufsize * 2;
+ char *newbuf = realloc(c->sndbuf, newbufsize);
+ if(newbuf) {
+ c->sndbuf = newbuf;
+ c->sndbufsize = newbufsize;
+ }
}
if(len > c->sndbufsize - bufused)
}
memcpy(c->sndbuf + bufused, data, len);
+ c->snd.last += len;
- // Send segments
-
- struct {
- struct hdr hdr;
- char data[c->utcp->mtu];
- } pkt;
-
- pkt.hdr.src = c->src;
- pkt.hdr.dst = c->dst;
- pkt.hdr.ack = c->rcv.nxt;
- pkt.hdr.wnd = c->snd.wnd;
- pkt.hdr.ctl = ACK;
-
- uint32_t left = len;
-
- while(left) {
- uint32_t seglen = left > c->utcp->mtu ? c->utcp->mtu : left;
- pkt.hdr.seq = c->snd.nxt;
-
- memcpy(pkt.data, data, seglen);
-
- c->snd.nxt += seglen;
- data += seglen;
- left -= seglen;
-
- print_packet(c->utcp, "send", &pkt, sizeof pkt.hdr + seglen);
- c->utcp->send(c->utcp, &pkt, sizeof pkt.hdr + seglen);
- }
-
- fprintf(stderr, "len=%zu\n", len);
+ ack(c, false);
return len;
}
uint32_t left = seqdiff(c->snd.nxt, hdr.ack);
if(left)
memmove(c->sndbuf, c->sndbuf + advanced, left);
+ c->dupack = 0;
+ c->snd.cwnd += utcp->mtu;
+ if(c->snd.cwnd > c->maxsndbufsize)
+ c->snd.cwnd = c->maxsndbufsize;
+ } else {
+ if(!len) {
+ c->dupack++;
+ if(c->dupack >= 3) {
+ debug("Triplicate ACK\n");
+ abort();
+ }
+ }
}
// 4. Update timers
return 0;
ack:
- hdr.src = c->src;
- hdr.dst = c->dst;
- hdr.seq = c->snd.nxt;
- hdr.ack = c->rcv.nxt;
- hdr.ctl = ACK;
- print_packet(c->utcp, "send", &hdr, sizeof hdr);
- utcp->send(utcp, &hdr, sizeof hdr);
+ ack(c, true);
return 0;
reset: