+static void fast_retransmit(struct utcp_connection *c) {
+ if(c->state == CLOSED || c->snd.last == c->snd.una) {
+ debug(c, "fast_retransmit() called but nothing to retransmit!\n");
+ return;
+ }
+
+ struct utcp *utcp = c->utcp;
+
+ struct {
+ struct hdr hdr;
+ uint8_t data[];
+ } *pkt = c->utcp->pkt;
+
+ pkt->hdr.src = c->src;
+ pkt->hdr.dst = c->dst;
+ pkt->hdr.wnd = c->rcvbuf.maxsize;
+ pkt->hdr.aux = 0;
+
+ switch(c->state) {
+ case ESTABLISHED:
+ case FIN_WAIT_1:
+ case CLOSE_WAIT:
+ case CLOSING:
+ case LAST_ACK:
+ // Send unacked data again.
+ pkt->hdr.seq = c->snd.una;
+ pkt->hdr.ack = c->rcv.nxt;
+ pkt->hdr.ctl = ACK;
+ uint32_t len = min(seqdiff(c->snd.last, c->snd.una), utcp->mss);
+
+ if(fin_wanted(c, c->snd.una + len)) {
+ len--;
+ pkt->hdr.ctl |= FIN;
+ }
+
+ buffer_copy(&c->sndbuf, pkt->data, 0, len);
+ print_packet(c, "rtrx", pkt, sizeof(pkt->hdr) + len);
+ utcp->send(utcp, pkt, sizeof(pkt->hdr) + len);
+ break;
+
+ default:
+ break;
+ }
+}
+
+static void retransmit(struct utcp_connection *c) {
+ if(c->state == CLOSED || c->snd.last == c->snd.una) {
+ debug(c, "retransmit() called but nothing to retransmit!\n");
+ stop_retransmit_timer(c);
+ return;
+ }
+
+ struct utcp *utcp = c->utcp;
+
+ if(utcp->retransmit) {
+ utcp->retransmit(c);
+ }
+
+ struct {
+ struct hdr hdr;
+ uint8_t data[];
+ } *pkt = c->utcp->pkt;
+
+ pkt->hdr.src = c->src;
+ pkt->hdr.dst = c->dst;
+ pkt->hdr.wnd = c->rcvbuf.maxsize;
+ pkt->hdr.aux = 0;
+
+ switch(c->state) {
+ case SYN_SENT:
+ // Send our SYN again
+ pkt->hdr.seq = c->snd.iss;
+ pkt->hdr.ack = 0;
+ pkt->hdr.ctl = SYN;
+ pkt->hdr.aux = 0x0101;
+ pkt->data[0] = 1;
+ pkt->data[1] = 0;
+ pkt->data[2] = 0;
+ pkt->data[3] = c->flags & 0x7;
+ print_packet(c, "rtrx", pkt, sizeof(pkt->hdr) + 4);
+ utcp->send(utcp, pkt, sizeof(pkt->hdr) + 4);
+ break;
+
+ case SYN_RECEIVED:
+ // Send SYNACK again
+ pkt->hdr.seq = c->snd.nxt;
+ pkt->hdr.ack = c->rcv.nxt;
+ pkt->hdr.ctl = SYN | ACK;
+ print_packet(c, "rtrx", pkt, sizeof(pkt->hdr));
+ utcp->send(utcp, pkt, sizeof(pkt->hdr));
+ break;
+
+ case ESTABLISHED:
+ case FIN_WAIT_1:
+ case CLOSE_WAIT:
+ case CLOSING:
+ case LAST_ACK:
+ // Send unacked data again.
+ pkt->hdr.seq = c->snd.una;
+ pkt->hdr.ack = c->rcv.nxt;
+ pkt->hdr.ctl = ACK;
+ uint32_t len = min(seqdiff(c->snd.last, c->snd.una), utcp->mss);
+
+ if(fin_wanted(c, c->snd.una + len)) {
+ len--;
+ pkt->hdr.ctl |= FIN;
+ }
+
+ // RFC 5681 slow start after timeout
+ uint32_t flightsize = seqdiff(c->snd.nxt, c->snd.una);
+ c->snd.ssthresh = max(flightsize / 2, utcp->mss * 2); // eq. 4
+ c->snd.cwnd = utcp->mss;
+ debug_cwnd(c);
+
+ buffer_copy(&c->sndbuf, pkt->data, 0, len);
+ print_packet(c, "rtrx", pkt, sizeof(pkt->hdr) + len);
+ utcp->send(utcp, pkt, sizeof(pkt->hdr) + len);
+
+ c->snd.nxt = c->snd.una + len;
+ break;
+
+ case CLOSED:
+ case LISTEN:
+ case TIME_WAIT:
+ case FIN_WAIT_2:
+ // We shouldn't need to retransmit anything in this state.
+#ifdef UTCP_DEBUG
+ abort();
+#endif
+ stop_retransmit_timer(c);
+ goto cleanup;
+ }
+
+ start_retransmit_timer(c);
+ c->rto *= 2;
+
+ if(c->rto > MAX_RTO) {
+ c->rto = MAX_RTO;
+ }
+
+ c->rtt_start.tv_sec = 0; // invalidate RTT timer
+ c->dupack = 0; // cancel any ongoing fast recovery
+
+cleanup:
+ return;
+}
+
+/* Update receive buffer and SACK entries after consuming data.
+ *
+ * Situation:
+ *
+ * |.....0000..1111111111.....22222......3333|
+ * |---------------^
+ *
+ * 0..3 represent the SACK entries. The ^ indicates up to which point we want
+ * to remove data from the receive buffer. The idea is to substract "len"
+ * from the offset of all the SACK entries, and then remove/cut down entries
+ * that are shifted to before the start of the receive buffer.
+ *
+ * There are three cases:
+ * - the SACK entry is after ^, in that case just change the offset.
+ * - the SACK entry starts before and ends after ^, so we have to
+ * change both its offset and size.
+ * - the SACK entry is completely before ^, in that case delete it.
+ */
+static void sack_consume(struct utcp_connection *c, size_t len) {
+ debug(c, "sack_consume %lu\n", (unsigned long)len);
+
+ if(len > c->rcvbuf.used) {
+ debug(c, "all SACK entries consumed\n");
+ c->sacks[0].len = 0;
+ return;
+ }
+
+ buffer_discard(&c->rcvbuf, len);
+
+ for(int i = 0; i < NSACKS && c->sacks[i].len;) {
+ if(len < c->sacks[i].offset) {
+ c->sacks[i].offset -= len;
+ i++;
+ } else if(len < c->sacks[i].offset + c->sacks[i].len) {
+ c->sacks[i].len -= len - c->sacks[i].offset;
+ c->sacks[i].offset = 0;
+ i++;
+ } else {
+ if(i < NSACKS - 1) {
+ memmove(&c->sacks[i], &c->sacks[i + 1], (NSACKS - 1 - i) * sizeof(c->sacks)[i]);
+ c->sacks[NSACKS - 1].len = 0;
+ } else {
+ c->sacks[i].len = 0;
+ break;
+ }
+ }
+ }
+
+ for(int i = 0; i < NSACKS && c->sacks[i].len; i++) {
+ debug(c, "SACK[%d] offset %u len %u\n", i, c->sacks[i].offset, c->sacks[i].len);
+ }
+}
+
+static void handle_out_of_order(struct utcp_connection *c, uint32_t offset, const void *data, size_t len) {
+ debug(c, "out of order packet, offset %u\n", offset);
+ // Packet loss or reordering occured. Store the data in the buffer.
+ ssize_t rxd = buffer_put_at(&c->rcvbuf, offset, data, len);
+
+ if(rxd <= 0) {
+ debug(c, "packet outside receive buffer, dropping\n");
+ return;
+ }
+
+ if((size_t)rxd < len) {
+ debug(c, "packet partially outside receive buffer\n");
+ len = rxd;
+ }
+
+ // Make note of where we put it.
+ for(int i = 0; i < NSACKS; i++) {
+ if(!c->sacks[i].len) { // nothing to merge, add new entry
+ debug(c, "new SACK entry %d\n", i);
+ c->sacks[i].offset = offset;
+ c->sacks[i].len = rxd;
+ break;
+ } else if(offset < c->sacks[i].offset) {
+ if(offset + rxd < c->sacks[i].offset) { // insert before
+ if(!c->sacks[NSACKS - 1].len) { // only if room left
+ debug(c, "insert SACK entry at %d\n", i);
+ memmove(&c->sacks[i + 1], &c->sacks[i], (NSACKS - i - 1) * sizeof(c->sacks)[i]);
+ c->sacks[i].offset = offset;
+ c->sacks[i].len = rxd;
+ } else {
+ debug(c, "SACK entries full, dropping packet\n");
+ }
+
+ break;
+ } else { // merge
+ debug(c, "merge with start of SACK entry at %d\n", i);
+ c->sacks[i].offset = offset;
+ break;
+ }
+ } else if(offset <= c->sacks[i].offset + c->sacks[i].len) {
+ if(offset + rxd > c->sacks[i].offset + c->sacks[i].len) { // merge
+ debug(c, "merge with end of SACK entry at %d\n", i);
+ c->sacks[i].len = offset + rxd - c->sacks[i].offset;
+ // TODO: handle potential merge with next entry
+ }
+
+ break;
+ }
+ }
+
+ for(int i = 0; i < NSACKS && c->sacks[i].len; i++) {
+ debug(c, "SACK[%d] offset %u len %u\n", i, c->sacks[i].offset, c->sacks[i].len);
+ }
+}
+
+static void handle_in_order(struct utcp_connection *c, const void *data, size_t len) {
+ if(c->recv) {
+ ssize_t rxd = c->recv(c, data, len);
+
+ if(rxd != (ssize_t)len) {
+ // TODO: handle the application not accepting all data.
+ abort();
+ }
+ }
+
+ // Check if we can process out-of-order data now.
+ if(c->sacks[0].len && len >= c->sacks[0].offset) {
+ debug(c, "incoming packet len %lu connected with SACK at %u\n", (unsigned long)len, c->sacks[0].offset);
+
+ if(len < c->sacks[0].offset + c->sacks[0].len) {
+ size_t offset = len;
+ len = c->sacks[0].offset + c->sacks[0].len;
+ size_t remainder = len - offset;
+
+ if(c->recv) {
+ ssize_t rxd = buffer_call(&c->rcvbuf, c->recv, c, offset, remainder);
+
+ if(rxd != (ssize_t)remainder) {
+ // TODO: handle the application not accepting all data.
+ abort();
+ }
+ }
+ }
+ }
+
+ if(c->rcvbuf.used) {
+ sack_consume(c, len);
+ }
+
+ c->rcv.nxt += len;
+}
+
+static void handle_unreliable(struct utcp_connection *c, const struct hdr *hdr, const void *data, size_t len) {
+ // Fast path for unfragmented packets
+ if(!hdr->wnd && !(hdr->ctl & MF)) {
+ if(c->recv) {
+ c->recv(c, data, len);
+ }
+
+ c->rcv.nxt = hdr->seq + len;
+ return;
+ }
+
+ // Ensure reassembled packet are not larger than 64 kiB
+ if(hdr->wnd >= MAX_UNRELIABLE_SIZE || hdr->wnd + len > MAX_UNRELIABLE_SIZE) {
+ return;
+ }
+
+ // Don't accept out of order fragments
+ if(hdr->wnd && hdr->seq != c->rcv.nxt) {
+ return;
+ }
+
+ // Reset the receive buffer for the first fragment
+ if(!hdr->wnd) {
+ buffer_clear(&c->rcvbuf);
+ }
+
+ ssize_t rxd = buffer_put_at(&c->rcvbuf, hdr->wnd, data, len);
+
+ if(rxd != (ssize_t)len) {
+ return;
+ }
+
+ // Send the packet if it's the final fragment
+ if(!(hdr->ctl & MF) && c->recv) {
+ buffer_call(&c->rcvbuf, c->recv, c, 0, hdr->wnd + len);
+ }
+
+ c->rcv.nxt = hdr->seq + len;
+}
+
+static void handle_incoming_data(struct utcp_connection *c, const struct hdr *hdr, const void *data, size_t len) {
+ if(!is_reliable(c)) {
+ handle_unreliable(c, hdr, data, len);
+ return;
+ }
+
+ uint32_t offset = seqdiff(hdr->seq, c->rcv.nxt);
+
+ if(offset) {
+ handle_out_of_order(c, offset, data, len);
+ } else {
+ handle_in_order(c, data, len);
+ }
+}
+
+
+ssize_t utcp_recv(struct utcp *utcp, const void *data, size_t len) {
+ const uint8_t *ptr = data;
+
+ if(!utcp) {
+ errno = EFAULT;
+ return -1;
+ }
+
+ if(!len) {
+ return 0;
+ }
+
+ if(!data) {
+ errno = EFAULT;
+ return -1;
+ }
+
+ // Drop packets smaller than the header
+
+ struct hdr hdr;
+
+ if(len < sizeof(hdr)) {
+ print_packet(NULL, "recv", data, len);
+ errno = EBADMSG;
+ return -1;
+ }
+
+ // Make a copy from the potentially unaligned data to a struct hdr
+
+ memcpy(&hdr, ptr, sizeof(hdr));
+
+ // Try to match the packet to an existing connection
+
+ struct utcp_connection *c = find_connection(utcp, hdr.dst, hdr.src);
+ print_packet(c, "recv", data, len);
+
+ // Process the header
+
+ ptr += sizeof(hdr);
+ len -= sizeof(hdr);
+
+ // Drop packets with an unknown CTL flag
+
+ if(hdr.ctl & ~(SYN | ACK | RST | FIN | MF)) {
+ print_packet(NULL, "recv", data, len);
+ errno = EBADMSG;
+ return -1;
+ }
+
+ // Check for auxiliary headers
+
+ const uint8_t *init = NULL;
+
+ uint16_t aux = hdr.aux;
+
+ while(aux) {
+ size_t auxlen = 4 * (aux >> 8) & 0xf;
+ uint8_t auxtype = aux & 0xff;
+
+ if(len < auxlen) {
+ errno = EBADMSG;
+ return -1;
+ }
+
+ switch(auxtype) {
+ case AUX_INIT:
+ if(!(hdr.ctl & SYN) || auxlen != 4) {
+ errno = EBADMSG;
+ return -1;
+ }
+
+ init = ptr;
+ break;
+
+ default:
+ errno = EBADMSG;
+ return -1;
+ }
+
+ len -= auxlen;
+ ptr += auxlen;
+
+ if(!(aux & 0x800)) {
+ break;
+ }
+
+ if(len < 2) {
+ errno = EBADMSG;
+ return -1;
+ }
+
+ memcpy(&aux, ptr, 2);
+ len -= 2;
+ ptr += 2;
+ }
+
+ bool has_data = len || (hdr.ctl & (SYN | FIN));
+
+ // Is it for a new connection?
+
+ if(!c) {
+ // Ignore RST packets
+
+ if(hdr.ctl & RST) {
+ return 0;
+ }
+
+ // Is it a SYN packet and are we LISTENing?
+
+ if(hdr.ctl & SYN && !(hdr.ctl & ACK) && utcp->accept) {
+ // If we don't want to accept it, send a RST back
+ if((utcp->pre_accept && !utcp->pre_accept(utcp, hdr.dst))) {
+ len = 1;
+ goto reset;
+ }
+
+ // Try to allocate memory, otherwise send a RST back
+ c = allocate_connection(utcp, hdr.dst, hdr.src);
+
+ if(!c) {
+ len = 1;
+ goto reset;
+ }
+
+ // Parse auxilliary information
+ if(init) {
+ if(init[0] < 1) {
+ len = 1;
+ goto reset;
+ }
+
+ c->flags = init[3] & 0x7;
+ } else {
+ c->flags = UTCP_TCP;
+ }
+
+synack:
+ // Return SYN+ACK, go to SYN_RECEIVED state
+ c->snd.wnd = hdr.wnd;
+ c->rcv.irs = hdr.seq;
+ c->rcv.nxt = c->rcv.irs + 1;
+ set_state(c, SYN_RECEIVED);
+
+ struct {
+ struct hdr hdr;
+ uint8_t data[4];
+ } pkt;
+
+ pkt.hdr.src = c->src;
+ pkt.hdr.dst = c->dst;
+ pkt.hdr.ack = c->rcv.irs + 1;
+ pkt.hdr.seq = c->snd.iss;
+ pkt.hdr.wnd = c->rcvbuf.maxsize;
+ pkt.hdr.ctl = SYN | ACK;
+
+ if(init) {
+ pkt.hdr.aux = 0x0101;
+ pkt.data[0] = 1;
+ pkt.data[1] = 0;
+ pkt.data[2] = 0;
+ pkt.data[3] = c->flags & 0x7;
+ print_packet(c, "send", &pkt, sizeof(hdr) + 4);
+ utcp->send(utcp, &pkt, sizeof(hdr) + 4);
+ } else {
+ pkt.hdr.aux = 0;
+ print_packet(c, "send", &pkt, sizeof(hdr));
+ utcp->send(utcp, &pkt, sizeof(hdr));
+ }
+
+ start_retransmit_timer(c);
+ } else {
+ // No, we don't want your packets, send a RST back
+ len = 1;
+ goto reset;
+ }
+
+ return 0;
+ }
+
+ debug(c, "state %s\n", strstate[c->state]);
+
+ // In case this is for a CLOSED connection, ignore the packet.
+ // TODO: make it so incoming packets can never match a CLOSED connection.
+
+ if(c->state == CLOSED) {
+ debug(c, "got packet for closed connection\n");
+ return 0;
+ }
+
+ // It is for an existing connection.
+
+ // 1. Drop invalid packets.
+
+ // 1a. Drop packets that should not happen in our current state.
+
+ switch(c->state) {
+ case SYN_SENT:
+ case SYN_RECEIVED:
+ case ESTABLISHED:
+ case FIN_WAIT_1:
+ case FIN_WAIT_2:
+ case CLOSE_WAIT:
+ case CLOSING:
+ case LAST_ACK:
+ case TIME_WAIT:
+ break;
+
+ default:
+#ifdef UTCP_DEBUG
+ abort();
+#endif
+ break;
+ }
+
+ // 1b. Discard data that is not in our receive window.
+
+ if(is_reliable(c)) {
+ bool acceptable;
+
+ if(c->state == SYN_SENT) {
+ acceptable = true;
+ } else if(len == 0) {
+ acceptable = seqdiff(hdr.seq, c->rcv.nxt) >= 0;
+ } else {
+ int32_t rcv_offset = seqdiff(hdr.seq, c->rcv.nxt);
+
+ // cut already accepted front overlapping
+ if(rcv_offset < 0) {
+ acceptable = len > (size_t) - rcv_offset;
+
+ if(acceptable) {
+ ptr -= rcv_offset;
+ len += rcv_offset;
+ hdr.seq -= rcv_offset;
+ }
+ } else {
+ acceptable = seqdiff(hdr.seq, c->rcv.nxt) >= 0 && seqdiff(hdr.seq, c->rcv.nxt) + len <= c->rcvbuf.maxsize;
+ }
+ }
+
+ if(!acceptable) {
+ debug(c, "packet not acceptable, %u <= %u + %lu < %u\n", c->rcv.nxt, hdr.seq, (unsigned long)len, c->rcv.nxt + c->rcvbuf.maxsize);
+
+ // Ignore unacceptable RST packets.
+ if(hdr.ctl & RST) {
+ return 0;
+ }
+
+ // Otherwise, continue processing.
+ len = 0;
+ }
+ } else {
+#if UTCP_DEBUG
+ int32_t rcv_offset = seqdiff(hdr.seq, c->rcv.nxt);
+
+ if(rcv_offset) {
+ debug(c, "packet out of order, offset %u bytes", rcv_offset);
+ }
+
+#endif
+ }
+
+ c->snd.wnd = hdr.wnd; // TODO: move below
+
+ // 1c. Drop packets with an invalid ACK.
+ // ackno should not roll back, and it should also not be bigger than what we ever could have sent
+ // (= snd.una + c->sndbuf.used).
+
+ if(!is_reliable(c)) {
+ if(hdr.ack != c->snd.last && c->state >= ESTABLISHED) {
+ hdr.ack = c->snd.una;
+ }
+ }
+
+ if(hdr.ctl & ACK && (seqdiff(hdr.ack, c->snd.last) > 0 || seqdiff(hdr.ack, c->snd.una) < 0)) {
+ debug(c, "packet ack seqno out of range, %u <= %u < %u\n", c->snd.una, hdr.ack, c->snd.una + c->sndbuf.used);
+
+ // Ignore unacceptable RST packets.
+ if(hdr.ctl & RST) {
+ return 0;
+ }
+
+ goto reset;
+ }
+
+ // 2. Handle RST packets
+
+ if(hdr.ctl & RST) {
+ switch(c->state) {
+ case SYN_SENT:
+ if(!(hdr.ctl & ACK)) {
+ return 0;
+ }
+
+ // The peer has refused our connection.
+ set_state(c, CLOSED);
+ errno = ECONNREFUSED;
+
+ if(c->recv) {
+ c->recv(c, NULL, 0);
+ }
+
+ if(c->poll && !c->reapable) {
+ c->poll(c, 0);
+ }