+ if(!is_reliable(c)) {
+ if(left) {
+ pkt->hdr.ctl |= MF;
+ } else {
+ pkt->hdr.ctl &= ~MF;
+ }
+ }
+
+ if(seglen && fin_wanted(c, c->snd.nxt)) {
+ seglen--;
+ pkt->hdr.ctl |= FIN;
+ }
+
+ if(!c->rtt_start.tv_sec) {
+ // Start RTT measurement
+ clock_gettime(UTCP_CLOCK, &c->rtt_start);
+ c->rtt_seq = pkt->hdr.seq + seglen;
+ debug(c, "starting RTT measurement, expecting ack %u\n", c->rtt_seq);
+ }
+
+ print_packet(c, "send", pkt, sizeof(pkt->hdr) + seglen);
+ c->utcp->send(c->utcp, pkt, sizeof(pkt->hdr) + seglen);
+
+ if(left && !is_reliable(c)) {
+ pkt->hdr.wnd += seglen;
+ }
+ } while(left);
+}
+
+/* Queue application data for transmission on connection c.
+ *
+ * Parameters:
+ *   c    - connection to send on.
+ *   data - bytes to send; may only be NULL when len is 0.
+ *   len  - number of bytes in data.
+ *
+ * Returns:
+ *   > 0  number of bytes accepted into the send buffer (for reliable
+ *        connections this may be less than len).
+ *   0    nothing was accepted: len was 0, or the send buffer had no room
+ *        (errno = EWOULDBLOCK), or the connection is unreliable and still
+ *        in SYN_SENT/SYN_RECEIVED (errno is left untouched in that case).
+ *   -1   error, with errno set to:
+ *        EBADF    - the connection is marked reapable,
+ *        ENOTCONN - state is CLOSED or LISTEN,
+ *        EPIPE    - our side has already started closing,
+ *        EFAULT   - data is NULL but len is non-zero,
+ *        EMSGSIZE - UTCP_NO_PARTIAL is set and len exceeds the send buffer's
+ *                   maximum size, or an unreliable message is larger than
+ *                   MAX_UNRELIABLE_SIZE / could not be buffered in one piece.
+ */
+ssize_t utcp_send(struct utcp_connection *c, const void *data, size_t len) {
+	if(c->reapable) {
+		debug(c, "send() called on closed connection\n");
+		errno = EBADF;
+		return -1;
+	}
+
+	switch(c->state) {
+	case CLOSED:
+	case LISTEN:
+		debug(c, "send() called on unconnected connection\n");
+		errno = ENOTCONN;
+		return -1;
+
+	case SYN_SENT:
+	case SYN_RECEIVED:
+	case ESTABLISHED:
+	case CLOSE_WAIT:
+		break;
+
+	case FIN_WAIT_1:
+	case FIN_WAIT_2:
+	case CLOSING:
+	case LAST_ACK:
+	case TIME_WAIT:
+		debug(c, "send() called on closed connection\n");
+		errno = EPIPE;
+		return -1;
+	}
+
+	// Exit early if we have nothing to send.
+
+	if(!len) {
+		return 0;
+	}
+
+	if(!data) {
+		errno = EFAULT;
+		return -1;
+	}
+
+	// Check if we need to be able to buffer all data
+
+	if(c->flags & UTCP_NO_PARTIAL) {
+		if(len > buffer_free(&c->sndbuf)) {
+			if(len > c->sndbuf.maxsize) {
+				// Can never fit, no matter how long the caller waits.
+				errno = EMSGSIZE;
+				return -1;
+			} else {
+				errno = EWOULDBLOCK;
+				return 0;
+			}
+		}
+	}
+
+	// Add data to send buffer.
+
+	if(is_reliable(c)) {
+		// buffer_put()'s result is compared against a ssize_t further down,
+		// so it is kept in a signed variable here. Storing it straight into
+		// the unsigned `len` would turn a negative result into a huge length
+		// and corrupt c->snd.last below.
+		ssize_t stored = buffer_put(&c->sndbuf, data, len);
+
+		if(stored <= 0) {
+			errno = EWOULDBLOCK;
+			return 0;
+		}
+
+		len = (size_t)stored;
+	} else if(c->state != SYN_SENT && c->state != SYN_RECEIVED) {
+		// Unreliable messages are all-or-nothing.
+		if(len > MAX_UNRELIABLE_SIZE || buffer_put(&c->sndbuf, data, len) != (ssize_t)len) {
+			errno = EMSGSIZE;
+			return -1;
+		}
+	} else {
+		// Unreliable connection that is not established yet: accept nothing.
+		return 0;
+	}
+
+	// From here on len is the (non-zero) number of bytes actually queued.
+
+	c->snd.last += len;
+
+	// Don't send anything yet if the connection has not fully established yet
+
+	if(c->state == SYN_SENT || c->state == SYN_RECEIVED) {
+		return len;
+	}
+
+	ack(c, false);
+
+	if(!is_reliable(c)) {
+		// Nothing is kept for retransmission: mark everything as sent and
+		// acknowledged, and empty the send buffer.
+		c->snd.una = c->snd.nxt = c->snd.last;
+		buffer_discard(&c->sndbuf, c->sndbuf.used);
+	}
+
+	if(is_reliable(c) && !timespec_isset(&c->rtrx_timeout)) {
+		start_retransmit_timer(c);
+	}
+
+	if(is_reliable(c) && !timespec_isset(&c->conn_timeout)) {
+		// Arm the connection timeout relative to now.
+		clock_gettime(UTCP_CLOCK, &c->conn_timeout);
+		c->conn_timeout.tv_sec += c->utcp->timeout;
+	}
+
+	return len;
+}
+
+/* Exchange the src and dst fields of a packet header in place. */
+static void swap_ports(struct hdr *hdr) {
+	const uint16_t original_src = hdr->src;
+
+	hdr->src = hdr->dst;
+	hdr->dst = original_src;
+}
+
+/* Resend the first unacknowledged segment right away.
+ *
+ * Unlike retransmit(), this touches neither the retransmission timer, the
+ * RTO, the congestion window nor c->snd.nxt; it only builds and emits one
+ * packet starting at c->snd.una.
+ *
+ * Does nothing (apart from a debug message) when the connection is CLOSED or
+ * when there is no unacknowledged data.
+ */
+static void fast_retransmit(struct utcp_connection *c) {
+	if(c->state == CLOSED || c->snd.last == c->snd.una) {
+		debug(c, "fast_retransmit() called but nothing to retransmit!\n");
+		return;
+	}
+
+	struct utcp *utcp = c->utcp;
+
+	// The packet is assembled in the scratch buffer owned by the utcp context.
+	struct {
+		struct hdr hdr;
+		uint8_t data[];
+	} *seg = c->utcp->pkt;
+
+	seg->hdr.src = c->src;
+	seg->hdr.dst = c->dst;
+	seg->hdr.wnd = c->rcvbuf.maxsize;
+	seg->hdr.aux = 0;
+
+	// Only states in which we may have data (or a FIN) in flight qualify.
+	switch(c->state) {
+	case ESTABLISHED:
+	case FIN_WAIT_1:
+	case CLOSE_WAIT:
+	case CLOSING:
+	case LAST_ACK:
+		break;
+
+	default:
+		return;
+	}
+
+	// Send unacked data again.
+	seg->hdr.seq = c->snd.una;
+	seg->hdr.ack = c->rcv.nxt;
+	seg->hdr.ctl = ACK;
+
+	// At most one MSS worth of sequence space.
+	uint32_t resend = min(seqdiff(c->snd.last, c->snd.una), utcp->mss);
+
+	if(fin_wanted(c, c->snd.una + resend)) {
+		// The last sequence number belongs to the FIN flag, not to payload.
+		resend--;
+		seg->hdr.ctl |= FIN;
+	}
+
+	buffer_copy(&c->sndbuf, seg->data, 0, resend);
+	print_packet(c, "rtrx", seg, sizeof(seg->hdr) + resend);
+	utcp->send(utcp, seg, sizeof(seg->hdr) + resend);
+}
+
+/* Retransmit whatever is appropriate for the connection's current state and
+ * back off the retransmission timer.
+ *
+ * - SYN_SENT:     the SYN (with its 4-byte auxiliary init block) is resent.
+ * - SYN_RECEIVED: the SYN|ACK is resent.
+ * - ESTABLISHED, FIN_WAIT_1, CLOSE_WAIT, CLOSING, LAST_ACK: up to one MSS of
+ *   unacknowledged data (and the FIN, if due) is resent from c->snd.una,
+ *   ssthresh/cwnd are reset per RFC 5681, and c->snd.nxt is rewound to just
+ *   after the resent segment.
+ * - Any other state: the timer is stopped and nothing is sent (abort() when
+ *   built with UTCP_DEBUG).
+ *
+ * After a resend the timer is restarted, c->rto is doubled (capped at
+ * MAX_RTO), the running RTT measurement is invalidated and the duplicate-ACK
+ * counter is cleared.
+ *
+ * If the connection is CLOSED or has no unacknowledged data, the timer is
+ * simply stopped.
+ */
+static void retransmit(struct utcp_connection *c) {
+	if(c->state == CLOSED || c->snd.last == c->snd.una) {
+		debug(c, "retransmit() called but nothing to retransmit!\n");
+		stop_retransmit_timer(c);
+		return;
+	}
+
+	struct utcp *utcp = c->utcp;
+
+	// Invoke the optional retransmit callback before anything is sent.
+	if(utcp->retransmit) {
+		utcp->retransmit(c);
+	}
+
+	// The packet is assembled in the scratch buffer owned by the utcp context.
+	struct {
+		struct hdr hdr;
+		uint8_t data[];
+	} *seg = c->utcp->pkt;
+
+	seg->hdr.src = c->src;
+	seg->hdr.dst = c->dst;
+	seg->hdr.wnd = c->rcvbuf.maxsize;
+	seg->hdr.aux = 0;
+
+	switch(c->state) {
+	case SYN_SENT: {
+		// Send our SYN again
+		seg->hdr.seq = c->snd.iss;
+		seg->hdr.ack = 0;
+		seg->hdr.ctl = SYN;
+		seg->hdr.aux = 0x0101;
+		seg->data[0] = 1;
+		seg->data[1] = 0;
+		seg->data[2] = 0;
+		seg->data[3] = c->flags & 0x7;
+		print_packet(c, "rtrx", seg, sizeof(seg->hdr) + 4);
+		utcp->send(utcp, seg, sizeof(seg->hdr) + 4);
+		break;
+	}
+
+	case SYN_RECEIVED: {
+		// Send SYNACK again
+		seg->hdr.seq = c->snd.nxt;
+		seg->hdr.ack = c->rcv.nxt;
+		seg->hdr.ctl = SYN | ACK;
+		print_packet(c, "rtrx", seg, sizeof(seg->hdr));
+		utcp->send(utcp, seg, sizeof(seg->hdr));
+		break;
+	}
+
+	case ESTABLISHED:
+	case FIN_WAIT_1:
+	case CLOSE_WAIT:
+	case CLOSING:
+	case LAST_ACK: {
+		// Send unacked data again.
+		seg->hdr.seq = c->snd.una;
+		seg->hdr.ack = c->rcv.nxt;
+		seg->hdr.ctl = ACK;
+
+		// At most one MSS worth of sequence space.
+		uint32_t resend = min(seqdiff(c->snd.last, c->snd.una), utcp->mss);
+
+		if(fin_wanted(c, c->snd.una + resend)) {
+			// The last sequence number belongs to the FIN flag, not to payload.
+			resend--;
+			seg->hdr.ctl |= FIN;
+		}
+
+		// RFC 5681 slow start after timeout
+		uint32_t in_flight = seqdiff(c->snd.nxt, c->snd.una);
+		c->snd.ssthresh = max(in_flight / 2, utcp->mss * 2); // eq. 4
+		c->snd.cwnd = utcp->mss;
+		debug_cwnd(c);
+
+		buffer_copy(&c->sndbuf, seg->data, 0, resend);
+		print_packet(c, "rtrx", seg, sizeof(seg->hdr) + resend);
+		utcp->send(utcp, seg, sizeof(seg->hdr) + resend);
+
+		// Everything past the resent segment counts as not yet sent again.
+		c->snd.nxt = c->snd.una + resend;
+		break;
+	}
+
+	case CLOSED:
+	case LISTEN:
+	case TIME_WAIT:
+	case FIN_WAIT_2:
+		// We shouldn't need to retransmit anything in this state.
+#ifdef UTCP_DEBUG
+		abort();
+#endif
+		stop_retransmit_timer(c);
+		return;
+	}
+
+	// NOTE(review): the timer is re-armed before c->rto is doubled. If
+	// start_retransmit_timer() reads c->rto, the back-off only takes effect
+	// from the next expiry onwards -- confirm this is intended.
+	start_retransmit_timer(c);
+	c->rto *= 2;
+
+	if(c->rto > MAX_RTO) {
+		c->rto = MAX_RTO;
+	}
+
+	c->rtt_start.tv_sec = 0; // invalidate RTT timer
+	c->dupack = 0; // cancel any ongoing fast recovery
+}
+
+/* Update receive buffer and SACK entries after consuming data.
+ *
+ * Situation:
+ *
+ * |.....0000..1111111111.....22222......3333|
+ * |---------------^
+ *
+ * 0..3 represent the SACK entries. The ^ indicates up to which point we want
+ * to remove data from the receive buffer. The idea is to subtract "len"
+ * from the offset of all the SACK entries, and then remove/cut down entries
+ * that are shifted to before the start of the receive buffer.
+ *
+ * There are three cases:
+ * - the SACK entry is after ^, in that case just change the offset.
+ * - the SACK entry starts before and ends after ^, so we have to
+ * change both its offset and size.
+ * - the SACK entry is completely before ^, in that case delete it.
+ */
+static void sack_consume(struct utcp_connection *c, size_t len) {
+ debug(c, "sack_consume %lu\n", (unsigned long)len);
+
+ if(len > c->rcvbuf.used) {
+ debug(c, "all SACK entries consumed\n");
+ c->sacks[0].len = 0;
+ return;
+ }
+
+ buffer_discard(&c->rcvbuf, len);
+
+ for(int i = 0; i < NSACKS && c->sacks[i].len;) {
+ if(len < c->sacks[i].offset) {
+ c->sacks[i].offset -= len;
+ i++;
+ } else if(len < c->sacks[i].offset + c->sacks[i].len) {
+ c->sacks[i].len -= len - c->sacks[i].offset;
+ c->sacks[i].offset = 0;
+ i++;
+ } else {
+ if(i < NSACKS - 1) {
+ memmove(&c->sacks[i], &c->sacks[i + 1], (NSACKS - 1 - i) * sizeof(c->sacks)[i]);
+ c->sacks[NSACKS - 1].len = 0;
+ } else {
+ c->sacks[i].len = 0;