diff --git a/src/core/tcp.c b/src/core/tcp.c index e2d10ba1..d3e03506 100644 --- a/src/core/tcp.c +++ b/src/core/tcp.c @@ -1100,6 +1100,7 @@ tcp_slowtmr_start: LWIP_DEBUGF(TCP_CWND_DEBUG, ("tcp_slowtmr: cwnd %"TCPWNDSIZE_F " ssthresh %"TCPWNDSIZE_F"\n", pcb->cwnd, pcb->ssthresh)); + pcb->bytes_acked = 0; /* The following needs to be called AFTER cwnd is set to one mss - STJ */ diff --git a/src/core/tcp_in.c b/src/core/tcp_in.c index 9be8759f..167fd527 100644 --- a/src/core/tcp_in.c +++ b/src/core/tcp_in.c @@ -1103,6 +1103,7 @@ tcp_receive(struct tcp_pcb *pcb) } } else if (TCP_SEQ_BETWEEN(ackno, pcb->lastack+1, pcb->snd_nxt)) { /* We come here when the ACK acknowledges new data. */ + tcpwnd_size_t acked; /* Reset the "IN Fast Retransmit" flag, since we are no longer in fast retransmit. Also reset the congestion window to the @@ -1110,6 +1111,7 @@ tcp_receive(struct tcp_pcb *pcb) if (pcb->flags & TF_INFR) { pcb->flags &= ~TF_INFR; pcb->cwnd = pcb->ssthresh; + pcb->bytes_acked = 0; } /* Reset the number of retransmissions. */ @@ -1118,6 +1120,9 @@ tcp_receive(struct tcp_pcb *pcb) /* Reset the retransmission time-out. */ pcb->rto = (pcb->sa >> 3) + pcb->sv; + /* Record how much data this ACK acks */ + acked = (tcpwnd_size_t)(ackno - pcb->lastack); + /* Reset the fast retransmit variables. */ pcb->dupacks = 0; pcb->lastack = ackno; @@ -1126,12 +1131,25 @@ tcp_receive(struct tcp_pcb *pcb) ssthresh). */ if (pcb->state >= ESTABLISHED) { if (pcb->cwnd < pcb->ssthresh) { - if ((tcpwnd_size_t)(pcb->cwnd + pcb->mss) > pcb->cwnd) { - pcb->cwnd += pcb->mss; + tcpwnd_size_t increase; + /* limit to 1 SMSS segment during period following RTO */ + u8_t num_seg = (pcb->flags & TF_RTO) ? 1 : 2; + /* RFC 3465, section 2.2 Slow Start */ + increase = LWIP_MIN(acked, (tcpwnd_size_t)(num_seg * pcb->mss)); + if (pcb->cwnd + increase > pcb->cwnd) { + pcb->cwnd += increase; } LWIP_DEBUGF(TCP_CWND_DEBUG, ("tcp_receive: slow start cwnd %"TCPWNDSIZE_F"\n", pcb->cwnd)); } else { - tcpwnd_size_t new_cwnd = (pcb->cwnd + pcb->mss * pcb->mss / pcb->cwnd); + tcpwnd_size_t new_cwnd = pcb->cwnd; + /* RFC 3465, section 2.1 Congestion Avoidance */ + if (pcb->bytes_acked + acked > pcb->bytes_acked) { + pcb->bytes_acked += acked; + if (pcb->bytes_acked >= pcb->cwnd) { + pcb->bytes_acked -= pcb->cwnd; + new_cwnd = pcb->cwnd + pcb->mss; + } + } if (new_cwnd > pcb->cwnd) { pcb->cwnd = new_cwnd; } @@ -1222,6 +1240,21 @@ tcp_receive(struct tcp_pcb *pcb) } } pcb->snd_buf += recv_acked; + /* check if this ACK ends our retransmission of in-flight data */ + if (pcb->flags & TF_RTO) { + /* RTO is done if + 1) both queues are empty or + 2) unacked is empty and unsent head contains data not part of RTO or + 3) unacked head contains data not part of RTO */ + if (pcb->unacked == NULL) { + if ((pcb->unsent == NULL) || + (TCP_SEQ_LEQ(pcb->rto_end, lwip_ntohl(pcb->unsent->tcphdr->seqno)))) { + pcb->flags &= ~TF_RTO; + } + } else if (TCP_SEQ_LEQ(pcb->rto_end, lwip_ntohl(pcb->unacked->tcphdr->seqno))) { + pcb->flags &= ~TF_RTO; + } + } /* End of ACK for new data processing. */ } else { /* Out of sequence ACK, didn't really ack anything */ diff --git a/src/core/tcp_out.c b/src/core/tcp_out.c index 8fe136d2..750d7361 100644 --- a/src/core/tcp_out.c +++ b/src/core/tcp_out.c @@ -1419,6 +1419,11 @@ tcp_rexmit_rto(struct tcp_pcb *pcb) /* unacked queue is now empty */ pcb->unacked = NULL; + /* Mark RTO in-progress */ + pcb->flags |= TF_RTO; + /* Record the next byte following retransmit */ + pcb->rto_end = lwip_ntohl(seg->tcphdr->seqno) + TCP_TCPLEN(seg); + /* increment number of retransmissions */ if (pcb->nrtx < 0xFF) { ++pcb->nrtx; diff --git a/src/include/lwip/tcp.h b/src/include/lwip/tcp.h index 29f89fd8..f7da50dc 100644 --- a/src/include/lwip/tcp.h +++ b/src/include/lwip/tcp.h @@ -209,6 +209,7 @@ struct tcp_pcb { #if LWIP_TCP_TIMESTAMPS #define TF_TIMESTAMP 0x0400U /* Timestamp option enabled */ #endif +#define TF_RTO 0x0800U /* RTO timer has fired, in-flight data moved to unsent and being retransmitted */ /* the rest of the fields are in host byte order as we have to do some math with them */ @@ -245,6 +246,9 @@ struct tcp_pcb { tcpwnd_size_t cwnd; tcpwnd_size_t ssthresh; + /* first byte following last rto byte */ + u32_t rto_end; + /* sender variables */ u32_t snd_nxt; /* next new seqno to be sent */ u32_t snd_wl1, snd_wl2; /* Sequence and acknowledgement numbers of last @@ -262,6 +266,8 @@ struct tcp_pcb { u16_t unsent_oversize; #endif /* TCP_OVERSIZE */ + tcpwnd_size_t bytes_acked; + /* These are ordered by sequence number: */ struct tcp_seg *unsent; /* Unsent (queued) segments. */ struct tcp_seg *unacked; /* Sent but unacknowledged segments. */ diff --git a/test/unit/tcp/test_tcp.c b/test/unit/tcp/test_tcp.c index 97c4b255..2b3a3ebd 100644 --- a/test/unit/tcp/test_tcp.c +++ b/test/unit/tcp/test_tcp.c @@ -695,6 +695,127 @@ START_TEST(test_tcp_tx_full_window_lost_from_unacked) } END_TEST +START_TEST(test_tcp_rto_tracking) +{ + struct netif netif; + struct test_tcp_txcounters txcounters; + struct test_tcp_counters counters; + struct tcp_pcb* pcb; + struct pbuf* p; + err_t err; + u16_t i, sent_total = 0; + LWIP_UNUSED_ARG(_i); + + for (i = 0; i < sizeof(tx_data); i++) { + tx_data[i] = (u8_t)i; + } + + /* initialize local vars */ + test_tcp_init_netif(&netif, &txcounters, &test_local_ip, &test_netmask); + memset(&counters, 0, sizeof(counters)); + + /* create and initialize the pcb */ + tcp_ticks = SEQNO1 - ISS; + pcb = test_tcp_new_counters_pcb(&counters); + EXPECT_RET(pcb != NULL); + tcp_set_state(pcb, ESTABLISHED, &test_local_ip, &test_remote_ip, TEST_LOCAL_PORT, TEST_REMOTE_PORT); + pcb->mss = TCP_MSS; + /* Set congestion window large enough to send all our segments */ + pcb->cwnd = 5*TCP_MSS; + + /* send 5 mss-sized segments */ + for (i = 0; i < 5; i++) { + err = tcp_write(pcb, &tx_data[sent_total], TCP_MSS, TCP_WRITE_FLAG_COPY); + EXPECT_RET(err == ERR_OK); + sent_total += TCP_MSS; + } + check_seqnos(pcb->unsent, 5, seqnos); + EXPECT(pcb->unacked == NULL); + err = tcp_output(pcb); + EXPECT(txcounters.num_tx_calls == 5); + EXPECT(txcounters.num_tx_bytes == 5 * (TCP_MSS + 40U)); + memset(&txcounters, 0, sizeof(txcounters)); + /* Check all 5 are in-flight */ + EXPECT(pcb->unsent == NULL); + check_seqnos(pcb->unacked, 5, seqnos); + + /* Force us into retransmisson timeout */ + while (!(pcb->flags & TF_RTO)) { + test_tcp_tmr(); + } + /* Ensure 4 remaining segments are back on unsent, ready for retransmission */ + check_seqnos(pcb->unsent, 4, &seqnos[1]); + /* Ensure 1st segment is on unacked (already retransmitted) */ + check_seqnos(pcb->unacked, 1, seqnos); + EXPECT(txcounters.num_tx_calls == 1); + EXPECT(txcounters.num_tx_bytes == TCP_MSS + 40U); + memset(&txcounters, 0, sizeof(txcounters)); + /* Ensure rto_end points to next byte */ + EXPECT(pcb->rto_end == seqnos[5]); + EXPECT(pcb->rto_end == pcb->snd_nxt); + /* Check cwnd was reset */ + EXPECT(pcb->cwnd == pcb->mss); + + /* Add another segment to send buffer which is outside of RTO */ + err = tcp_write(pcb, &tx_data[sent_total], TCP_MSS, TCP_WRITE_FLAG_COPY); + EXPECT_RET(err == ERR_OK); + sent_total += TCP_MSS; + check_seqnos(pcb->unsent, 5, &seqnos[1]); + /* Ensure no new data was sent */ + EXPECT(txcounters.num_tx_calls == 0); + EXPECT(txcounters.num_tx_bytes == 0); + EXPECT(pcb->rto_end == pcb->snd_nxt); + + /* ACK first segment */ + p = tcp_create_rx_segment(pcb, NULL, 0, 0, TCP_MSS, TCP_ACK); + test_tcp_input(p, &netif); + /* Next two retranmissions should go out, due to cwnd in slow start */ + EXPECT(txcounters.num_tx_calls == 2); + EXPECT(txcounters.num_tx_bytes == 2 * (TCP_MSS + 40U)); + memset(&txcounters, 0, sizeof(txcounters)); + check_seqnos(pcb->unacked, 2, &seqnos[1]); + check_seqnos(pcb->unsent, 3, &seqnos[3]); + /* RTO should still be marked */ + EXPECT(pcb->flags & TF_RTO); + /* cwnd should have only grown by 1 MSS */ + EXPECT(pcb->cwnd == (tcpwnd_size_t)(2 * pcb->mss)); + /* Ensure no new data was sent */ + EXPECT(pcb->rto_end == pcb->snd_nxt); + + /* ACK the next two segments */ + p = tcp_create_rx_segment(pcb, NULL, 0, 0, 2*TCP_MSS, TCP_ACK); + test_tcp_input(p, &netif); + /* Final 2 retransmissions and 1 new data should go out */ + EXPECT(txcounters.num_tx_calls == 3); + EXPECT(txcounters.num_tx_bytes == 3 * (TCP_MSS + 40U)); + memset(&txcounters, 0, sizeof(txcounters)); + check_seqnos(pcb->unacked, 3, &seqnos[3]); + EXPECT(pcb->unsent == NULL); + /* RTO should still be marked */ + EXPECT(pcb->flags & TF_RTO); + /* cwnd should have only grown by 1 MSS */ + EXPECT(pcb->cwnd == (tcpwnd_size_t)(3 * pcb->mss)); + /* snd_nxt should have been advanced past rto_end */ + EXPECT(TCP_SEQ_GT(pcb->snd_nxt, pcb->rto_end)); + + /* ACK the next two segments, finishing our RTO, leaving new segment unacked */ + p = tcp_create_rx_segment(pcb, NULL, 0, 0, 2*TCP_MSS, TCP_ACK); + test_tcp_input(p, &netif); + EXPECT(!(pcb->flags & TF_RTO)); + check_seqnos(pcb->unacked, 1, &seqnos[5]); + /* We should be in ABC congestion avoidance, so no change in cwnd */ + EXPECT(pcb->cwnd == (tcpwnd_size_t)(3 * pcb->mss)); + EXPECT(pcb->cwnd >= pcb->ssthresh); + /* Ensure ABC congestion avoidance is tracking bytes acked */ + EXPECT(pcb->bytes_acked == (tcpwnd_size_t)(2 * pcb->mss)); + + /* make sure the pcb is freed */ + EXPECT_RET(MEMP_STATS_GET(used, MEMP_TCP_PCB) == 1); + tcp_abort(pcb); + EXPECT_RET(MEMP_STATS_GET(used, MEMP_TCP_PCB) == 0); +} +END_TEST + /** Create the suite including all tests for this module */ Suite * tcp_suite(void) @@ -707,7 +828,8 @@ tcp_suite(void) TESTFUNC(test_tcp_fast_rexmit_wraparound), TESTFUNC(test_tcp_rto_rexmit_wraparound), TESTFUNC(test_tcp_tx_full_window_lost_from_unacked), - TESTFUNC(test_tcp_tx_full_window_lost_from_unsent) + TESTFUNC(test_tcp_tx_full_window_lost_from_unsent), + TESTFUNC(test_tcp_rto_tracking) }; return create_suite("TCP", tests, sizeof(tests)/sizeof(testfunc), tcp_setup, tcp_teardown); }