tcp: task #14128 - Appropriate Byte Counting support

This commit adds TCP Appropriate Byte Counting (ABC) support based on
RFC 3465

ABC replaces the previous congestion window growth mechanism and has been
configured with limit of 2 SMSS.  See task #14128 for discussion on
defaults, but the goal is to mitigate the performance impact of delayed
ACKs on congestion window growth

This commit also introduces a mechanism to track when the stack is
undergoing a period following an RTO where data is being retransmitted.

Lastly, this adds a unit test to verify RTO period tracking and some
basic ABC cwnd checking
This commit is contained in:
Joel Cunningham 2017-04-12 19:29:38 -05:00
parent e1f2c8b30c
commit de90d03e48
5 changed files with 171 additions and 4 deletions

View File

@ -1100,6 +1100,7 @@ tcp_slowtmr_start:
LWIP_DEBUGF(TCP_CWND_DEBUG, ("tcp_slowtmr: cwnd %"TCPWNDSIZE_F
" ssthresh %"TCPWNDSIZE_F"\n",
pcb->cwnd, pcb->ssthresh));
pcb->bytes_acked = 0;
/* The following needs to be called AFTER cwnd is set to one
mss - STJ */

View File

@ -1103,6 +1103,7 @@ tcp_receive(struct tcp_pcb *pcb)
}
} else if (TCP_SEQ_BETWEEN(ackno, pcb->lastack+1, pcb->snd_nxt)) {
/* We come here when the ACK acknowledges new data. */
tcpwnd_size_t acked;
/* Reset the "IN Fast Retransmit" flag, since we are no longer
in fast retransmit. Also reset the congestion window to the
@ -1110,6 +1111,7 @@ tcp_receive(struct tcp_pcb *pcb)
if (pcb->flags & TF_INFR) {
pcb->flags &= ~TF_INFR;
pcb->cwnd = pcb->ssthresh;
pcb->bytes_acked = 0;
}
/* Reset the number of retransmissions. */
@ -1118,6 +1120,9 @@ tcp_receive(struct tcp_pcb *pcb)
/* Reset the retransmission time-out. */
pcb->rto = (pcb->sa >> 3) + pcb->sv;
/* Record how much data this ACK acks */
acked = (tcpwnd_size_t)(ackno - pcb->lastack);
/* Reset the fast retransmit variables. */
pcb->dupacks = 0;
pcb->lastack = ackno;
@ -1126,12 +1131,25 @@ tcp_receive(struct tcp_pcb *pcb)
ssthresh). */
if (pcb->state >= ESTABLISHED) {
if (pcb->cwnd < pcb->ssthresh) {
if ((tcpwnd_size_t)(pcb->cwnd + pcb->mss) > pcb->cwnd) {
pcb->cwnd += pcb->mss;
tcpwnd_size_t increase;
/* limit to 1 SMSS segment during period following RTO */
u8_t num_seg = (pcb->flags & TF_RTO) ? 1 : 2;
/* RFC 3465, section 2.2 Slow Start */
increase = LWIP_MIN(acked, (tcpwnd_size_t)(num_seg * pcb->mss));
if (pcb->cwnd + increase > pcb->cwnd) {
pcb->cwnd += increase;
}
LWIP_DEBUGF(TCP_CWND_DEBUG, ("tcp_receive: slow start cwnd %"TCPWNDSIZE_F"\n", pcb->cwnd));
} else {
tcpwnd_size_t new_cwnd = (pcb->cwnd + pcb->mss * pcb->mss / pcb->cwnd);
tcpwnd_size_t new_cwnd = pcb->cwnd;
/* RFC 3465, section 2.1 Congestion Avoidance */
if (pcb->bytes_acked + acked > pcb->bytes_acked) {
pcb->bytes_acked += acked;
if (pcb->bytes_acked >= pcb->cwnd) {
pcb->bytes_acked -= pcb->cwnd;
new_cwnd = pcb->cwnd + pcb->mss;
}
}
if (new_cwnd > pcb->cwnd) {
pcb->cwnd = new_cwnd;
}
@ -1222,6 +1240,21 @@ tcp_receive(struct tcp_pcb *pcb)
}
}
pcb->snd_buf += recv_acked;
/* check if this ACK ends our retransmission of in-flight data */
if (pcb->flags & TF_RTO) {
/* RTO is done if
1) both queues are empty or
2) unacked is empty and unsent head contains data not part of RTO or
3) unacked head contains data not part of RTO */
if (pcb->unacked == NULL) {
if ((pcb->unsent == NULL) ||
(TCP_SEQ_LEQ(pcb->rto_end, lwip_ntohl(pcb->unsent->tcphdr->seqno)))) {
pcb->flags &= ~TF_RTO;
}
} else if (TCP_SEQ_LEQ(pcb->rto_end, lwip_ntohl(pcb->unacked->tcphdr->seqno))) {
pcb->flags &= ~TF_RTO;
}
}
/* End of ACK for new data processing. */
} else {
/* Out of sequence ACK, didn't really ack anything */

View File

@ -1419,6 +1419,11 @@ tcp_rexmit_rto(struct tcp_pcb *pcb)
/* unacked queue is now empty */
pcb->unacked = NULL;
/* Mark RTO in-progress */
pcb->flags |= TF_RTO;
/* Record the next byte following retransmit */
pcb->rto_end = lwip_ntohl(seg->tcphdr->seqno) + TCP_TCPLEN(seg);
/* increment number of retransmissions */
if (pcb->nrtx < 0xFF) {
++pcb->nrtx;

View File

@ -209,6 +209,7 @@ struct tcp_pcb {
#if LWIP_TCP_TIMESTAMPS
#define TF_TIMESTAMP 0x0400U /* Timestamp option enabled */
#endif
#define TF_RTO 0x0800U /* RTO timer has fired, in-flight data moved to unsent and being retransmitted */
/* the rest of the fields are in host byte order
as we have to do some math with them */
@ -245,6 +246,9 @@ struct tcp_pcb {
tcpwnd_size_t cwnd;
tcpwnd_size_t ssthresh;
/* first byte following last rto byte */
u32_t rto_end;
/* sender variables */
u32_t snd_nxt; /* next new seqno to be sent */
u32_t snd_wl1, snd_wl2; /* Sequence and acknowledgement numbers of last
@ -262,6 +266,8 @@ struct tcp_pcb {
u16_t unsent_oversize;
#endif /* TCP_OVERSIZE */
tcpwnd_size_t bytes_acked;
/* These are ordered by sequence number: */
struct tcp_seg *unsent; /* Unsent (queued) segments. */
struct tcp_seg *unacked; /* Sent but unacknowledged segments. */

View File

@ -695,6 +695,127 @@ START_TEST(test_tcp_tx_full_window_lost_from_unacked)
}
END_TEST
START_TEST(test_tcp_rto_tracking)
{
struct netif netif;
struct test_tcp_txcounters txcounters;
struct test_tcp_counters counters;
struct tcp_pcb* pcb;
struct pbuf* p;
err_t err;
u16_t i, sent_total = 0;
LWIP_UNUSED_ARG(_i);
for (i = 0; i < sizeof(tx_data); i++) {
tx_data[i] = (u8_t)i;
}
/* initialize local vars */
test_tcp_init_netif(&netif, &txcounters, &test_local_ip, &test_netmask);
memset(&counters, 0, sizeof(counters));
/* create and initialize the pcb */
tcp_ticks = SEQNO1 - ISS;
pcb = test_tcp_new_counters_pcb(&counters);
EXPECT_RET(pcb != NULL);
tcp_set_state(pcb, ESTABLISHED, &test_local_ip, &test_remote_ip, TEST_LOCAL_PORT, TEST_REMOTE_PORT);
pcb->mss = TCP_MSS;
/* Set congestion window large enough to send all our segments */
pcb->cwnd = 5*TCP_MSS;
/* send 5 mss-sized segments */
for (i = 0; i < 5; i++) {
err = tcp_write(pcb, &tx_data[sent_total], TCP_MSS, TCP_WRITE_FLAG_COPY);
EXPECT_RET(err == ERR_OK);
sent_total += TCP_MSS;
}
check_seqnos(pcb->unsent, 5, seqnos);
EXPECT(pcb->unacked == NULL);
err = tcp_output(pcb);
EXPECT(txcounters.num_tx_calls == 5);
EXPECT(txcounters.num_tx_bytes == 5 * (TCP_MSS + 40U));
memset(&txcounters, 0, sizeof(txcounters));
/* Check all 5 are in-flight */
EXPECT(pcb->unsent == NULL);
check_seqnos(pcb->unacked, 5, seqnos);
/* Force us into retransmisson timeout */
while (!(pcb->flags & TF_RTO)) {
test_tcp_tmr();
}
/* Ensure 4 remaining segments are back on unsent, ready for retransmission */
check_seqnos(pcb->unsent, 4, &seqnos[1]);
/* Ensure 1st segment is on unacked (already retransmitted) */
check_seqnos(pcb->unacked, 1, seqnos);
EXPECT(txcounters.num_tx_calls == 1);
EXPECT(txcounters.num_tx_bytes == TCP_MSS + 40U);
memset(&txcounters, 0, sizeof(txcounters));
/* Ensure rto_end points to next byte */
EXPECT(pcb->rto_end == seqnos[5]);
EXPECT(pcb->rto_end == pcb->snd_nxt);
/* Check cwnd was reset */
EXPECT(pcb->cwnd == pcb->mss);
/* Add another segment to send buffer which is outside of RTO */
err = tcp_write(pcb, &tx_data[sent_total], TCP_MSS, TCP_WRITE_FLAG_COPY);
EXPECT_RET(err == ERR_OK);
sent_total += TCP_MSS;
check_seqnos(pcb->unsent, 5, &seqnos[1]);
/* Ensure no new data was sent */
EXPECT(txcounters.num_tx_calls == 0);
EXPECT(txcounters.num_tx_bytes == 0);
EXPECT(pcb->rto_end == pcb->snd_nxt);
/* ACK first segment */
p = tcp_create_rx_segment(pcb, NULL, 0, 0, TCP_MSS, TCP_ACK);
test_tcp_input(p, &netif);
/* Next two retranmissions should go out, due to cwnd in slow start */
EXPECT(txcounters.num_tx_calls == 2);
EXPECT(txcounters.num_tx_bytes == 2 * (TCP_MSS + 40U));
memset(&txcounters, 0, sizeof(txcounters));
check_seqnos(pcb->unacked, 2, &seqnos[1]);
check_seqnos(pcb->unsent, 3, &seqnos[3]);
/* RTO should still be marked */
EXPECT(pcb->flags & TF_RTO);
/* cwnd should have only grown by 1 MSS */
EXPECT(pcb->cwnd == (tcpwnd_size_t)(2 * pcb->mss));
/* Ensure no new data was sent */
EXPECT(pcb->rto_end == pcb->snd_nxt);
/* ACK the next two segments */
p = tcp_create_rx_segment(pcb, NULL, 0, 0, 2*TCP_MSS, TCP_ACK);
test_tcp_input(p, &netif);
/* Final 2 retransmissions and 1 new data should go out */
EXPECT(txcounters.num_tx_calls == 3);
EXPECT(txcounters.num_tx_bytes == 3 * (TCP_MSS + 40U));
memset(&txcounters, 0, sizeof(txcounters));
check_seqnos(pcb->unacked, 3, &seqnos[3]);
EXPECT(pcb->unsent == NULL);
/* RTO should still be marked */
EXPECT(pcb->flags & TF_RTO);
/* cwnd should have only grown by 1 MSS */
EXPECT(pcb->cwnd == (tcpwnd_size_t)(3 * pcb->mss));
/* snd_nxt should have been advanced past rto_end */
EXPECT(TCP_SEQ_GT(pcb->snd_nxt, pcb->rto_end));
/* ACK the next two segments, finishing our RTO, leaving new segment unacked */
p = tcp_create_rx_segment(pcb, NULL, 0, 0, 2*TCP_MSS, TCP_ACK);
test_tcp_input(p, &netif);
EXPECT(!(pcb->flags & TF_RTO));
check_seqnos(pcb->unacked, 1, &seqnos[5]);
/* We should be in ABC congestion avoidance, so no change in cwnd */
EXPECT(pcb->cwnd == (tcpwnd_size_t)(3 * pcb->mss));
EXPECT(pcb->cwnd >= pcb->ssthresh);
/* Ensure ABC congestion avoidance is tracking bytes acked */
EXPECT(pcb->bytes_acked == (tcpwnd_size_t)(2 * pcb->mss));
/* make sure the pcb is freed */
EXPECT_RET(MEMP_STATS_GET(used, MEMP_TCP_PCB) == 1);
tcp_abort(pcb);
EXPECT_RET(MEMP_STATS_GET(used, MEMP_TCP_PCB) == 0);
}
END_TEST
/** Create the suite including all tests for this module */
Suite *
tcp_suite(void)
@ -707,7 +828,8 @@ tcp_suite(void)
TESTFUNC(test_tcp_fast_rexmit_wraparound),
TESTFUNC(test_tcp_rto_rexmit_wraparound),
TESTFUNC(test_tcp_tx_full_window_lost_from_unacked),
TESTFUNC(test_tcp_tx_full_window_lost_from_unsent)
TESTFUNC(test_tcp_tx_full_window_lost_from_unsent),
TESTFUNC(test_tcp_rto_tracking)
};
return create_suite("TCP", tests, sizeof(tests)/sizeof(testfunc), tcp_setup, tcp_teardown);
}