From 90873d6c71dc4398e6674ace49fdfc96305c9297 Mon Sep 17 00:00:00 2001 From: goldsimon Date: Tue, 8 Aug 2017 23:02:14 +0200 Subject: [PATCH] Continue fixing the spirit of bug #51663: don't change pcb state if retransmission can't be done because segments are still queued for transmission - add a better-documented static function tcp_output_segment_busy - try to reduce the number of checks - tcp_rexmit_rto: iterate pcb->unacked only once - no need to check for ref==1 in tcp_rexmit_fast when tcp_rexmit does - call tcp_rexmit_fast if dupacks >= 3 (not == 3) and use TF_INFR flag to guard the fast-rexmit case (that way, it's triggered again on the next dupack) --- src/core/tcp.c | 51 +++++------ src/core/tcp_in.c | 4 +- src/core/tcp_out.c | 145 ++++++++++++++++++++----------- src/include/lwip/priv/tcp_priv.h | 4 +- 4 files changed, 126 insertions(+), 78 deletions(-) diff --git a/src/core/tcp.c b/src/core/tcp.c index 95c7b011..d4aa3e02 100644 --- a/src/core/tcp.c +++ b/src/core/tcp.c @@ -1104,33 +1104,34 @@ tcp_slowtmr_start: LWIP_DEBUGF(TCP_RTO_DEBUG, ("tcp_slowtmr: rtime %"S16_F " pcb->rto %"S16_F"\n", pcb->rtime, pcb->rto)); + if (tcp_rexmit_rto_prepare(pcb) == ERR_OK) { + /* Double retransmission time-out unless we are trying to + * connect to somebody (i.e., we are in SYN_SENT). */ + if (pcb->state != SYN_SENT) { + u8_t backoff_idx = LWIP_MIN(pcb->nrtx, sizeof(tcp_backoff)-1); + int calc_rto = ((pcb->sa >> 3) + pcb->sv) << tcp_backoff[backoff_idx]; + pcb->rto = (s16_t)LWIP_MIN(calc_rto, 0x7FFF); + } - /* Double retransmission time-out unless we are trying to - * connect to somebody (i.e., we are in SYN_SENT). */ - if (pcb->state != SYN_SENT) { - u8_t backoff_idx = LWIP_MIN(pcb->nrtx, sizeof(tcp_backoff)-1); - int calc_rto = ((pcb->sa >> 3) + pcb->sv) << tcp_backoff[backoff_idx]; - pcb->rto = (s16_t)LWIP_MIN(calc_rto, 0x7FFF); + /* Reset the retransmission timer. */ + pcb->rtime = 0; + + /* Reduce congestion window and ssthresh. */ + eff_wnd = LWIP_MIN(pcb->cwnd, pcb->snd_wnd); + pcb->ssthresh = eff_wnd >> 1; + if (pcb->ssthresh < (tcpwnd_size_t)(pcb->mss << 1)) { + pcb->ssthresh = (tcpwnd_size_t)(pcb->mss << 1); + } + pcb->cwnd = pcb->mss; + LWIP_DEBUGF(TCP_CWND_DEBUG, ("tcp_slowtmr: cwnd %"TCPWNDSIZE_F + " ssthresh %"TCPWNDSIZE_F"\n", + pcb->cwnd, pcb->ssthresh)); + pcb->bytes_acked = 0; + + /* The following needs to be called AFTER cwnd is set to one + mss - STJ */ + tcp_rexmit_rto_commit(pcb); } - - /* Reset the retransmission timer. */ - pcb->rtime = 0; - - /* Reduce congestion window and ssthresh. */ - eff_wnd = LWIP_MIN(pcb->cwnd, pcb->snd_wnd); - pcb->ssthresh = eff_wnd >> 1; - if (pcb->ssthresh < (tcpwnd_size_t)(pcb->mss << 1)) { - pcb->ssthresh = (tcpwnd_size_t)(pcb->mss << 1); - } - pcb->cwnd = pcb->mss; - LWIP_DEBUGF(TCP_CWND_DEBUG, ("tcp_slowtmr: cwnd %"TCPWNDSIZE_F - " ssthresh %"TCPWNDSIZE_F"\n", - pcb->cwnd, pcb->ssthresh)); - pcb->bytes_acked = 0; - - /* The following needs to be called AFTER cwnd is set to one - mss - STJ */ - tcp_rexmit_rto(pcb); } } } diff --git a/src/core/tcp_in.c b/src/core/tcp_in.c index bee160bc..0d5438e9 100644 --- a/src/core/tcp_in.c +++ b/src/core/tcp_in.c @@ -1173,8 +1173,8 @@ tcp_receive(struct tcp_pcb *pcb) if ((tcpwnd_size_t)(pcb->cwnd + pcb->mss) > pcb->cwnd) { pcb->cwnd = (tcpwnd_size_t)(pcb->cwnd + pcb->mss); } - } else if (pcb->dupacks == 3) { - /* Do fast retransmit */ + } else if (pcb->dupacks >= 3) { + /* Do fast retransmit (checked via TF_INFR, not via dupacks count) */ tcp_rexmit_fast(pcb); } } diff --git a/src/core/tcp_out.c b/src/core/tcp_out.c index d744ed65..2ccaea57 100644 --- a/src/core/tcp_out.c +++ b/src/core/tcp_out.c @@ -1286,6 +1286,29 @@ output_done: return ERR_OK; } +/** Check if a segment's pbufs are used by someone else than TCP. + * This can happen on retransmission if the pbuf of this segment is still + * referenced by the netif driver due to deferred transmission. + * This is the case (only!) if someone down the TX call path called + * pbuf_ref() on one of the pbufs! + * + * @arg seg the tcp segment to check + * @return 1 if ref != 1, 0 if ref == 1 + */ +static int +tcp_output_segment_busy(struct tcp_seg *seg) +{ + /* We only need to check the first pbuf here: + If a pbuf is queued for transmission, a driver calls pbuf_ref(), + which only changes the ref count of the first pbuf */ + if (seg->p->ref != 1) { + /* other reference found */ + return 1; + } + /* no other references found */ + return 0; +} + /** * Called by tcp_output() to actually send a TCP segment over IP. * @@ -1300,10 +1323,10 @@ tcp_output_segment(struct tcp_seg *seg, struct tcp_pcb *pcb, struct netif *netif u16_t len; u32_t *opts; - if (seg->p->ref != 1) { - /* This can happen if the pbuf of this segment is still referenced by the - netif driver due to deferred transmission. Since this function modifies - p->len, we must not continue in this case. */ + if (tcp_output_segment_busy(seg)) { + /* This should not happen: rexmit functions should have checked this. + However, since this function modifies p->len, we must not continue in this case. */ + LWIP_DEBUGF(TCP_RTO_DEBUG|LWIP_DBG_LEVEL_SERIOUS, ("tcp_output_segment: segment busy\n")); return ERR_OK; } @@ -1514,27 +1537,29 @@ tcp_rst(const struct tcp_pcb *pcb, u32_t seqno, u32_t ackno, * * @param pcb the tcp_pcb for which to re-enqueue all unacked segments */ -void -tcp_rexmit_rto(struct tcp_pcb *pcb) +err_t +tcp_rexmit_rto_prepare(struct tcp_pcb *pcb) { struct tcp_seg *seg; if (pcb->unacked == NULL) { - return; + return ERR_VAL; } - /* Give up if any of the segment pbufs are still referenced by the netif - driver due to deferred transmission. No point loading the link further if - it is struggling to flush its buffered writes. */ - for (seg = pcb->unacked; seg != NULL; seg = seg->next) { - if (seg->p->ref != 1) { - LWIP_DEBUGF(TCP_RTO_DEBUG, ("tcp_rexmit_rto busy\n")); - return; + /* Move all unacked segments to the head of the unsent queue. + However, give up if any of the unsent pbufs are still referenced by the + netif driver due to deferred transmission. No point loading the link further + if it is struggling to flush its buffered writes. */ + for (seg = pcb->unacked; seg->next != NULL; seg = seg->next) { + if (tcp_output_segment_busy(seg)) { + LWIP_DEBUGF(TCP_RTO_DEBUG, ("tcp_rexmit_rto: segment busy\n")); + return ERR_VAL; } } - - /* Move all unacked segments to the head of the unsent queue */ - for (seg = pcb->unacked; seg->next != NULL; seg = seg->next); + if (tcp_output_segment_busy(seg)) { + LWIP_DEBUGF(TCP_RTO_DEBUG, ("tcp_rexmit_rto: segment busy\n")); + return ERR_VAL; + } /* concatenate unsent queue after unacked queue */ seg->next = pcb->unsent; #if TCP_OVERSIZE_DBGCHECK @@ -1552,19 +1577,45 @@ tcp_rexmit_rto(struct tcp_pcb *pcb) pcb->flags |= TF_RTO; /* Record the next byte following retransmit */ pcb->rto_end = lwip_ntohl(seg->tcphdr->seqno) + TCP_TCPLEN(seg); + /* Don't take any RTT measurements after retransmitting. */ + pcb->rttest = 0; + return ERR_OK; +} + +/** + * Requeue all unacked segments for retransmission + * + * Called by tcp_slowtmr() for slow retransmission. + * + * @param pcb the tcp_pcb for which to re-enqueue all unacked segments + */ +void +tcp_rexmit_rto_commit(struct tcp_pcb *pcb) +{ /* increment number of retransmissions */ if (pcb->nrtx < 0xFF) { ++pcb->nrtx; } - - /* Don't take any RTT measurements after retransmitting. */ - pcb->rttest = 0; - /* Do the actual retransmission */ tcp_output(pcb); } +/** + * Requeue all unacked segments for retransmission + * + * Called by tcp_slowtmr() for slow retransmission. + * + * @param pcb the tcp_pcb for which to re-enqueue all unacked segments + */ +void +tcp_rexmit_rto(struct tcp_pcb *pcb) +{ + if (tcp_rexmit_rto_prepare(pcb) == ERR_OK) { + tcp_rexmit_rto_commit(pcb); + } +} + /** * Requeue the first unacked segment for retransmission * @@ -1572,23 +1623,23 @@ tcp_rexmit_rto(struct tcp_pcb *pcb) * * @param pcb the tcp_pcb for which to retransmit the first unacked segment */ -void +err_t tcp_rexmit(struct tcp_pcb *pcb) { struct tcp_seg *seg; struct tcp_seg **cur_seg; if (pcb->unacked == NULL) { - return; + return ERR_VAL; } seg = pcb->unacked; - /* Give up if the first segment pbuf is still referenced by the netif driver + /* Give up if the segment is still referenced by the netif driver due to deferred transmission. */ - if (seg->p->ref != 1) { + if (tcp_output_segment_busy(seg)) { LWIP_DEBUGF(TCP_RTO_DEBUG, ("tcp_rexmit busy\n")); - return; + return ERR_VAL; } /* Move the first unacked segment to the unsent queue */ @@ -1620,6 +1671,7 @@ tcp_rexmit(struct tcp_pcb *pcb) MIB2_STATS_INC(mib2.tcpretranssegs); /* No need to call tcp_output: we are always called from tcp_input() and thus tcp_output directly returns. */ + return ERR_OK; } @@ -1632,39 +1684,32 @@ void tcp_rexmit_fast(struct tcp_pcb *pcb) { if (pcb->unacked != NULL && !(pcb->flags & TF_INFR)) { - /* Give up if the first segment pbuf is still referenced by the netif driver - due to deferred transmission. */ - if (pcb->unacked->p->ref != 1) { - LWIP_DEBUGF(TCP_RTO_DEBUG, ("tcp_rexmit_fast busy\n")); - return; - } - /* This is fast retransmit. Retransmit the first unacked segment. */ LWIP_DEBUGF(TCP_FR_DEBUG, ("tcp_receive: dupacks %"U16_F" (%"U32_F "), fast retransmit %"U32_F"\n", (u16_t)pcb->dupacks, pcb->lastack, lwip_ntohl(pcb->unacked->tcphdr->seqno))); - tcp_rexmit(pcb); + if (tcp_rexmit(pcb) == ERR_OK) { + /* Set ssthresh to half of the minimum of the current + * cwnd and the advertised window */ + pcb->ssthresh = LWIP_MIN(pcb->cwnd, pcb->snd_wnd) / 2; - /* Set ssthresh to half of the minimum of the current - * cwnd and the advertised window */ - pcb->ssthresh = LWIP_MIN(pcb->cwnd, pcb->snd_wnd) / 2; + /* The minimum value for ssthresh should be 2 MSS */ + if (pcb->ssthresh < (2U * pcb->mss)) { + LWIP_DEBUGF(TCP_FR_DEBUG, + ("tcp_receive: The minimum value for ssthresh %"TCPWNDSIZE_F + " should be min 2 mss %"U16_F"...\n", + pcb->ssthresh, (u16_t)(2*pcb->mss))); + pcb->ssthresh = 2 * pcb->mss; + } - /* The minimum value for ssthresh should be 2 MSS */ - if (pcb->ssthresh < (2U * pcb->mss)) { - LWIP_DEBUGF(TCP_FR_DEBUG, - ("tcp_receive: The minimum value for ssthresh %"TCPWNDSIZE_F - " should be min 2 mss %"U16_F"...\n", - pcb->ssthresh, (u16_t)(2*pcb->mss))); - pcb->ssthresh = 2*pcb->mss; + pcb->cwnd = pcb->ssthresh + 3 * pcb->mss; + pcb->flags |= TF_INFR; + + /* Reset the retransmission timer to prevent immediate rto retransmissions */ + pcb->rtime = 0; } - - pcb->cwnd = pcb->ssthresh + 3 * pcb->mss; - pcb->flags |= TF_INFR; - - /* Reset the retransmission timer to prevent immediate rto retransmissions */ - pcb->rtime = 0; } } diff --git a/src/include/lwip/priv/tcp_priv.h b/src/include/lwip/priv/tcp_priv.h index 75f7fbf3..7e02b604 100644 --- a/src/include/lwip/priv/tcp_priv.h +++ b/src/include/lwip/priv/tcp_priv.h @@ -79,7 +79,9 @@ void tcp_input (struct pbuf *p, struct netif *inp); struct tcp_pcb * tcp_alloc (u8_t prio); void tcp_abandon (struct tcp_pcb *pcb, int reset); err_t tcp_send_empty_ack(struct tcp_pcb *pcb); -void tcp_rexmit (struct tcp_pcb *pcb); +err_t tcp_rexmit (struct tcp_pcb *pcb); +err_t tcp_rexmit_rto_prepare(struct tcp_pcb *pcb); +void tcp_rexmit_rto_commit(struct tcp_pcb *pcb); void tcp_rexmit_rto (struct tcp_pcb *pcb); void tcp_rexmit_fast (struct tcp_pcb *pcb); u32_t tcp_update_rcv_ann_wnd(struct tcp_pcb *pcb);