From f1a9f7ea701a836d83211cbf1961b2addbceaa05 Mon Sep 17 00:00:00 2001 From: kieranm Date: Tue, 31 Mar 2009 14:23:40 +0000 Subject: [PATCH] BUG20515: rework way TCP window updates are calculated and sent --- CHANGELOG | 3 ++ src/core/tcp.c | 72 +++++++++++++++++++++++------------------- src/core/tcp_in.c | 15 +++------ src/core/tcp_out.c | 57 ++++++++++++++++----------------- src/include/lwip/opt.h | 11 ++++++- src/include/lwip/tcp.h | 6 ++-- 6 files changed, 90 insertions(+), 74 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 913b33eb..ec799b72 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -81,6 +81,9 @@ HISTORY ++ Bugfixes: 2009-03-31 Kieran Mansley + * tcp.c, tcp_in.c, tcp_out.c, tcp.h, opt.h: Rework the way window + updates are calculated and sent (BUG20515) + * tcp_in.c: cope with SYN packets received during established states, and retransmission of initial SYN. diff --git a/src/core/tcp.c b/src/core/tcp.c index 55e068c8..1dfe62c1 100644 --- a/src/core/tcp.c +++ b/src/core/tcp.c @@ -383,6 +383,33 @@ tcp_listen_with_backlog(struct tcp_pcb *pcb, u8_t backlog) return (struct tcp_pcb *)lpcb; } +/** + * Update the state that tracks the available window space to advertise. + * + * Returns how much extra window would be advertised if we sent an + * update now. + */ +u32_t tcp_update_rcv_ann_wnd(struct tcp_pcb *pcb) +{ + u32_t new_right_edge = pcb->rcv_nxt + pcb->rcv_wnd; + + if (TCP_SEQ_GEQ(new_right_edge, pcb->rcv_ann_right_edge + pcb->mss)) { + /* we can advertise more window */ + pcb->rcv_ann_wnd = pcb->rcv_wnd; + return new_right_edge - pcb->rcv_ann_right_edge; + } else { + if (TCP_SEQ_GT(pcb->rcv_nxt, pcb->rcv_ann_right_edge)) { + /* Can happen due to other end sending out of advertised window, + * but within actual available (but not yet advertised) window */ + pcb->rcv_ann_wnd = 0; + } else { + /* keep the right edge of window constant */ + pcb->rcv_ann_wnd = pcb->rcv_ann_right_edge - pcb->rcv_nxt; + } + return 0; + } +} + /** * This function should be called by the application when it has * processed the data. The purpose is to advertise a larger window @@ -394,40 +421,20 @@ tcp_listen_with_backlog(struct tcp_pcb *pcb, u8_t backlog) void tcp_recved(struct tcp_pcb *pcb, u16_t len) { - if ((u32_t)pcb->rcv_wnd + len > TCP_WND) { - pcb->rcv_wnd = TCP_WND; - pcb->rcv_ann_wnd = TCP_WND; - } else { - pcb->rcv_wnd += len; - if (pcb->rcv_wnd >= pcb->mss) { - pcb->rcv_ann_wnd = pcb->rcv_wnd; - } - } + int wnd_inflation; - if (!(pcb->flags & TF_ACK_DELAY) && - !(pcb->flags & TF_ACK_NOW)) { - /* - * We send an ACK here (if one is not already pending, hence - * the above tests) as tcp_recved() implies that the application - * has processed some data, and so we can open the receiver's - * window to allow more to be transmitted. This could result in - * two ACKs being sent for each received packet in some limited cases - * (where the application is only receiving data, and is slow to - * process it) but it is necessary to guarantee that the sender can - * continue to transmit. - */ - tcp_ack(pcb); - } - else if (pcb->flags & TF_ACK_DELAY && pcb->rcv_wnd >= TCP_WND/2) { - /* If we can send a window update such that there is a full - * segment available in the window, do so now. This is sort of - * nagle-like in its goals, and tries to hit a compromise between - * sending acks each time the window is updated, and only sending - * window updates when a timer expires. The "threshold" used - * above (currently TCP_WND/2) can be tuned to be more or less - * aggressive */ + pcb->rcv_wnd += len; + if (pcb->rcv_wnd > TCP_WND) + pcb->rcv_wnd = TCP_WND; + + wnd_inflation = tcp_update_rcv_ann_wnd(pcb); + + /* If the change in the right edge of window is significant (default + * watermark is TCP_WND/2), then send an explicit update now. + * Otherwise wait for a packet to be sent in the normal course of + * events (or more window to be available later) */ + if (wnd_inflation >= TCP_WND_UPDATE_THRESHOLD) tcp_ack_now(pcb); - } LWIP_DEBUGF(TCP_DEBUG, ("tcp_recved: recveived %"U16_F" bytes, wnd %"U16_F" (%"U16_F").\n", len, pcb->rcv_wnd, TCP_WND - pcb->rcv_wnd)); @@ -510,6 +517,7 @@ tcp_connect(struct tcp_pcb *pcb, struct ip_addr *ipaddr, u16_t port, pcb->snd_lbb = iss - 1; pcb->rcv_wnd = TCP_WND; pcb->rcv_ann_wnd = TCP_WND; + pcb->rcv_ann_right_edge = pcb->rcv_nxt; pcb->snd_wnd = TCP_WND; /* As initial send MSS, we use TCP_MSS but limit it to 536. The send MSS is updated when an MSS option is received. */ diff --git a/src/core/tcp_in.c b/src/core/tcp_in.c index c966d81f..b88bccee 100644 --- a/src/core/tcp_in.c +++ b/src/core/tcp_in.c @@ -430,6 +430,7 @@ tcp_listen_input(struct tcp_pcb_listen *pcb) npcb->remote_port = tcphdr->src; npcb->state = SYN_RCVD; npcb->rcv_nxt = seqno + 1; + npcb->rcv_ann_right_edge = npcb->rcv_nxt; npcb->snd_wnd = tcphdr->wnd; npcb->ssthresh = npcb->snd_wnd; npcb->snd_wl1 = seqno - 1;/* initialise to seqno-1 to force window update */ @@ -561,6 +562,7 @@ tcp_process(struct tcp_pcb *pcb) && ackno == ntohl(pcb->unacked->tcphdr->seqno) + 1) { pcb->snd_buf++; pcb->rcv_nxt = seqno + 1; + pcb->rcv_ann_right_edge = pcb->rcv_nxt; pcb->lastack = ackno; pcb->snd_wnd = tcphdr->wnd; pcb->snd_wl1 = seqno - 1; /* initialise to seqno - 1 to force window update */ @@ -1095,11 +1097,7 @@ tcp_receive(struct tcp_pcb *pcb) pcb->rcv_wnd -= tcplen; } - if (pcb->rcv_ann_wnd < tcplen) { - pcb->rcv_ann_wnd = 0; - } else { - pcb->rcv_ann_wnd -= tcplen; - } + tcp_update_rcv_ann_wnd(pcb); /* If there is data in the segment, we make preparations to pass this up to the application. The ->recv_data variable @@ -1137,11 +1135,8 @@ tcp_receive(struct tcp_pcb *pcb) } else { pcb->rcv_wnd -= TCP_TCPLEN(cseg); } - if (pcb->rcv_ann_wnd < TCP_TCPLEN(cseg)) { - pcb->rcv_ann_wnd = 0; - } else { - pcb->rcv_ann_wnd -= TCP_TCPLEN(cseg); - } + + tcp_update_rcv_ann_wnd(pcb); if (cseg->p->tot_len > 0) { /* Chain this pbuf onto the pbuf that we will pass to diff --git a/src/core/tcp_out.c b/src/core/tcp_out.c index 2dba5915..653ce4ae 100644 --- a/src/core/tcp_out.c +++ b/src/core/tcp_out.c @@ -59,6 +59,26 @@ /* Forward declarations.*/ static void tcp_output_segment(struct tcp_seg *seg, struct tcp_pcb *pcb); +static struct tcp_hdr * +tcp_output_set_header(struct tcp_pcb *pcb, struct pbuf *p, int optlen) +{ + struct tcp_hdr *tcphdr = p->payload; + tcphdr->src = htons(pcb->local_port); + tcphdr->dest = htons(pcb->remote_port); + tcphdr->seqno = htonl(pcb->snd_nxt); + tcphdr->ackno = htonl(pcb->rcv_nxt); + TCPH_FLAGS_SET(tcphdr, TCP_ACK); + tcphdr->wnd = htons(pcb->rcv_ann_wnd); + tcphdr->urgp = 0; + TCPH_HDRLEN_SET(tcphdr, (5 + optlen / 4)); + tcphdr->chksum = 0; + + /* If we're sending a packet, update the announced right window edge */ + pcb->rcv_ann_right_edge = pcb->rcv_nxt + pcb->rcv_ann_wnd; + + return tcphdr; +} + /** * Called by tcp_close() to send a segment including flags but not data. * @@ -466,15 +486,7 @@ tcp_output(struct tcp_pcb *pcb) /* remove ACK flags from the PCB, as we send an empty ACK now */ pcb->flags &= ~(TF_ACK_DELAY | TF_ACK_NOW); - tcphdr = p->payload; - tcphdr->src = htons(pcb->local_port); - tcphdr->dest = htons(pcb->remote_port); - tcphdr->seqno = htonl(pcb->snd_nxt); - tcphdr->ackno = htonl(pcb->rcv_nxt); - TCPH_FLAGS_SET(tcphdr, TCP_ACK); - tcphdr->wnd = htons(pcb->rcv_ann_wnd); - tcphdr->urgp = 0; - TCPH_HDRLEN_SET(tcphdr, (5 + optlen / 4)); + tcphdr = tcp_output_set_header(pcb, p, optlen); /* NB. MSS option is only sent on SYNs, so ignore it here */ #if LWIP_TCP_TIMESTAMPS @@ -484,7 +496,6 @@ tcp_output(struct tcp_pcb *pcb) tcp_build_timestamp_option(pcb, (u32_t *)(tcphdr + 1)); #endif - tcphdr->chksum = 0; #if CHECKSUM_GEN_TCP tcphdr->chksum = inet_chksum_pseudo(p, &(pcb->local_ip), &(pcb->remote_ip), IP_PROTO_TCP, p->tot_len); @@ -630,6 +641,8 @@ tcp_output_segment(struct tcp_seg *seg, struct tcp_pcb *pcb) /* advertise our receive window size in this TCP segment */ seg->tcphdr->wnd = htons(pcb->rcv_ann_wnd); + pcb->rcv_ann_right_edge = pcb->rcv_nxt + pcb->rcv_ann_wnd; + /* Add any requested options. NB MSS option is only set on SYN packets, so ignore it here */ opts = (u32_t *)(seg->tcphdr + 1); @@ -862,17 +875,10 @@ tcp_keepalive(struct tcp_pcb *pcb) LWIP_ASSERT("check that first pbuf can hold struct tcp_hdr", (p->len >= sizeof(struct tcp_hdr))); - tcphdr = p->payload; - tcphdr->src = htons(pcb->local_port); - tcphdr->dest = htons(pcb->remote_port); - tcphdr->seqno = htonl(pcb->snd_nxt - 1); - tcphdr->ackno = htonl(pcb->rcv_nxt); - TCPH_FLAGS_SET(tcphdr, TCP_ACK); - tcphdr->wnd = htons(pcb->rcv_ann_wnd); - tcphdr->urgp = 0; - TCPH_HDRLEN_SET(tcphdr, 5); + tcphdr = tcp_output_set_header(pcb, p, 0); + + tcphdr->seqno = htonl(pcb->snd_nxt - 1); - tcphdr->chksum = 0; #if CHECKSUM_GEN_TCP tcphdr->chksum = inet_chksum_pseudo(p, &pcb->local_ip, &pcb->remote_ip, IP_PROTO_TCP, p->tot_len); @@ -945,20 +951,13 @@ tcp_zero_window_probe(struct tcp_pcb *pcb) LWIP_ASSERT("check that first pbuf can hold struct tcp_hdr", (p->len >= sizeof(struct tcp_hdr))); - tcphdr = p->payload; - tcphdr->src = htons(pcb->local_port); - tcphdr->dest = htons(pcb->remote_port); + tcphdr = tcp_output_set_header(pcb, p, 0); + tcphdr->seqno = seg->tcphdr->seqno; - tcphdr->ackno = htonl(pcb->rcv_nxt); - TCPH_FLAGS_SET(tcphdr, TCP_ACK); - tcphdr->wnd = htons(pcb->rcv_ann_wnd); - tcphdr->urgp = 0; - TCPH_HDRLEN_SET(tcphdr, 5); /* Copy in one byte from the head of the unacked queue */ *((char *)p->payload + sizeof(struct tcp_hdr)) = *(char *)seg->dataptr; - tcphdr->chksum = 0; #if CHECKSUM_GEN_TCP tcphdr->chksum = inet_chksum_pseudo(p, &pcb->local_ip, &pcb->remote_ip, IP_PROTO_TCP, p->tot_len); diff --git a/src/include/lwip/opt.h b/src/include/lwip/opt.h index 16807be5..31aba4b2 100644 --- a/src/include/lwip/opt.h +++ b/src/include/lwip/opt.h @@ -681,7 +681,8 @@ #endif /** - * TCP_WND: The size of a TCP window. + * TCP_WND: The size of a TCP window. This must be at least + * (2 * TCP_MSS) for things to work well */ #ifndef TCP_WND #define TCP_WND 2048 @@ -780,6 +781,14 @@ #define LWIP_TCP_TIMESTAMPS 0 #endif +/** + * TCP_WND_UPDATE_THRESHOLD: difference in window to trigger an + * explicit window update + */ +#ifndef TCP_WND_UPDATE_THRESHOLD +#define TCP_WND_UPDATE_THRESHOLD (TCP_WND / 4) +#endif + /** * LWIP_EVENT_API and LWIP_CALLBACK_API: Only one of these should be set to 1. * LWIP_EVENT_API==1: The user defines lwip_tcp_event() to receive all diff --git a/src/include/lwip/tcp.h b/src/include/lwip/tcp.h index 83841571..e9e82fb6 100644 --- a/src/include/lwip/tcp.h +++ b/src/include/lwip/tcp.h @@ -125,6 +125,7 @@ void tcp_input (struct pbuf *p, struct netif *inp); err_t tcp_output (struct tcp_pcb *pcb); void tcp_rexmit (struct tcp_pcb *pcb); void tcp_rexmit_rto (struct tcp_pcb *pcb); +u32_t tcp_update_rcv_ann_wnd(struct tcp_pcb *pcb); /** * This is the Nagle algorithm: inhibit the sending of new TCP @@ -304,8 +305,9 @@ struct tcp_pcb { as we have to do some math with them */ /* receiver variables */ u32_t rcv_nxt; /* next seqno expected */ - u16_t rcv_wnd; /* receiver window */ - u16_t rcv_ann_wnd; /* announced receive window */ + u16_t rcv_wnd; /* receiver window available */ + u16_t rcv_ann_wnd; /* receiver window to announce */ + u32_t rcv_ann_right_edge; /* announced right edge of window */ /* Timers */ u32_t tmr;