From b1a3c37c3c5b38e8cee9f77c1bf048b11dcadc0f Mon Sep 17 00:00:00 2001 From: Jakub Schmidtke Date: Thu, 29 Jun 2017 17:06:16 -0400 Subject: [PATCH] Partial SACK (RFC 2018) support Adds partial support for selective acknowledgements (RFC 2018). This change makes lwIP negotiate SACK support, and include SACK data in outgoing empty ACK packets. It does not include it in outgoing packets with data payload. It also does not add support for handling incoming SACKs. Signed-off-by: goldsimon --- src/core/init.c | 6 + src/core/tcp.c | 6 + src/core/tcp_in.c | 251 ++++++++++++++++++++++++++++++- src/core/tcp_out.c | 111 +++++++++++++- src/include/lwip/opt.h | 21 +++ src/include/lwip/priv/tcp_priv.h | 16 +- src/include/lwip/tcp.h | 16 ++ 7 files changed, 417 insertions(+), 10 deletions(-) diff --git a/src/core/init.c b/src/core/init.c index a361cb20..31bb6b74 100644 --- a/src/core/init.c +++ b/src/core/init.c @@ -164,6 +164,12 @@ PACK_STRUCT_END #if (LWIP_TCP && TCP_LISTEN_BACKLOG && ((TCP_DEFAULT_LISTEN_BACKLOG < 0) || (TCP_DEFAULT_LISTEN_BACKLOG > 0xff))) #error "If you want to use TCP backlog, TCP_DEFAULT_LISTEN_BACKLOG must fit into an u8_t" #endif +#if (LWIP_TCP && LWIP_TCP_SACK_OUT && !TCP_QUEUE_OOSEQ) +#error "To use LWIP_TCP_SACK_OUT, TCP_QUEUE_OOSEQ needs to be enabled" +#endif +#if (LWIP_TCP && LWIP_TCP_SACK_OUT && (LWIP_TCP_MAX_SACK_NUM < 1)) +#error "LWIP_TCP_MAX_SACK_NUM must be greater than 0" +#endif #if (LWIP_NETIF_API && (NO_SYS==1)) #error "If you want to use NETIF API, you have to define NO_SYS=0 in your lwipopts.h" #endif diff --git a/src/core/tcp.c b/src/core/tcp.c index 88a2ea5b..a3b02a3a 100644 --- a/src/core/tcp.c +++ b/src/core/tcp.c @@ -1179,6 +1179,9 @@ tcp_slowtmr_start: tcp_segs_free(pcb->ooseq); pcb->ooseq = NULL; LWIP_DEBUGF(TCP_CWND_DEBUG, ("tcp_slowtmr: dropping OOSEQ queued data\n")); +#if LWIP_TCP_SACK_OUT + memset(pcb->rcv_sacks, 0, sizeof(pcb->rcv_sacks)); +#endif } #endif /* TCP_QUEUE_OOSEQ */ @@ -1888,6 +1891,9 @@ tcp_pcb_purge(struct 
tcp_pcb *pcb) } tcp_segs_free(pcb->ooseq); pcb->ooseq = NULL; +#if LWIP_TCP_SACK_OUT + memset(pcb->rcv_sacks, 0, sizeof(pcb->rcv_sacks)); +#endif #endif /* TCP_QUEUE_OOSEQ */ /* Stop the retransmission timer as it will expect data on unacked diff --git a/src/core/tcp_in.c b/src/core/tcp_in.c index e3234d39..11eb11a1 100644 --- a/src/core/tcp_in.c +++ b/src/core/tcp_in.c @@ -89,6 +89,12 @@ static void tcp_parseopt(struct tcp_pcb *pcb); static void tcp_listen_input(struct tcp_pcb_listen *pcb); static void tcp_timewait_input(struct tcp_pcb *pcb); +#if LWIP_TCP_SACK_OUT +static void tcp_add_sack(struct tcp_pcb *pcb, u32_t left, u32_t right); +static void tcp_remove_sacks_lt(struct tcp_pcb *pcb, u32_t seq); +static void tcp_remove_sacks_gt(struct tcp_pcb *pcb, u32_t seq); +#endif + /** * The initial input processing of TCP. It verifies the TCP header, demultiplexes * the segment between the PCBs and passes it on to tcp_process(), which implements @@ -1485,6 +1491,19 @@ tcp_receive(struct tcp_pcb *pcb) } pcb->ooseq = next; } + +#if LWIP_TCP_SACK_OUT + if (pcb->flags & TF_SACK) { + if (pcb->ooseq != NULL) { + /* Some segments may have been removed from ooseq, let's remove all SACKs that + describe anything before the new beginning of that list. */ + tcp_remove_sacks_lt(pcb, pcb->ooseq->tcphdr->seqno); + } else { + /* ooseq has been cleared. Nothing to SACK */ + memset(pcb->rcv_sacks, 0, sizeof(pcb->rcv_sacks)); + } + } +#endif } #endif /* TCP_QUEUE_OOSEQ */ @@ -1563,6 +1582,16 @@ tcp_receive(struct tcp_pcb *pcb) /* Acknowledge the segment(s). */ tcp_ack(pcb); +#if LWIP_TCP_SACK_OUT + if (pcb->rcv_sacks[0].left != pcb->rcv_sacks[0].right) { + /* Normally the ACK for the data received could be piggy-backed on a data packet, + but lwIP currently does not support including SACKs in data packets. So we force + it to respond with an empty ACK packet (only if there is at least one SACK to be sent). 
+ NOTE: tcp_send_empty_ack() on success clears the ACK flags (set by tcp_ack()) */ + tcp_send_empty_ack(pcb); + } +#endif + #if LWIP_IPV6 && LWIP_ND6_TCP_REACHABILITY_HINTS if (ip_current_is_v6()) { /* Inform neighbor reachability of forward progress. */ @@ -1572,11 +1601,18 @@ tcp_receive(struct tcp_pcb *pcb) } else { /* We get here if the incoming segment is out-of-sequence. */ - tcp_send_empty_ack(pcb); + #if TCP_QUEUE_OOSEQ /* We queue the segment on the ->ooseq queue. */ if (pcb->ooseq == NULL) { pcb->ooseq = tcp_seg_copy(&inseg); +#if LWIP_TCP_SACK_OUT + if (pcb->flags & TF_SACK) { + /* All the SACKs should be invalid, so we can simply store the most recent one: */ + pcb->rcv_sacks[0].left = seqno; + pcb->rcv_sacks[0].right = seqno + inseg.len; + } +#endif } else { /* If the queue is not empty, we walk through the queue and try to find a place where the sequence number of the @@ -1590,6 +1626,11 @@ tcp_receive(struct tcp_pcb *pcb) segment on the ->ooseq queue, we discard the segment that contains less data. */ +#if LWIP_TCP_SACK_OUT + /* This is the left edge of the lowest possible SACK range. + It may start before the newly received segment. */ + u32_t sackbeg = TCP_SEQ_LT(seqno, pcb->ooseq->tcphdr->seqno) ? seqno : pcb->ooseq->tcphdr->seqno; +#endif prev = NULL; for (next = pcb->ooseq; next != NULL; next = next->next) { if (seqno == next->tcphdr->seqno) { @@ -1653,6 +1694,20 @@ tcp_receive(struct tcp_pcb *pcb) break; } } + +#if LWIP_TCP_SACK_OUT + /* The new segment goes after the 'next' one. If there is a "hole" in sequence numbers + between 'prev' and the beginning of 'next', we want to move sackbeg. */ + if (prev != NULL && prev->tcphdr->seqno + prev->len != next->tcphdr->seqno) { + sackbeg = next->tcphdr->seqno; + } +#endif + + /* We don't use 'prev' below, so let's set it to current 'next'. + This way even if we break the loop below, 'prev' will be pointing + at the segment right in front of the newly added one. 
*/ + prev = next; + /* If the "next" segment is the last segment on the ooseq queue, we add the incoming segment to the end of the list. */ @@ -1691,8 +1746,33 @@ tcp_receive(struct tcp_pcb *pcb) break; } } - prev = next; } + +#if LWIP_TCP_SACK_OUT + if (pcb->flags & TF_SACK) { + if (prev == NULL) { + /* The new segment is at the beginning. sackbeg should already be set properly. + We need to find the right edge. */ + next = pcb->ooseq; + } else if (prev->next != NULL) { + /* The new segment was added after 'prev'. If there is a "hole" between 'prev' and 'prev->next', + we need to move sackbeg. After that we should find the right edge. */ + next = prev->next; + if (prev->tcphdr->seqno + prev->len != next->tcphdr->seqno) { + sackbeg = next->tcphdr->seqno; + } + } else { + next = NULL; + } + if (next != NULL) { + u32_t sackend = next->tcphdr->seqno; + for ( ; (next != NULL) && (sackend == next->tcphdr->seqno); next = next->next) { + sackend += next->len; + } + tcp_add_sack(pcb, sackbeg, sackend); + } + } +#endif /* LWIP_TCP_SACK_OUT */ } #if TCP_OOSEQ_MAX_BYTES || TCP_OOSEQ_MAX_PBUFS /* Check that the data on ooseq doesn't exceed one of the limits @@ -1706,6 +1786,12 @@ tcp_receive(struct tcp_pcb *pcb) ooseq_qlen += pbuf_clen(p); if ((ooseq_blen > TCP_OOSEQ_MAX_BYTES) || (ooseq_qlen > TCP_OOSEQ_MAX_PBUFS)) { +#if LWIP_TCP_SACK_OUT + if (pcb->flags & TF_SACK) { + /* Let's remove all SACKs from next's seqno up. */ + tcp_remove_sacks_gt(pcb, next->tcphdr->seqno); + } +#endif /* too much ooseq data, dump this and everything after it */ tcp_segs_free(next); if (prev == NULL) { @@ -1720,6 +1806,10 @@ tcp_receive(struct tcp_pcb *pcb) } #endif /* TCP_OOSEQ_MAX_BYTES || TCP_OOSEQ_MAX_PBUFS */ #endif /* TCP_QUEUE_OOSEQ */ + + /* We send the ACK packet after we've (potentially) dealt with SACKs, + so they can be included in the acknowledgment. */ + tcp_send_empty_ack(pcb); } } else { /* The incoming segment is not within the window. 
*/ @@ -1839,6 +1929,21 @@ tcp_parseopt(struct tcp_pcb *pcb) /* Advance to next option (6 bytes already read) */ tcp_optidx += LWIP_TCP_OPT_LEN_TS - 6; break; +#endif +#if LWIP_TCP_SACK_OUT + case LWIP_TCP_OPT_SACK_PERM: + LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_parseopt: SACK_PERM\n")); + if (tcp_getoptbyte() != LWIP_TCP_OPT_LEN_SACK_PERM || (tcp_optidx - 2 + LWIP_TCP_OPT_LEN_SACK_PERM) > tcphdr_optlen) { + /* Bad length */ + LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_parseopt: bad length\n")); + return; + } + /* TCP SACK_PERM option with valid length */ + if (flags & TCP_SYN) { + /* We only set it if we receive it in a SYN (or SYN+ACK) packet */ + pcb->flags |= TF_SACK; + } + break; #endif default: LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_parseopt: other\n")); @@ -1863,4 +1968,146 @@ tcp_trigger_input_pcb_close(void) recv_flags |= TF_CLOSED; } +#if LWIP_TCP_SACK_OUT +/** + * Called by tcp_receive() to add new SACK entry. + * + * The new SACK entry will be placed at the beginning of rcv_sacks[], as the newest one. + * Existing SACK entries will be "pushed back", to preserve their order. + * This is the behavior described in RFC 2018, section 4. + * + * @param pcb the tcp_pcb for which a segment arrived + * @param left the left side of the SACK (the first sequence number) + * @param right the right side of the SACK (the first sequence number past this SACK) + */ +static void +tcp_add_sack(struct tcp_pcb *pcb, u32_t left, u32_t right) +{ + u8_t i; + u8_t unused_idx; + + if ((pcb->flags & TF_SACK) == 0 || !TCP_SEQ_LT(left, right)) { + return; + } + + /* First, let's remove all SACKs that are no longer needed (because they overlap with the newest one), + while moving all other SACKs forward. + We run this loop for all entries, until we find the first invalid one. + There is no point checking after that. 
*/ + for (i = unused_idx = 0; (i < LWIP_TCP_MAX_SACK_NUM) && (pcb->rcv_sacks[i].left != pcb->rcv_sacks[i].right); ++i) { + /* We only want to use SACK at [i] if it doesn't overlap with left:right range. + It does not overlap if its right side is before the newly added SACK, + or if its left side is after the newly added SACK. + NOTE: The equality should not really happen, but it doesn't hurt. */ + if (TCP_SEQ_LEQ(pcb->rcv_sacks[i].right, left) || TCP_SEQ_LEQ(right, pcb->rcv_sacks[i].left)) { + if (unused_idx != i) { + /* We don't need to copy if it's already in the right spot */ + pcb->rcv_sacks[unused_idx] = pcb->rcv_sacks[i]; + } + ++unused_idx; + } + } + + /* Now 'unused_idx' is the index of the first invalid SACK entry, + anywhere between 0 (no valid entries) and LWIP_TCP_MAX_SACK_NUM (all entries are valid). + We want to clear this and all following SACKs. + However, we will be adding another one in the front (and shifting everything else back). + So let's just iterate from the back, and set each entry to the one to the left if it's valid, + or to 0 if it is not. */ + for (i = LWIP_TCP_MAX_SACK_NUM - 1; i > 0; --i) { + /* [i] is the index we are setting, and the value should be at index [i-1], + or 0 if that index is unused (>= unused_idx). */ + if (i-1 >= unused_idx) { + /* [i-1] is unused. Let's clear [i]. */ + pcb->rcv_sacks[i].left = pcb->rcv_sacks[i].right = 0; + } else { + pcb->rcv_sacks[i] = pcb->rcv_sacks[i-1]; + } + } + + /* And now we can store the newest SACK */ + pcb->rcv_sacks[0].left = left; + pcb->rcv_sacks[0].right = right; +} + +/** + * Called to remove a range of SACKs. + * + * SACK entries will be removed or adjusted to not acknowledge any sequence + * numbers that are less than 'seq' passed. It not only invalidates entries, + * but also moves all entries that are still valid to the beginning. 
+ * + * @param pcb the tcp_pcb to modify + * @param seq the lowest sequence number to keep in SACK entries + */ +static void +tcp_remove_sacks_lt(struct tcp_pcb *pcb, u32_t seq) +{ + u8_t i; + u8_t unused_idx; + + /* We run this loop for all entries, until we find the first invalid one. + There is no point checking after that. */ + for (i = unused_idx = 0; (i < LWIP_TCP_MAX_SACK_NUM) && (pcb->rcv_sacks[i].left != pcb->rcv_sacks[i].right); ++i) { + /* We only want to use SACK at index [i] if its right side is > 'seq'. */ + if (TCP_SEQ_GT(pcb->rcv_sacks[i].right, seq)) { + if (unused_idx != i) { + /* We only copy it if it's not in the right spot already. */ + pcb->rcv_sacks[unused_idx] = pcb->rcv_sacks[i]; + } + /* NOTE: It is possible that its left side is < 'seq', in which case we should adjust it. */ + if (TCP_SEQ_LT(pcb->rcv_sacks[unused_idx].left, seq)) { + pcb->rcv_sacks[unused_idx].left = seq; + } + ++unused_idx; + } + } + + /* We also need to invalidate everything from 'unused_idx' till the end */ + for (i = unused_idx; i < LWIP_TCP_MAX_SACK_NUM; ++i) { + pcb->rcv_sacks[i].left = pcb->rcv_sacks[i].right = 0; + } +} + +/** + * Called to remove a range of SACKs. + * + * SACK entries will be removed or adjusted to not acknowledge any sequence + * numbers that are greater than (or equal to) 'seq' passed. It not only invalidates entries, + * but also moves all entries that are still valid to the beginning. + * + * @param pcb the tcp_pcb to modify + * @param seq the highest sequence number to keep in SACK entries + */ +static void +tcp_remove_sacks_gt(struct tcp_pcb *pcb, u32_t seq) +{ + u8_t i; + u8_t unused_idx; + + /* We run this loop for all entries, until we find the first invalid one. + There is no point checking after that. */ + for (i = unused_idx = 0; (i < LWIP_TCP_MAX_SACK_NUM) && (pcb->rcv_sacks[i].left != pcb->rcv_sacks[i].right); ++i) { + /* We only want to use SACK at index [i] if its left side is < 'seq'. 
*/ + if (TCP_SEQ_LT(pcb->rcv_sacks[i].left, seq)) { + if (unused_idx != i) { + /* We only copy it if it's not in the right spot already. */ + pcb->rcv_sacks[unused_idx] = pcb->rcv_sacks[i]; + } + /* NOTE: It is possible that its right side is > 'seq', in which case we should adjust it. */ + if (TCP_SEQ_GT(pcb->rcv_sacks[unused_idx].right, seq)) { + pcb->rcv_sacks[unused_idx].right = seq; + } + ++unused_idx; + } + } + + /* We also need to invalidate everything from 'unused_idx' till the end */ + for (i = unused_idx; i < LWIP_TCP_MAX_SACK_NUM; ++i) { + pcb->rcv_sacks[i].left = pcb->rcv_sacks[i].right = 0; + } +} + +#endif /* LWIP_TCP_SACK_OUT */ + #endif /* LWIP_TCP */ diff --git a/src/core/tcp_out.c b/src/core/tcp_out.c index b3a7ce7c..051cc108 100644 --- a/src/core/tcp_out.c +++ b/src/core/tcp_out.c @@ -833,6 +833,13 @@ tcp_enqueue_flags(struct tcp_pcb *pcb, u8_t flags) optflags |= TF_SEG_OPTS_WND_SCALE; } #endif /* LWIP_WND_SCALE */ +#if LWIP_TCP_SACK_OUT + if ((pcb->state != SYN_RCVD) || (pcb->flags & TF_SACK)) { + /* In a <SYN,ACK> (sent in state SYN_RCVD), the SACK_PERM option may only + be sent if we received a SACK_PERM option from the remote host. */ + optflags |= TF_SEG_OPTS_SACK_PERM; + } +#endif /* LWIP_TCP_SACK_OUT */ } #if LWIP_TCP_TIMESTAMPS if ((pcb->flags & TF_TIMESTAMP) || ((flags & TCP_SYN) && (pcb->state != SYN_RCVD))) { @@ -916,6 +923,63 @@ tcp_build_timestamp_option(struct tcp_pcb *pcb, u32_t *opts) } #endif +#if LWIP_TCP_SACK_OUT +/** + * Calculates the number of SACK entries that should be generated. + * It takes into account whether TF_SACK flag is set, + * the number of SACK entries in tcp_pcb that are valid, + * as well as the available options size. 
+ * + * @param pcb tcp_pcb + * @param optlen the length of other TCP options (in bytes) + * @return the number of SACK ranges that can be used + */ +static u8_t +tcp_get_num_sacks(struct tcp_pcb *pcb, u8_t optlen) +{ + u8_t num_sacks = 0; + + if (pcb->flags & TF_SACK) { + u8_t i; + + /* The first SACK takes up 12 bytes (it includes SACK header and two NOP options), + each additional one - 8 bytes. */ + optlen += 12; + + /* Max options size = 40, number of SACK array entries = LWIP_TCP_MAX_SACK_NUM */ + for (i = 0; (i < LWIP_TCP_MAX_SACK_NUM) && (optlen <= 40) && (pcb->rcv_sacks[i].left != pcb->rcv_sacks[i].right); ++i) { + ++num_sacks; + optlen += 8; + } + } + + return num_sacks; +} + +/** Build a SACK option (12 or more bytes long) at the specified options pointer) + * + * @param pcb tcp_pcb + * @param opts option pointer where to store the SACK option + * @param num_sacks the number of SACKs to store + */ +static void +tcp_build_sack_option(struct tcp_pcb *pcb, u32_t *opts, u8_t num_sacks) +{ + u8_t i; + + /* Pad with two NOP options to make everything nicely aligned. + We add the length (of just the SACK option, not the NOPs in front of it), + which is 2B of header, plus 8B for each SACK. 
*/ + *(opts++) = PP_HTONL(0x01010500 + 2 + num_sacks * 8); + + for (i = 0; i < num_sacks; ++i) { + *(opts++) = lwip_htonl(pcb->rcv_sacks[i].left); + *(opts++) = lwip_htonl(pcb->rcv_sacks[i].right); + } +} + +#endif + #if LWIP_WND_SCALE /** Build a window scale option (3 bytes long) at the specified options pointer) * @@ -941,9 +1005,15 @@ tcp_send_empty_ack(struct tcp_pcb *pcb) struct pbuf *p; u8_t optlen = 0; struct netif *netif; -#if LWIP_TCP_TIMESTAMPS || CHECKSUM_GEN_TCP +#if CHECKSUM_GEN_TCP || LWIP_TCP_TIMESTAMPS || LWIP_TCP_SACK_OUT struct tcp_hdr *tcphdr; -#endif /* LWIP_TCP_TIMESTAMPS || CHECKSUM_GEN_TCP */ +#if LWIP_TCP_TIMESTAMPS || LWIP_TCP_SACK_OUT + u32_t *opts; +#if LWIP_TCP_SACK_OUT + u8_t num_sacks; +#endif /* LWIP_TCP_SACK_OUT */ +#endif /* LWIP_TCP_TIMESTAMPS || LWIP_TCP_SACK_OUT */ +#endif /* CHECKSUM_GEN_TCP || LWIP_TCP_TIMESTAMPS || LWIP_TCP_SACK_OUT */ #if LWIP_TCP_TIMESTAMPS if (pcb->flags & TF_TIMESTAMP) { @@ -951,6 +1021,12 @@ tcp_send_empty_ack(struct tcp_pcb *pcb) } #endif +#if LWIP_TCP_SACK_OUT + if ((num_sacks = tcp_get_num_sacks(pcb, optlen)) > 0) { + optlen += 4 + num_sacks * 8; /* 4 bytes for header (including 2*NOP), plus 8B for each SACK */ + } +#endif + p = tcp_output_alloc_header(pcb, optlen, 0, lwip_htonl(pcb->snd_nxt)); if (p == NULL) { /* let tcp_fasttmr retry sending this ACK */ @@ -958,9 +1034,15 @@ tcp_send_empty_ack(struct tcp_pcb *pcb) LWIP_DEBUGF(TCP_OUTPUT_DEBUG, ("tcp_output: (ACK) could not allocate pbuf\n")); return ERR_BUF; } -#if LWIP_TCP_TIMESTAMPS || CHECKSUM_GEN_TCP + +#if CHECKSUM_GEN_TCP || LWIP_TCP_TIMESTAMPS || LWIP_TCP_SACK_OUT tcphdr = (struct tcp_hdr *)p->payload; -#endif /* LWIP_TCP_TIMESTAMPS || CHECKSUM_GEN_TCP */ +#if LWIP_TCP_TIMESTAMPS || LWIP_TCP_SACK_OUT + /* cast through void* to get rid of alignment warnings */ + opts = (u32_t *)(void *)(tcphdr + 1); +#endif /* LWIP_TCP_TIMESTAMPS || LWIP_TCP_SACK_OUT */ +#endif /* CHECKSUM_GEN_TCP || LWIP_TCP_TIMESTAMPS || LWIP_TCP_SACK_OUT */ + 
LWIP_DEBUGF(TCP_OUTPUT_DEBUG, ("tcp_output: sending ACK for %"U32_F"\n", pcb->rcv_nxt)); @@ -969,7 +1051,16 @@ tcp_send_empty_ack(struct tcp_pcb *pcb) pcb->ts_lastacksent = pcb->rcv_nxt; if (pcb->flags & TF_TIMESTAMP) { - tcp_build_timestamp_option(pcb, (u32_t *)(tcphdr + 1)); + tcp_build_timestamp_option(pcb, opts); + opts += 3; + } +#endif + +#if LWIP_TCP_SACK_OUT + if (num_sacks > 0) { + tcp_build_sack_option(pcb, opts, num_sacks); + /* 1 word for SACKs header (including 2xNOP), and 2 words for each SACK */ + opts += 1 + num_sacks * 2; } #endif @@ -1272,6 +1363,16 @@ tcp_output_segment(struct tcp_seg *seg, struct tcp_pcb *pcb, struct netif *netif opts += 1; } #endif +#if LWIP_TCP_SACK_OUT + if (seg->flags & TF_SEG_OPTS_SACK_PERM) { + /* Pad with two NOP options to make everything nicely aligned + * NOTE: When we send both timestamp and SACK_PERM options, + * we could use the first two NOPs before the timestamp to store SACK_PERM option, + * but that would complicate the code. + */ + *(opts++) = PP_HTONL(0x01010402); + } +#endif /* Set retransmission timer running if it is not currently enabled This must be set before checking the route. */ diff --git a/src/include/lwip/opt.h b/src/include/lwip/opt.h index c87e794c..8faea95d 100644 --- a/src/include/lwip/opt.h +++ b/src/include/lwip/opt.h @@ -1211,6 +1211,27 @@ #define TCP_QUEUE_OOSEQ (LWIP_TCP) #endif +/** + * LWIP_TCP_SACK_OUT==1: TCP will support sending selective acknowledgements (SACKs). + */ +#if !defined LWIP_TCP_SACK_OUT || defined __DOXYGEN__ +#define LWIP_TCP_SACK_OUT 0 +#endif + +/** + * LWIP_TCP_MAX_SACK_NUM: The maximum number of SACK values to include in TCP packets. + * Must be at least 1, but is only used if LWIP_TCP_SACK_OUT is enabled. + * NOTE: Even though we never send more than 4 SACK ranges in a single packet + * (depending on other options), setting this option to values greater than 4 is not pointless. + * This is basically the max number of SACK ranges we want to keep track of. 
+ * As new data is delivered, some of the SACK ranges may be removed or merged. + * In that case some of those older SACK ranges may be used again. + * The amount of memory used to store SACK ranges is LWIP_TCP_MAX_SACK_NUM * 8 bytes for each TCP PCB. + */ +#if !defined LWIP_TCP_MAX_SACK_NUM || defined __DOXYGEN__ +#define LWIP_TCP_MAX_SACK_NUM 4 +#endif + /** * TCP_MSS: TCP Maximum segment size. (default is 536, a conservative default, * you might want to increase this.) diff --git a/src/include/lwip/priv/tcp_priv.h b/src/include/lwip/priv/tcp_priv.h index bbf4305c..1670805c 100644 --- a/src/include/lwip/priv/tcp_priv.h +++ b/src/include/lwip/priv/tcp_priv.h @@ -264,6 +264,7 @@ struct tcp_seg { #define TF_SEG_DATA_CHECKSUMMED (u8_t)0x04U /* ALL data (not the header) is checksummed into 'chksum' */ #define TF_SEG_OPTS_WND_SCALE (u8_t)0x08U /* Include WND SCALE option */ +#define TF_SEG_OPTS_SACK_PERM (u8_t)0x10U /* Include SACK Permitted option */ struct tcp_hdr *tcphdr; /* the TCP header */ }; @@ -271,6 +272,7 @@ struct tcp_seg { #define LWIP_TCP_OPT_NOP 1 #define LWIP_TCP_OPT_MSS 2 #define LWIP_TCP_OPT_WS 3 +#define LWIP_TCP_OPT_SACK_PERM 4 #define LWIP_TCP_OPT_TS 8 #define LWIP_TCP_OPT_LEN_MSS 4 @@ -287,10 +289,18 @@ struct tcp_seg { #define LWIP_TCP_OPT_LEN_WS_OUT 0 #endif +#if LWIP_TCP_SACK_OUT +#define LWIP_TCP_OPT_LEN_SACK_PERM 2 +#define LWIP_TCP_OPT_LEN_SACK_PERM_OUT 4 /* aligned for output (includes NOP padding) */ +#else +#define LWIP_TCP_OPT_LEN_SACK_PERM_OUT 0 +#endif + #define LWIP_TCP_OPT_LENGTH(flags) \ - (flags & TF_SEG_OPTS_MSS ? LWIP_TCP_OPT_LEN_MSS : 0) + \ - (flags & TF_SEG_OPTS_TS ? LWIP_TCP_OPT_LEN_TS_OUT : 0) + \ - (flags & TF_SEG_OPTS_WND_SCALE ? LWIP_TCP_OPT_LEN_WS_OUT : 0) + (flags & TF_SEG_OPTS_MSS ? LWIP_TCP_OPT_LEN_MSS : 0) + \ + (flags & TF_SEG_OPTS_TS ? LWIP_TCP_OPT_LEN_TS_OUT : 0) + \ + (flags & TF_SEG_OPTS_WND_SCALE ? LWIP_TCP_OPT_LEN_WS_OUT : 0) + \ + (flags & TF_SEG_OPTS_SACK_PERM ? 
LWIP_TCP_OPT_LEN_SACK_PERM_OUT : 0) /** This returns a TCP header option for MSS in an u32_t */ #define TCP_BUILD_MSS_OPTION(mss) lwip_htonl(0x02040000 | ((mss) & 0xFFFF)) diff --git a/src/include/lwip/tcp.h b/src/include/lwip/tcp.h index 6b7a2b1e..ccc1ab7d 100644 --- a/src/include/lwip/tcp.h +++ b/src/include/lwip/tcp.h @@ -214,6 +214,9 @@ struct tcp_pcb { #define TF_TIMESTAMP 0x0400U /* Timestamp option enabled */ #endif #define TF_RTO 0x0800U /* RTO timer has fired, in-flight data moved to unsent and being retransmitted */ +#if LWIP_TCP_SACK_OUT +#define TF_SACK 0x1000U /* Selective ACKs enabled */ +#endif /* the rest of the fields are in host byte order as we have to do some math with them */ @@ -229,6 +232,19 @@ struct tcp_pcb { tcpwnd_size_t rcv_ann_wnd; /* receiver window to announce */ u32_t rcv_ann_right_edge; /* announced right edge of window */ +#if LWIP_TCP_SACK_OUT + /* SACK ranges to include in ACK packets. + SACK entry is invalid if left=right. */ + struct + { + /* Left edge of the SACK: the first acknowledged sequence number. */ + u32_t left; + + /* Right edge of the SACK: the last acknowledged sequence number +1 (so first NOT acknowledged). */ + u32_t right; + } rcv_sacks[LWIP_TCP_MAX_SACK_NUM]; +#endif /* LWIP_TCP_SACK_OUT */ + /* Retransmission timer. */ s16_t rtime;