diff --git a/src/core/init.c b/src/core/init.c index a361cb20..31bb6b74 100644 --- a/src/core/init.c +++ b/src/core/init.c @@ -164,6 +164,12 @@ PACK_STRUCT_END #if (LWIP_TCP && TCP_LISTEN_BACKLOG && ((TCP_DEFAULT_LISTEN_BACKLOG < 0) || (TCP_DEFAULT_LISTEN_BACKLOG > 0xff))) #error "If you want to use TCP backlog, TCP_DEFAULT_LISTEN_BACKLOG must fit into an u8_t" #endif +#if (LWIP_TCP && LWIP_TCP_SACK_OUT && !TCP_QUEUE_OOSEQ) +#error "To use LWIP_TCP_SACK_OUT, TCP_QUEUE_OOSEQ needs to be enabled" +#endif +#if (LWIP_TCP && LWIP_TCP_SACK_OUT && (LWIP_TCP_MAX_SACK_NUM < 1)) +#error "LWIP_TCP_MAX_SACK_NUM must be greater than 0" +#endif #if (LWIP_NETIF_API && (NO_SYS==1)) #error "If you want to use NETIF API, you have to define NO_SYS=0 in your lwipopts.h" #endif diff --git a/src/core/tcp.c b/src/core/tcp.c index 88a2ea5b..a3b02a3a 100644 --- a/src/core/tcp.c +++ b/src/core/tcp.c @@ -1179,6 +1179,9 @@ tcp_slowtmr_start: tcp_segs_free(pcb->ooseq); pcb->ooseq = NULL; LWIP_DEBUGF(TCP_CWND_DEBUG, ("tcp_slowtmr: dropping OOSEQ queued data\n")); +#if LWIP_TCP_SACK_OUT + memset(pcb->rcv_sacks, 0, sizeof(pcb->rcv_sacks)); +#endif } #endif /* TCP_QUEUE_OOSEQ */ @@ -1888,6 +1891,9 @@ tcp_pcb_purge(struct tcp_pcb *pcb) } tcp_segs_free(pcb->ooseq); pcb->ooseq = NULL; +#if LWIP_TCP_SACK_OUT + memset(pcb->rcv_sacks, 0, sizeof(pcb->rcv_sacks)); +#endif #endif /* TCP_QUEUE_OOSEQ */ /* Stop the retransmission timer as it will expect data on unacked diff --git a/src/core/tcp_in.c b/src/core/tcp_in.c index e3234d39..11eb11a1 100644 --- a/src/core/tcp_in.c +++ b/src/core/tcp_in.c @@ -89,6 +89,12 @@ static void tcp_parseopt(struct tcp_pcb *pcb); static void tcp_listen_input(struct tcp_pcb_listen *pcb); static void tcp_timewait_input(struct tcp_pcb *pcb); +#if LWIP_TCP_SACK_OUT +static void tcp_add_sack(struct tcp_pcb *pcb, u32_t left, u32_t right); +static void tcp_remove_sacks_lt(struct tcp_pcb *pcb, u32_t seq); +static void tcp_remove_sacks_gt(struct tcp_pcb *pcb, u32_t seq); +#endif + /** 
* The initial input processing of TCP. It verifies the TCP header, demultiplexes * the segment between the PCBs and passes it on to tcp_process(), which implements @@ -1485,6 +1491,19 @@ tcp_receive(struct tcp_pcb *pcb) } pcb->ooseq = next; } + +#if LWIP_TCP_SACK_OUT + if (pcb->flags & TF_SACK) { + if (pcb->ooseq != NULL) { + /* Some segments may have been removed from ooseq, let's remove all SACKs that + describe anything before the new beginning of that list. */ + tcp_remove_sacks_lt(pcb, pcb->ooseq->tcphdr->seqno); + } else { + /* ooseq has been cleared. Nothing to SACK */ + memset(pcb->rcv_sacks, 0, sizeof(pcb->rcv_sacks)); + } + } +#endif } #endif /* TCP_QUEUE_OOSEQ */ @@ -1563,6 +1582,16 @@ tcp_receive(struct tcp_pcb *pcb) /* Acknowledge the segment(s). */ tcp_ack(pcb); +#if LWIP_TCP_SACK_OUT + if (pcb->rcv_sacks[0].left != pcb->rcv_sacks[0].right) { + /* Normally the ACK for the data received could be piggy-backed on a data packet, + but lwIP currently does not support including SACKs in data packets. So we force + it to respond with an empty ACK packet (only if there is at least one SACK to be sent). + NOTE: tcp_send_empty_ack() on success clears the ACK flags (set by tcp_ack()) */ + tcp_send_empty_ack(pcb); + } +#endif + #if LWIP_IPV6 && LWIP_ND6_TCP_REACHABILITY_HINTS if (ip_current_is_v6()) { /* Inform neighbor reachability of forward progress. */ @@ -1572,11 +1601,18 @@ tcp_receive(struct tcp_pcb *pcb) } else { /* We get here if the incoming segment is out-of-sequence. */ - tcp_send_empty_ack(pcb); + #if TCP_QUEUE_OOSEQ /* We queue the segment on the ->ooseq queue. 
*/ if (pcb->ooseq == NULL) { pcb->ooseq = tcp_seg_copy(&inseg); +#if LWIP_TCP_SACK_OUT + if (pcb->flags & TF_SACK) { + /* All the SACKs should be invalid, so we can simply store the most recent one: */ + pcb->rcv_sacks[0].left = seqno; + pcb->rcv_sacks[0].right = seqno + inseg.len; + } +#endif } else { /* If the queue is not empty, we walk through the queue and try to find a place where the sequence number of the @@ -1590,6 +1626,11 @@ tcp_receive(struct tcp_pcb *pcb) segment on the ->ooseq queue, we discard the segment that contains less data. */ +#if LWIP_TCP_SACK_OUT + /* This is the left edge of the lowest possible SACK range. + It may start before the newly received segment. */ + u32_t sackbeg = TCP_SEQ_LT(seqno, pcb->ooseq->tcphdr->seqno) ? seqno : pcb->ooseq->tcphdr->seqno; +#endif prev = NULL; for (next = pcb->ooseq; next != NULL; next = next->next) { if (seqno == next->tcphdr->seqno) { @@ -1653,6 +1694,20 @@ tcp_receive(struct tcp_pcb *pcb) break; } } + +#if LWIP_TCP_SACK_OUT + /* The new segment goes after the 'next' one. If there is a "hole" in sequence numbers + between 'prev' and the beginning of 'next', we want to move sackbeg. */ + if (prev != NULL && prev->tcphdr->seqno + prev->len != next->tcphdr->seqno) { + sackbeg = next->tcphdr->seqno; + } +#endif + + /* We don't use 'prev' below, so let's set it to current 'next'. + This way even if we break the loop below, 'prev' will be pointing + at the segment right in front of the newly added one. */ + prev = next; + /* If the "next" segment is the last segment on the ooseq queue, we add the incoming segment to the end of the list. */ @@ -1691,8 +1746,33 @@ tcp_receive(struct tcp_pcb *pcb) break; } } - prev = next; } + +#if LWIP_TCP_SACK_OUT + if (pcb->flags & TF_SACK) { + if (prev == NULL) { + /* The new segment is at the beginning. sackbeg should already be set properly. + We need to find the right edge. 
*/ + next = pcb->ooseq; + } else if (prev->next != NULL) { + /* The new segment was added after 'prev'. If there is a "hole" between 'prev' and 'prev->next', + we need to move sackbeg. After that we should find the right edge. */ + next = prev->next; + if (prev->tcphdr->seqno + prev->len != next->tcphdr->seqno) { + sackbeg = next->tcphdr->seqno; + } + } else { + next = NULL; + } + if (next != NULL) { + u32_t sackend = next->tcphdr->seqno; + for ( ; (next != NULL) && (sackend == next->tcphdr->seqno); next = next->next) { + sackend += next->len; + } + tcp_add_sack(pcb, sackbeg, sackend); + } + } +#endif /* LWIP_TCP_SACK_OUT */ } #if TCP_OOSEQ_MAX_BYTES || TCP_OOSEQ_MAX_PBUFS /* Check that the data on ooseq doesn't exceed one of the limits @@ -1706,6 +1786,12 @@ tcp_receive(struct tcp_pcb *pcb) ooseq_qlen += pbuf_clen(p); if ((ooseq_blen > TCP_OOSEQ_MAX_BYTES) || (ooseq_qlen > TCP_OOSEQ_MAX_PBUFS)) { +#if LWIP_TCP_SACK_OUT + if (pcb->flags & TF_SACK) { + /* Let's remove all SACKs from next's seqno up. */ + tcp_remove_sacks_gt(pcb, next->tcphdr->seqno); + } +#endif /* too much ooseq data, dump this and everything after it */ tcp_segs_free(next); if (prev == NULL) { @@ -1720,6 +1806,10 @@ tcp_receive(struct tcp_pcb *pcb) } #endif /* TCP_OOSEQ_MAX_BYTES || TCP_OOSEQ_MAX_PBUFS */ #endif /* TCP_QUEUE_OOSEQ */ + + /* We send the ACK packet after we've (potentially) dealt with SACKs, + so they can be included in the acknowledgment. */ + tcp_send_empty_ack(pcb); } } else { /* The incoming segment is not within the window. 
*/ @@ -1839,6 +1929,21 @@ tcp_parseopt(struct tcp_pcb *pcb) /* Advance to next option (6 bytes already read) */ tcp_optidx += LWIP_TCP_OPT_LEN_TS - 6; break; +#endif +#if LWIP_TCP_SACK_OUT + case LWIP_TCP_OPT_SACK_PERM: + LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_parseopt: SACK_PERM\n")); + if (tcp_getoptbyte() != LWIP_TCP_OPT_LEN_SACK_PERM || (tcp_optidx - 2 + LWIP_TCP_OPT_LEN_SACK_PERM) > tcphdr_optlen) { + /* Bad length */ + LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_parseopt: bad length\n")); + return; + } + /* TCP SACK_PERM option with valid length */ + if (flags & TCP_SYN) { + /* We only set it if we receive it in a SYN (or SYN+ACK) packet */ + pcb->flags |= TF_SACK; + } + break; #endif default: LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_parseopt: other\n")); @@ -1863,4 +1968,146 @@ tcp_trigger_input_pcb_close(void) recv_flags |= TF_CLOSED; } +#if LWIP_TCP_SACK_OUT +/** + * Called by tcp_receive() to add new SACK entry. + * + * The new SACK entry will be placed at the beginning of rcv_sacks[], as the newest one. + * Existing SACK entries will be "pushed back", to preserve their order. + * This is the behavior described in RFC 2018, section 4. + * + * @param pcb the tcp_pcb for which a segment arrived + * @param left the left side of the SACK (the first sequence number) + * @param right the right side of the SACK (the first sequence number past this SACK) + */ +static void +tcp_add_sack(struct tcp_pcb *pcb, u32_t left, u32_t right) +{ + u8_t i; + u8_t unused_idx; + + if ((pcb->flags & TF_SACK) == 0 || !TCP_SEQ_LT(left, right)) { + return; + } + + /* First, let's remove all SACKs that are no longer needed (because they overlap with the newest one), + while moving all other SACKs forward. + We run this loop for all entries, until we find the first invalid one. + There is no point checking after that. 
*/ + for (i = unused_idx = 0; (i < LWIP_TCP_MAX_SACK_NUM) && (pcb->rcv_sacks[i].left != pcb->rcv_sacks[i].right); ++i) { + /* We only want to use SACK at [i] if it doesn't overlap with left:right range. + It does not overlap if its right side is before the newly added SACK, + or if its left side is after the newly added SACK. + NOTE: The equality should not really happen, but it doesn't hurt. */ + if (TCP_SEQ_LEQ(pcb->rcv_sacks[i].right, left) || TCP_SEQ_LEQ(right, pcb->rcv_sacks[i].left)) { + if (unused_idx != i) { + /* We don't need to copy if it's already in the right spot */ + pcb->rcv_sacks[unused_idx] = pcb->rcv_sacks[i]; + } + ++unused_idx; + } + } + + /* Now 'unused_idx' is the index of the first invalid SACK entry, + anywhere between 0 (no valid entries) and LWIP_TCP_MAX_SACK_NUM (all entries are valid). + We want to clear this and all following SACKs. + However, we will be adding another one in the front (and shifting everything else back). + So let's just iterate from the back, and set each entry to the one to the left if it's valid, + or to 0 if it is not. If every entry is still valid, the last (oldest) one is overwritten by the shift and thereby dropped. */ + for (i = LWIP_TCP_MAX_SACK_NUM - 1; i > 0; --i) { + /* [i] is the index we are setting, and the value should be at index [i-1], + or 0 if that index is unused (>= unused_idx). */ + if (i-1 >= unused_idx) { + /* [i-1] is unused. Let's clear [i]. */ + pcb->rcv_sacks[i].left = pcb->rcv_sacks[i].right = 0; + } else { + pcb->rcv_sacks[i] = pcb->rcv_sacks[i-1]; + } + } + + /* And now we can store the newest SACK in the front slot, which the shift above has freed */ + pcb->rcv_sacks[0].left = left; + pcb->rcv_sacks[0].right = right; +} + +/** + * Called to remove a range of SACKs. + * + * SACK entries will be removed or adjusted to not acknowledge any sequence + * numbers that are less than 'seq' passed. It not only invalidates entries, + * but also moves all entries that are still valid to the beginning. 
+ * + * @param pcb the tcp_pcb to modify + * @param seq the lowest sequence number to keep in SACK entries + */ +static void +tcp_remove_sacks_lt(struct tcp_pcb *pcb, u32_t seq) +{ + u8_t i; + u8_t unused_idx; + + /* We run this loop for all entries, until we find the first invalid one. + There is no point checking after that. */ + for (i = unused_idx = 0; (i < LWIP_TCP_MAX_SACK_NUM) && (pcb->rcv_sacks[i].left != pcb->rcv_sacks[i].right); ++i) { + /* We only want to use SACK at index [i] if its right side is > 'seq'. */ + if (TCP_SEQ_GT(pcb->rcv_sacks[i].right, seq)) { + if (unused_idx != i) { + /* We only copy it if it's not in the right spot already. */ + pcb->rcv_sacks[unused_idx] = pcb->rcv_sacks[i]; + } + /* NOTE: It is possible that its left side is < 'seq', in which case we should adjust it. */ + if (TCP_SEQ_LT(pcb->rcv_sacks[unused_idx].left, seq)) { + pcb->rcv_sacks[unused_idx].left = seq; + } + ++unused_idx; + } + } + + /* We also need to invalidate everything from 'unused_idx' till the end */ + for (i = unused_idx; i < LWIP_TCP_MAX_SACK_NUM; ++i) { + pcb->rcv_sacks[i].left = pcb->rcv_sacks[i].right = 0; + } +} + +/** + * Called to remove a range of SACKs. + * + * SACK entries will be removed or adjusted to not acknowledge any sequence + * numbers that are greater than (or equal to) 'seq' passed. It not only invalidates entries, + * but also moves all entries that are still valid to the beginning. + * + * @param pcb the tcp_pcb to modify + * @param seq the first sequence number that must no longer be SACKed: entries starting at or after it are removed, entries reaching past it get their (exclusive) right edge trimmed to 'seq' + */ +static void +tcp_remove_sacks_gt(struct tcp_pcb *pcb, u32_t seq) +{ + u8_t i; + u8_t unused_idx; + + /* We run this loop for all entries, until we find the first invalid one. + There is no point checking after that. */ + for (i = unused_idx = 0; (i < LWIP_TCP_MAX_SACK_NUM) && (pcb->rcv_sacks[i].left != pcb->rcv_sacks[i].right); ++i) { + /* We only want to use SACK at index [i] if its left side is < 'seq'. 
*/ + if (TCP_SEQ_LT(pcb->rcv_sacks[i].left, seq)) { + if (unused_idx != i) { + /* We only copy it if it's not in the right spot already. */ + pcb->rcv_sacks[unused_idx] = pcb->rcv_sacks[i]; + } + /* NOTE: It is possible that its right side is > 'seq', in which case we should adjust it. */ + if (TCP_SEQ_GT(pcb->rcv_sacks[unused_idx].right, seq)) { + pcb->rcv_sacks[unused_idx].right = seq; + } + ++unused_idx; + } + } + + /* We also need to invalidate everything from 'unused_idx' till the end */ + for (i = unused_idx; i < LWIP_TCP_MAX_SACK_NUM; ++i) { + pcb->rcv_sacks[i].left = pcb->rcv_sacks[i].right = 0; + } +} + +#endif /* LWIP_TCP_SACK_OUT */ + #endif /* LWIP_TCP */ diff --git a/src/core/tcp_out.c b/src/core/tcp_out.c index b3a7ce7c..051cc108 100644 --- a/src/core/tcp_out.c +++ b/src/core/tcp_out.c @@ -833,6 +833,13 @@ tcp_enqueue_flags(struct tcp_pcb *pcb, u8_t flags) optflags |= TF_SEG_OPTS_WND_SCALE; } #endif /* LWIP_WND_SCALE */ +#if LWIP_TCP_SACK_OUT + if ((pcb->state != SYN_RCVD) || (pcb->flags & TF_SACK)) { + /* In a SYN+ACK segment (sent in state SYN_RCVD), the SACK_PERM option may only + be sent if we received a SACK_PERM option from the remote host. In any other state the option is always offered. */ + optflags |= TF_SEG_OPTS_SACK_PERM; + } +#endif /* LWIP_TCP_SACK_OUT */ } #if LWIP_TCP_TIMESTAMPS if ((pcb->flags & TF_TIMESTAMP) || ((flags & TCP_SYN) && (pcb->state != SYN_RCVD))) { @@ -916,6 +923,63 @@ tcp_build_timestamp_option(struct tcp_pcb *pcb, u32_t *opts) } #endif +#if LWIP_TCP_SACK_OUT +/** + * Calculates the number of SACK entries that should be generated. + * It takes into account whether TF_SACK flag is set, + * the number of SACK entries in tcp_pcb that are valid, + * as well as the available options size. 
+ * + * @param pcb tcp_pcb + * @param optlen the length of other TCP options (in bytes) + * @return the number of SACK ranges that can be used + */ +static u8_t +tcp_get_num_sacks(struct tcp_pcb *pcb, u8_t optlen) +{ + u8_t num_sacks = 0; + + if (pcb->flags & TF_SACK) { + u8_t i; + + /* The first SACK takes up 12 bytes (it includes SACK header and two NOP options), + each additional one - 8 bytes. */ + optlen += 12; + + /* Max options size = 40, number of SACK array entries = LWIP_TCP_MAX_SACK_NUM. Counting starts at index 0 and stops at the first invalid entry (left == right) or as soon as the next range would no longer fit. */ + for (i = 0; (i < LWIP_TCP_MAX_SACK_NUM) && (optlen <= 40) && (pcb->rcv_sacks[i].left != pcb->rcv_sacks[i].right); ++i) { + ++num_sacks; + optlen += 8; + } + } + + return num_sacks; +} + +/** Build a SACK option at the specified options pointer: one 32-bit word of 2xNOP padding plus option header, then 8 bytes per SACK range (so 12 or more bytes for at least one range) + * + * @param pcb tcp_pcb + * @param opts option pointer where to store the SACK option + * @param num_sacks the number of SACKs to store (taken from the front of pcb->rcv_sacks) + */ +static void +tcp_build_sack_option(struct tcp_pcb *pcb, u32_t *opts, u8_t num_sacks) +{ + u8_t i; + + /* Pad with two NOP options to make everything nicely aligned. + We add the length (of just the SACK option, not the NOPs in front of it), + which is 2B of header, plus 8B for each SACK. 
*/ + *(opts++) = PP_HTONL(0x01010500 + 2 + num_sacks * 8); + + for (i = 0; i < num_sacks; ++i) { + *(opts++) = lwip_htonl(pcb->rcv_sacks[i].left); + *(opts++) = lwip_htonl(pcb->rcv_sacks[i].right); + } +} + +#endif + #if LWIP_WND_SCALE /** Build a window scale option (3 bytes long) at the specified options pointer) * @@ -941,9 +1005,15 @@ tcp_send_empty_ack(struct tcp_pcb *pcb) struct pbuf *p; u8_t optlen = 0; struct netif *netif; -#if LWIP_TCP_TIMESTAMPS || CHECKSUM_GEN_TCP +#if CHECKSUM_GEN_TCP || LWIP_TCP_TIMESTAMPS || LWIP_TCP_SACK_OUT struct tcp_hdr *tcphdr; -#endif /* LWIP_TCP_TIMESTAMPS || CHECKSUM_GEN_TCP */ +#if LWIP_TCP_TIMESTAMPS || LWIP_TCP_SACK_OUT + u32_t *opts; +#if LWIP_TCP_SACK_OUT + u8_t num_sacks; +#endif /* LWIP_TCP_SACK_OUT */ +#endif /* LWIP_TCP_TIMESTAMPS || LWIP_TCP_SACK_OUT */ +#endif /* CHECKSUM_GEN_TCP || LWIP_TCP_TIMESTAMPS || LWIP_TCP_SACK_OUT */ #if LWIP_TCP_TIMESTAMPS if (pcb->flags & TF_TIMESTAMP) { @@ -951,6 +1021,12 @@ tcp_send_empty_ack(struct tcp_pcb *pcb) } #endif +#if LWIP_TCP_SACK_OUT + if ((num_sacks = tcp_get_num_sacks(pcb, optlen)) > 0) { + optlen += 4 + num_sacks * 8; /* 4 bytes for header (including 2*NOP), plus 8B for each SACK */ + } +#endif + p = tcp_output_alloc_header(pcb, optlen, 0, lwip_htonl(pcb->snd_nxt)); if (p == NULL) { /* let tcp_fasttmr retry sending this ACK */ @@ -958,9 +1034,15 @@ tcp_send_empty_ack(struct tcp_pcb *pcb) LWIP_DEBUGF(TCP_OUTPUT_DEBUG, ("tcp_output: (ACK) could not allocate pbuf\n")); return ERR_BUF; } -#if LWIP_TCP_TIMESTAMPS || CHECKSUM_GEN_TCP + +#if CHECKSUM_GEN_TCP || LWIP_TCP_TIMESTAMPS || LWIP_TCP_SACK_OUT tcphdr = (struct tcp_hdr *)p->payload; -#endif /* LWIP_TCP_TIMESTAMPS || CHECKSUM_GEN_TCP */ +#if LWIP_TCP_TIMESTAMPS || LWIP_TCP_SACK_OUT + /* cast through void* to get rid of alignment warnings */ + opts = (u32_t *)(void *)(tcphdr + 1); +#endif /* LWIP_TCP_TIMESTAMPS || LWIP_TCP_SACK_OUT */ +#endif /* CHECKSUM_GEN_TCP || LWIP_TCP_TIMESTAMPS || LWIP_TCP_SACK_OUT */ + 
LWIP_DEBUGF(TCP_OUTPUT_DEBUG, ("tcp_output: sending ACK for %"U32_F"\n", pcb->rcv_nxt)); @@ -969,7 +1051,16 @@ tcp_send_empty_ack(struct tcp_pcb *pcb) pcb->ts_lastacksent = pcb->rcv_nxt; if (pcb->flags & TF_TIMESTAMP) { - tcp_build_timestamp_option(pcb, (u32_t *)(tcphdr + 1)); + tcp_build_timestamp_option(pcb, opts); + opts += 3; + } +#endif + +#if LWIP_TCP_SACK_OUT + if (num_sacks > 0) { + tcp_build_sack_option(pcb, opts, num_sacks); + /* 1 word for SACKs header (including 2xNOP), and 2 words for each SACK */ + opts += 1 + num_sacks * 2; } #endif @@ -1272,6 +1363,16 @@ tcp_output_segment(struct tcp_seg *seg, struct tcp_pcb *pcb, struct netif *netif opts += 1; } #endif +#if LWIP_TCP_SACK_OUT + if (seg->flags & TF_SEG_OPTS_SACK_PERM) { + /* Pad with two NOP options to make everything nicely aligned + * NOTE: When we send both timestamp and SACK_PERM options, + * we could use the first two NOPs before the timestamp to store SACK_PERM option, + * but that would complicate the code. + */ + *(opts++) = PP_HTONL(0x01010402); + } +#endif /* Set retransmission timer running if it is not currently enabled This must be set before checking the route. */ diff --git a/src/include/lwip/opt.h b/src/include/lwip/opt.h index c87e794c..8faea95d 100644 --- a/src/include/lwip/opt.h +++ b/src/include/lwip/opt.h @@ -1211,6 +1211,27 @@ #define TCP_QUEUE_OOSEQ (LWIP_TCP) #endif +/** + * LWIP_TCP_SACK_OUT==1: TCP will support sending selective acknowledgements (SACKs). + */ +#if !defined LWIP_TCP_SACK_OUT || defined __DOXYGEN__ +#define LWIP_TCP_SACK_OUT 0 +#endif + +/** + * LWIP_TCP_MAX_SACK_NUM: The maximum number of SACK values to include in TCP packets. + * Must be at least 1, but is only used if LWIP_TCP_SACK_OUT is enabled. + * NOTE: Even though we never send more than 4 SACK ranges in a single packet + * (depending on other options), setting this option to values greater than 4 is not pointless. + * This is basically the max number of SACK ranges we want to keep track of. 
+ * As new data is delivered, some of the SACK ranges may be removed or merged. + * In that case some of those older SACK ranges may be used again. + * The amount of memory used to store SACK ranges is LWIP_TCP_MAX_SACK_NUM * 8 bytes for each TCP PCB. + */ +#if !defined LWIP_TCP_MAX_SACK_NUM || defined __DOXYGEN__ +#define LWIP_TCP_MAX_SACK_NUM 4 +#endif + /** * TCP_MSS: TCP Maximum segment size. (default is 536, a conservative default, * you might want to increase this.) diff --git a/src/include/lwip/priv/tcp_priv.h b/src/include/lwip/priv/tcp_priv.h index bbf4305c..1670805c 100644 --- a/src/include/lwip/priv/tcp_priv.h +++ b/src/include/lwip/priv/tcp_priv.h @@ -264,6 +264,7 @@ struct tcp_seg { #define TF_SEG_DATA_CHECKSUMMED (u8_t)0x04U /* ALL data (not the header) is checksummed into 'chksum' */ #define TF_SEG_OPTS_WND_SCALE (u8_t)0x08U /* Include WND SCALE option */ +#define TF_SEG_OPTS_SACK_PERM (u8_t)0x10U /* Include SACK Permitted option */ struct tcp_hdr *tcphdr; /* the TCP header */ }; @@ -271,6 +272,7 @@ struct tcp_seg { #define LWIP_TCP_OPT_NOP 1 #define LWIP_TCP_OPT_MSS 2 #define LWIP_TCP_OPT_WS 3 +#define LWIP_TCP_OPT_SACK_PERM 4 #define LWIP_TCP_OPT_TS 8 #define LWIP_TCP_OPT_LEN_MSS 4 @@ -287,10 +289,18 @@ struct tcp_seg { #define LWIP_TCP_OPT_LEN_WS_OUT 0 #endif +#if LWIP_TCP_SACK_OUT +#define LWIP_TCP_OPT_LEN_SACK_PERM 2 +#define LWIP_TCP_OPT_LEN_SACK_PERM_OUT 4 /* aligned for output (includes NOP padding) */ +#else +#define LWIP_TCP_OPT_LEN_SACK_PERM_OUT 0 +#endif + #define LWIP_TCP_OPT_LENGTH(flags) \ - (flags & TF_SEG_OPTS_MSS ? LWIP_TCP_OPT_LEN_MSS : 0) + \ - (flags & TF_SEG_OPTS_TS ? LWIP_TCP_OPT_LEN_TS_OUT : 0) + \ - (flags & TF_SEG_OPTS_WND_SCALE ? LWIP_TCP_OPT_LEN_WS_OUT : 0) + (flags & TF_SEG_OPTS_MSS ? LWIP_TCP_OPT_LEN_MSS : 0) + \ + (flags & TF_SEG_OPTS_TS ? LWIP_TCP_OPT_LEN_TS_OUT : 0) + \ + (flags & TF_SEG_OPTS_WND_SCALE ? LWIP_TCP_OPT_LEN_WS_OUT : 0) + \ + (flags & TF_SEG_OPTS_SACK_PERM ? 
LWIP_TCP_OPT_LEN_SACK_PERM_OUT : 0) /** This returns a TCP header option for MSS in an u32_t */ #define TCP_BUILD_MSS_OPTION(mss) lwip_htonl(0x02040000 | ((mss) & 0xFFFF)) diff --git a/src/include/lwip/tcp.h b/src/include/lwip/tcp.h index 6b7a2b1e..ccc1ab7d 100644 --- a/src/include/lwip/tcp.h +++ b/src/include/lwip/tcp.h @@ -214,6 +214,9 @@ struct tcp_pcb { #define TF_TIMESTAMP 0x0400U /* Timestamp option enabled */ #endif #define TF_RTO 0x0800U /* RTO timer has fired, in-flight data moved to unsent and being retransmitted */ +#if LWIP_TCP_SACK_OUT +#define TF_SACK 0x1000U /* Selective ACKs enabled */ +#endif /* the rest of the fields are in host byte order as we have to do some math with them */ @@ -229,6 +232,19 @@ struct tcp_pcb { tcpwnd_size_t rcv_ann_wnd; /* receiver window to announce */ u32_t rcv_ann_right_edge; /* announced right edge of window */ +#if LWIP_TCP_SACK_OUT + /* SACK ranges to include in ACK packets. Only present when LWIP_TCP_SACK_OUT is enabled (the option is always defined, so it must be tested by value). + SACK entry is invalid if left == right. */ + struct + { + /* Left edge of the SACK: the first acknowledged sequence number. */ + u32_t left; + + /* Right edge of the SACK: the last acknowledged sequence number +1 (so first NOT acknowledged). */ + u32_t right; + } rcv_sacks[LWIP_TCP_MAX_SACK_NUM]; +#endif + /* Retransmission timer. */ s16_t rtime;