From 8ba7363d11088fe37ceeaae5652d745ea3795342 Mon Sep 17 00:00:00 2001 From: Ambroz Bizjak Date: Thu, 24 Nov 2016 11:27:34 +0100 Subject: [PATCH] Optimize passing contiguous nocopy buffers to tcp_write While TCP_OVERSIZE works only when tcp_write() is used with TCP_WRITE_FLAG_COPY, this new code achieves similar benefits for the use case that the caller manages their own send buffers and passes successive chunks of those to tcp_write() without TCP_WRITE_FLAG_COPY. In particular, if a buffer is passed to tcp_write() that is adjacent in memory to the previously passed buffer, it will be combined into the previous ROM pbuf reference whenever possible, thus extending that ROM pbuf rather than allocating a new ROM pbuf. For the aforementioned use case, the advantages of this code are twofold: 1) fewer ROM pbufs need to be allocated to send the same data, and, 2) the MAC layer gets outgoing TCP packets with shorter pbuf chains. Original patch by Ambroz Bizjak Edited by David van Moolenbroek Signed-off-by: goldsimon --- CHANGELOG | 3 +++ src/core/tcp_out.c | 64 +++++++++++++++++++++++++++++++++------------- 2 files changed, 49 insertions(+), 18 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 482ab89b..347ef150 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -6,6 +6,9 @@ HISTORY ++ New features: + 2016-11-24: Ambroz Bizjak, David van Moolenbroek + * tcp_out.c: Optimize passing contiguous nocopy buffers to tcp_write (bug #46290) + 2016-11-16: Dirk Ziegelmeier * sockets.c: added support for IPv6 mapped IPv4 addresses diff --git a/src/core/tcp_out.c b/src/core/tcp_out.c index 28d17441..8ccdffb9 100644 --- a/src/core/tcp_out.c +++ b/src/core/tcp_out.c @@ -377,6 +377,7 @@ tcp_write(struct tcp_pcb *pcb, const void *arg, u16_t len, u8_t apiflags) u16_t oversize = 0; u16_t oversize_used = 0; #endif /* TCP_OVERSIZE */ + u16_t extendlen = 0; #if TCP_CHECKSUM_ON_COPY u16_t concat_chksum = 0; u8_t concat_chksum_swapped = 0; @@ -480,6 +481,10 @@ tcp_write(struct tcp_pcb *pcb, const void *arg, u16_t len, u8_t apiflags) /* * Phase 2: Chain a new pbuf to the end of pcb->unsent. * + * As an exception when NOT copying the data, if the given data buffer + * directly follows the last unsent data buffer in memory, extend the last + * ROM pbuf reference to the buffer, thus saving a ROM pbuf allocation. + * * We don't extend segments containing SYN/FIN flags or options * (len==0). The new pbuf is kept in concat_p and pbuf_cat'ed at * the end. @@ -506,12 +511,24 @@ tcp_write(struct tcp_pcb *pcb, const void *arg, u16_t len, u8_t apiflags) #if TCP_CHECKSUM_ON_COPY concat_chksummed += seglen; #endif /* TCP_CHECKSUM_ON_COPY */ + queuelen += pbuf_clen(concat_p); } else { /* Data is not copied */ - if ((concat_p = pbuf_alloc(PBUF_RAW, seglen, PBUF_ROM)) == NULL) { - LWIP_DEBUGF(TCP_OUTPUT_DEBUG | LWIP_DBG_LEVEL_SERIOUS, - ("tcp_write: could not allocate memory for zero-copy pbuf\n")); - goto memerr; + /* If the last unsent pbuf is of type PBUF_ROM, try to extend it. */ + struct pbuf *p; + for (p = last_unsent->p; p->next != NULL; p = p->next); + if (p->type == PBUF_ROM && (const u8_t *)p->payload + p->len == (const u8_t *)arg) { + LWIP_ASSERT("tcp_write: ROM pbufs cannot be oversized", pos == 0); + extendlen = seglen; + } else { + if ((concat_p = pbuf_alloc(PBUF_RAW, seglen, PBUF_ROM)) == NULL) { + LWIP_DEBUGF(TCP_OUTPUT_DEBUG | LWIP_DBG_LEVEL_SERIOUS, + ("tcp_write: could not allocate memory for zero-copy pbuf\n")); + goto memerr; + } + /* reference the non-volatile payload data */ + ((struct pbuf_rom*)concat_p)->payload = (const u8_t*)arg + pos; + queuelen += pbuf_clen(concat_p); } #if TCP_CHECKSUM_ON_COPY /* calculate the checksum of nocopy-data */ @@ -519,12 +536,9 @@ tcp_write(struct tcp_pcb *pcb, const void *arg, u16_t len, u8_t apiflags) &concat_chksum, &concat_chksum_swapped); concat_chksummed += seglen; #endif /* TCP_CHECKSUM_ON_COPY */ - /* reference the non-volatile payload data */ - ((struct pbuf_rom*)concat_p)->payload = (const u8_t*)arg + pos; } pos += seglen; - queuelen += pbuf_clen(concat_p); } } else { #if TCP_OVERSIZE @@ -669,26 +683,40 @@ tcp_write(struct tcp_pcb *pcb, const void *arg, u16_t len, u8_t apiflags) #endif /* TCP_OVERSIZE */ /* - * Phase 2: concat_p can be concatenated onto last_unsent->p + * Phase 2: concat_p can be concatenated onto last_unsent->p, unless we + * determined that the last ROM pbuf can be extended to include the new data. */ if (concat_p != NULL) { LWIP_ASSERT("tcp_write: cannot concatenate when pcb->unsent is empty", (last_unsent != NULL)); pbuf_cat(last_unsent->p, concat_p); last_unsent->len += concat_p->tot_len; -#if TCP_CHECKSUM_ON_COPY - if (concat_chksummed) { - /*if concat checksumm swapped - swap it back */ - if (concat_chksum_swapped) { - concat_chksum = SWAP_BYTES_IN_WORD(concat_chksum); - } - tcp_seg_add_chksum(concat_chksum, concat_chksummed, &last_unsent->chksum, - &last_unsent->chksum_swapped); - last_unsent->flags |= TF_SEG_DATA_CHECKSUMMED; + } else if (extendlen > 0) { + struct pbuf *p; + LWIP_ASSERT("tcp_write: extension of reference requires reference", + last_unsent != NULL && last_unsent->p != NULL); + for (p = last_unsent->p; p->next != NULL; p = p->next) { + p->tot_len += extendlen; } -#endif /* TCP_CHECKSUM_ON_COPY */ + p->tot_len += extendlen; + p->len += extendlen; + last_unsent->len += extendlen; } +#if TCP_CHECKSUM_ON_COPY + if (concat_chksummed) { + LWIP_ASSERT("tcp_write: concat checksum needs concatenated data", + concat_p != NULL || extendlen > 0); + /*if concat checksumm swapped - swap it back */ + if (concat_chksum_swapped) { + concat_chksum = SWAP_BYTES_IN_WORD(concat_chksum); + } + tcp_seg_add_chksum(concat_chksum, concat_chksummed, &last_unsent->chksum, + &last_unsent->chksum_swapped); + last_unsent->flags |= TF_SEG_DATA_CHECKSUMMED; + } +#endif /* TCP_CHECKSUM_ON_COPY */ + /* * Phase 3: Append queue to pcb->unsent. Queue may be NULL, but that * is harmless