diff --git a/CHANGELOG b/CHANGELOG index 1ae3fae9..aadb2309 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -19,7 +19,7 @@ HISTORY ++ New features: - 2008-06-12 Simon Goldschmidt (patch by Luca Ceresoli) + 2008-06-17 Simon Goldschmidt (patch by Luca Ceresoli) * netif.c, loopif.c, ip.c, netif.h, loopif.h, opt.h: Checked in slightly modified version of patch # 6370: Moved loopif code to netif.c so that loopback traffic is supported on all netifs (all local IPs). @@ -27,6 +27,10 @@ HISTORY ++ Bugfixes: + 2008-06-17 Simon Goldschmidt + * inet_chksum.c: Checked in some ideas of patch #6460 (loop optimizations) + and created defines for swapping bytes and folding u32 to u16. + 2008-05-30 Kieran Mansley * tcp_in.c Remove redundant "if" statement, and use real rcv_wnd rather than rcv_ann_wnd when deciding if packets are in-window. diff --git a/src/core/ipv4/inet_chksum.c b/src/core/ipv4/inet_chksum.c index bc492fea..9248f7d4 100644 --- a/src/core/ipv4/inet_chksum.c +++ b/src/core/ipv4/inet_chksum.c @@ -41,8 +41,6 @@ #include "lwip/inet_chksum.h" #include "lwip/inet.h" -#include - /* These are some reference implementations of the checksum algorithm, with the * aim of being simple, correct and fully portable. Checksumming is the * first thing you would want to optimize for your platform. If you create @@ -65,6 +63,11 @@ # define LWIP_CHKSUM_ALGORITHM 0 #endif +/** Like the name says... */ +#define SWAP_BYTES_IN_WORD(w) ((w & 0xff) << 8) | ((w & 0xff00) >> 8) +/** Split an u32_t in two u16_ts and add them up */ +#define FOLD_U32T(u) ((u >> 16) + (u & 0x0000ffffUL)) + #if (LWIP_CHKSUM_ALGORITHM == 1) /* Version #1 */ /** * lwip checksum @@ -86,8 +89,7 @@ lwip_standard_chksum(void *dataptr, u16_t len) acc = 0; /* dataptr may be at odd or even addresses */ octetptr = (u8_t*)dataptr; - while (len > 1) - { + while (len > 1) { /* declare first octet as most significant thus assume network order, ignoring host order */ src = (*octetptr) << 8; @@ -98,8 +100,7 @@ lwip_standard_chksum(void *dataptr, u16_t len) acc += src; len -= 2; } - if (len > 0) - { + if (len > 0) { /* accumulate remaining octet */ src = (*octetptr) << 8; acc += src; @@ -154,19 +155,22 @@ lwip_standard_chksum(void *dataptr, int len) } /* Consume left-over byte, if any */ - if (len > 0) + if (len > 0) { ((u8_t *)&t)[0] = *(u8_t *)ps;; + } /* Add end bytes */ sum += t; - /* Fold 32-bit sum to 16 bits */ - while ((sum >> 16) != 0) - sum = (sum & 0xffff) + (sum >> 16); + /* Fold 32-bit sum to 16 bits + calling this twice is propably faster than if statements... */ + sum = FOLD_U32T(sum); + sum = FOLD_U32T(sum); /* Swap if alignment was odd */ - if (odd) - sum = ((sum & 0xff) << 8) | ((sum & 0xff00) >> 8); + if (odd) { + sum = SWAP_BYTES_IN_WORD(sum); + } return sum; } @@ -211,18 +215,20 @@ lwip_standard_chksum(void *dataptr, int len) while (len > 7) { tmp = sum + *pl++; /* ping */ - if (tmp < sum) + if (tmp < sum) { tmp++; /* add back carry */ + } sum = tmp + *pl++; /* pong */ - if (sum < tmp) + if (sum < tmp) { sum++; /* add back carry */ + } len -= 8; } /* make room in upper bits */ - sum = (sum >> 16) + (sum & 0xffff); + sum = FOLD_U32T(sum); ps = (u16_t *)pl; @@ -233,16 +239,20 @@ lwip_standard_chksum(void *dataptr, int len) } /* dangling tail byte remaining? */ - if (len > 0) /* include odd byte */ + if (len > 0) { /* include odd byte */ ((u8_t *)&t)[0] = *(u8_t *)ps; + } sum += t; /* add end bytes */ - while ((sum >> 16) != 0) /* combine halves */ - sum = (sum >> 16) + (sum & 0xffff); + /* Fold 32-bit sum to 16 bits + calling this twice is propably faster than if statements... */ + sum = FOLD_U32T(sum); + sum = FOLD_U32T(sum); - if (odd) - sum = ((sum & 0xff) << 8) | ((sum & 0xff00) >> 8); + if (odd) { + sum = SWAP_BYTES_IN_WORD(sum); + } return sum; } @@ -277,18 +287,18 @@ inet_chksum_pseudo(struct pbuf *p, (void *)q, (void *)q->next)); acc += LWIP_CHKSUM(q->payload, q->len); /*LWIP_DEBUGF(INET_DEBUG, ("inet_chksum_pseudo(): unwrapped lwip_chksum()=%"X32_F" \n", acc));*/ - while ((acc >> 16) != 0) { - acc = (acc & 0xffffUL) + (acc >> 16); - } + /* just executing this next line is probably faster that the if statement needed + to check whether we really need to execute it, and does no harm */ + acc = FOLD_U32T(acc); if (q->len % 2 != 0) { swapped = 1 - swapped; - acc = ((acc & 0xff) << 8) | ((acc & 0xff00UL) >> 8); + acc = SWAP_BYTES_IN_WORD(acc); } /*LWIP_DEBUGF(INET_DEBUG, ("inet_chksum_pseudo(): wrapped lwip_chksum()=%"X32_F" \n", acc));*/ } if (swapped) { - acc = ((acc & 0xff) << 8) | ((acc & 0xff00UL) >> 8); + acc = SWAP_BYTES_IN_WORD(acc); } acc += (src->addr & 0xffffUL); acc += ((src->addr >> 16) & 0xffffUL); @@ -297,9 +307,10 @@ inet_chksum_pseudo(struct pbuf *p, acc += (u32_t)htons((u16_t)proto); acc += (u32_t)htons(proto_len); - while ((acc >> 16) != 0) { - acc = (acc & 0xffffUL) + (acc >> 16); - } + /* Fold 32-bit sum to 16 bits + calling this twice is propably faster than if statements... */ + acc = FOLD_U32T(acc); + acc = FOLD_U32T(acc); LWIP_DEBUGF(INET_DEBUG, ("inet_chksum_pseudo(): pbuf chain lwip_chksum()=%"X32_F"\n", acc)); return (u16_t)~(acc & 0xffffUL); } @@ -340,18 +351,17 @@ inet_chksum_pseudo_partial(struct pbuf *p, chksum_len -= chklen; LWIP_ASSERT("delete me", chksum_len < 0x7fff); /*LWIP_DEBUGF(INET_DEBUG, ("inet_chksum_pseudo(): unwrapped lwip_chksum()=%"X32_F" \n", acc));*/ - while ((acc >> 16) != 0) { - acc = (acc & 0xffffUL) + (acc >> 16); - } + /* fold the upper bit down */ + acc = FOLD_U32T(acc); if (q->len % 2 != 0) { swapped = 1 - swapped; - acc = ((acc & 0xff) << 8) | ((acc & 0xff00UL) >> 8); + acc = SWAP_BYTES_IN_WORD(acc); } /*LWIP_DEBUGF(INET_DEBUG, ("inet_chksum_pseudo(): wrapped lwip_chksum()=%"X32_F" \n", acc));*/ } if (swapped) { - acc = ((acc & 0xff) << 8) | ((acc & 0xff00UL) >> 8); + acc = SWAP_BYTES_IN_WORD(acc); } acc += (src->addr & 0xffffUL); acc += ((src->addr >> 16) & 0xffffUL); @@ -360,9 +370,10 @@ inet_chksum_pseudo_partial(struct pbuf *p, acc += (u32_t)htons((u16_t)proto); acc += (u32_t)htons(proto_len); - while ((acc >> 16) != 0) { - acc = (acc & 0xffffUL) + (acc >> 16); - } + /* Fold 32-bit sum to 16 bits + calling this twice is propably faster than if statements... */ + acc = FOLD_U32T(acc); + acc = FOLD_U32T(acc); LWIP_DEBUGF(INET_DEBUG, ("inet_chksum_pseudo(): pbuf chain lwip_chksum()=%"X32_F"\n", acc)); return (u16_t)~(acc & 0xffffUL); } @@ -380,13 +391,7 @@ inet_chksum_pseudo_partial(struct pbuf *p, u16_t inet_chksum(void *dataptr, u16_t len) { - u32_t acc; - - acc = LWIP_CHKSUM(dataptr, len); - while ((acc >> 16) != 0) { - acc = (acc & 0xffff) + (acc >> 16); - } - return (u16_t)~(acc & 0xffff); + return ~LWIP_CHKSUM(dataptr, len); } /** @@ -407,17 +412,15 @@ inet_chksum_pbuf(struct pbuf *p) swapped = 0; for(q = p; q != NULL; q = q->next) { acc += LWIP_CHKSUM(q->payload, q->len); - while ((acc >> 16) != 0) { - acc = (acc & 0xffffUL) + (acc >> 16); - } + acc = FOLD_U32T(acc); if (q->len % 2 != 0) { swapped = 1 - swapped; - acc = (acc & 0x00ffUL << 8) | (acc & 0xff00UL >> 8); + acc = SWAP_BYTES_IN_WORD(acc); } } if (swapped) { - acc = ((acc & 0x00ffUL) << 8) | ((acc & 0xff00UL) >> 8); + acc = SWAP_BYTES_IN_WORD(acc); } return (u16_t)~(acc & 0xffffUL); }