Checked in some ideas of patch #6460 (loop optimizations) and created defines for swapping bytes and folding u32 to u16.

This commit is contained in:
goldsimon 2008-06-17 20:06:25 +00:00
parent e0aaa87b1f
commit ef3666ef26
2 changed files with 56 additions and 49 deletions

View File

@ -19,7 +19,7 @@ HISTORY
++ New features: ++ New features:
2008-06-12 Simon Goldschmidt (patch by Luca Ceresoli) 2008-06-17 Simon Goldschmidt (patch by Luca Ceresoli)
* netif.c, loopif.c, ip.c, netif.h, loopif.h, opt.h: Checked in slightly * netif.c, loopif.c, ip.c, netif.h, loopif.h, opt.h: Checked in slightly
modified version of patch # 6370: Moved loopif code to netif.c so that modified version of patch # 6370: Moved loopif code to netif.c so that
loopback traffic is supported on all netifs (all local IPs). loopback traffic is supported on all netifs (all local IPs).
@ -27,6 +27,10 @@ HISTORY
++ Bugfixes: ++ Bugfixes:
2008-06-17 Simon Goldschmidt
* inet_chksum.c: Checked in some ideas of patch #6460 (loop optimizations)
and created defines for swapping bytes and folding u32 to u16.
2008-05-30 Kieran Mansley 2008-05-30 Kieran Mansley
* tcp_in.c Remove redundant "if" statement, and use real rcv_wnd * tcp_in.c Remove redundant "if" statement, and use real rcv_wnd
rather than rcv_ann_wnd when deciding if packets are in-window. rather than rcv_ann_wnd when deciding if packets are in-window.

View File

@ -41,8 +41,6 @@
#include "lwip/inet_chksum.h" #include "lwip/inet_chksum.h"
#include "lwip/inet.h" #include "lwip/inet.h"
#include <string.h>
/* These are some reference implementations of the checksum algorithm, with the /* These are some reference implementations of the checksum algorithm, with the
* aim of being simple, correct and fully portable. Checksumming is the * aim of being simple, correct and fully portable. Checksumming is the
* first thing you would want to optimize for your platform. If you create * first thing you would want to optimize for your platform. If you create
@ -65,6 +63,11 @@
# define LWIP_CHKSUM_ALGORITHM 0 # define LWIP_CHKSUM_ALGORITHM 0
#endif #endif
/**
 * Swap the two low-order bytes of w: bits 0-7 move to bits 8-15 and
 * bits 8-15 move to bits 0-7. Bits above bit 15 are dropped by the masks.
 *
 * The argument and the whole expansion are parenthesized so the macro can
 * be used inside larger expressions (e.g. with '~', '==', '+') and with
 * compound arguments (e.g. 'a | b') without operator-precedence surprises.
 * Note: w is evaluated twice, so it must not have side effects.
 */
#define SWAP_BYTES_IN_WORD(w) ((((w) & 0xff) << 8) | (((w) & 0xff00) >> 8))
/**
 * Split a u32_t into its upper and lower 16-bit halves and add them up.
 *
 * A single fold can still leave a carry in bit 16 (e.g. 0x0001ffff folds
 * to 0x10000), so apply the macro a second time when the result must fit
 * into 16 bits.
 * Note: u is evaluated twice, so it must not have side effects.
 */
#define FOLD_U32T(u)          (((u) >> 16) + ((u) & 0x0000ffffUL))
#if (LWIP_CHKSUM_ALGORITHM == 1) /* Version #1 */ #if (LWIP_CHKSUM_ALGORITHM == 1) /* Version #1 */
/** /**
* lwip checksum * lwip checksum
@ -86,8 +89,7 @@ lwip_standard_chksum(void *dataptr, u16_t len)
acc = 0; acc = 0;
/* dataptr may be at odd or even addresses */ /* dataptr may be at odd or even addresses */
octetptr = (u8_t*)dataptr; octetptr = (u8_t*)dataptr;
while (len > 1) while (len > 1) {
{
/* declare first octet as most significant /* declare first octet as most significant
thus assume network order, ignoring host order */ thus assume network order, ignoring host order */
src = (*octetptr) << 8; src = (*octetptr) << 8;
@ -98,8 +100,7 @@ lwip_standard_chksum(void *dataptr, u16_t len)
acc += src; acc += src;
len -= 2; len -= 2;
} }
if (len > 0) if (len > 0) {
{
/* accumulate remaining octet */ /* accumulate remaining octet */
src = (*octetptr) << 8; src = (*octetptr) << 8;
acc += src; acc += src;
@ -154,19 +155,22 @@ lwip_standard_chksum(void *dataptr, int len)
} }
/* Consume left-over byte, if any */ /* Consume left-over byte, if any */
if (len > 0) if (len > 0) {
((u8_t *)&t)[0] = *(u8_t *)ps;; ((u8_t *)&t)[0] = *(u8_t *)ps;;
}
/* Add end bytes */ /* Add end bytes */
sum += t; sum += t;
/* Fold 32-bit sum to 16 bits */ /* Fold 32-bit sum to 16 bits
while ((sum >> 16) != 0) calling this twice is probably faster than if statements... */
sum = (sum & 0xffff) + (sum >> 16); sum = FOLD_U32T(sum);
sum = FOLD_U32T(sum);
/* Swap if alignment was odd */ /* Swap if alignment was odd */
if (odd) if (odd) {
sum = ((sum & 0xff) << 8) | ((sum & 0xff00) >> 8); sum = SWAP_BYTES_IN_WORD(sum);
}
return sum; return sum;
} }
@ -211,18 +215,20 @@ lwip_standard_chksum(void *dataptr, int len)
while (len > 7) { while (len > 7) {
tmp = sum + *pl++; /* ping */ tmp = sum + *pl++; /* ping */
if (tmp < sum) if (tmp < sum) {
tmp++; /* add back carry */ tmp++; /* add back carry */
}
sum = tmp + *pl++; /* pong */ sum = tmp + *pl++; /* pong */
if (sum < tmp) if (sum < tmp) {
sum++; /* add back carry */ sum++; /* add back carry */
}
len -= 8; len -= 8;
} }
/* make room in upper bits */ /* make room in upper bits */
sum = (sum >> 16) + (sum & 0xffff); sum = FOLD_U32T(sum);
ps = (u16_t *)pl; ps = (u16_t *)pl;
@ -233,16 +239,20 @@ lwip_standard_chksum(void *dataptr, int len)
} }
/* dangling tail byte remaining? */ /* dangling tail byte remaining? */
if (len > 0) /* include odd byte */ if (len > 0) { /* include odd byte */
((u8_t *)&t)[0] = *(u8_t *)ps; ((u8_t *)&t)[0] = *(u8_t *)ps;
}
sum += t; /* add end bytes */ sum += t; /* add end bytes */
while ((sum >> 16) != 0) /* combine halves */ /* Fold 32-bit sum to 16 bits
sum = (sum >> 16) + (sum & 0xffff); calling this twice is probably faster than if statements... */
sum = FOLD_U32T(sum);
sum = FOLD_U32T(sum);
if (odd) if (odd) {
sum = ((sum & 0xff) << 8) | ((sum & 0xff00) >> 8); sum = SWAP_BYTES_IN_WORD(sum);
}
return sum; return sum;
} }
@ -277,18 +287,18 @@ inet_chksum_pseudo(struct pbuf *p,
(void *)q, (void *)q->next)); (void *)q, (void *)q->next));
acc += LWIP_CHKSUM(q->payload, q->len); acc += LWIP_CHKSUM(q->payload, q->len);
/*LWIP_DEBUGF(INET_DEBUG, ("inet_chksum_pseudo(): unwrapped lwip_chksum()=%"X32_F" \n", acc));*/ /*LWIP_DEBUGF(INET_DEBUG, ("inet_chksum_pseudo(): unwrapped lwip_chksum()=%"X32_F" \n", acc));*/
while ((acc >> 16) != 0) { /* just executing this next line is probably faster than the if statement needed
acc = (acc & 0xffffUL) + (acc >> 16); to check whether we really need to execute it, and does no harm */
} acc = FOLD_U32T(acc);
if (q->len % 2 != 0) { if (q->len % 2 != 0) {
swapped = 1 - swapped; swapped = 1 - swapped;
acc = ((acc & 0xff) << 8) | ((acc & 0xff00UL) >> 8); acc = SWAP_BYTES_IN_WORD(acc);
} }
/*LWIP_DEBUGF(INET_DEBUG, ("inet_chksum_pseudo(): wrapped lwip_chksum()=%"X32_F" \n", acc));*/ /*LWIP_DEBUGF(INET_DEBUG, ("inet_chksum_pseudo(): wrapped lwip_chksum()=%"X32_F" \n", acc));*/
} }
if (swapped) { if (swapped) {
acc = ((acc & 0xff) << 8) | ((acc & 0xff00UL) >> 8); acc = SWAP_BYTES_IN_WORD(acc);
} }
acc += (src->addr & 0xffffUL); acc += (src->addr & 0xffffUL);
acc += ((src->addr >> 16) & 0xffffUL); acc += ((src->addr >> 16) & 0xffffUL);
@ -297,9 +307,10 @@ inet_chksum_pseudo(struct pbuf *p,
acc += (u32_t)htons((u16_t)proto); acc += (u32_t)htons((u16_t)proto);
acc += (u32_t)htons(proto_len); acc += (u32_t)htons(proto_len);
while ((acc >> 16) != 0) { /* Fold 32-bit sum to 16 bits
acc = (acc & 0xffffUL) + (acc >> 16); calling this twice is probably faster than if statements... */
} acc = FOLD_U32T(acc);
acc = FOLD_U32T(acc);
LWIP_DEBUGF(INET_DEBUG, ("inet_chksum_pseudo(): pbuf chain lwip_chksum()=%"X32_F"\n", acc)); LWIP_DEBUGF(INET_DEBUG, ("inet_chksum_pseudo(): pbuf chain lwip_chksum()=%"X32_F"\n", acc));
return (u16_t)~(acc & 0xffffUL); return (u16_t)~(acc & 0xffffUL);
} }
@ -340,18 +351,17 @@ inet_chksum_pseudo_partial(struct pbuf *p,
chksum_len -= chklen; chksum_len -= chklen;
LWIP_ASSERT("delete me", chksum_len < 0x7fff); LWIP_ASSERT("delete me", chksum_len < 0x7fff);
/*LWIP_DEBUGF(INET_DEBUG, ("inet_chksum_pseudo(): unwrapped lwip_chksum()=%"X32_F" \n", acc));*/ /*LWIP_DEBUGF(INET_DEBUG, ("inet_chksum_pseudo(): unwrapped lwip_chksum()=%"X32_F" \n", acc));*/
while ((acc >> 16) != 0) { /* fold the upper bit down */
acc = (acc & 0xffffUL) + (acc >> 16); acc = FOLD_U32T(acc);
}
if (q->len % 2 != 0) { if (q->len % 2 != 0) {
swapped = 1 - swapped; swapped = 1 - swapped;
acc = ((acc & 0xff) << 8) | ((acc & 0xff00UL) >> 8); acc = SWAP_BYTES_IN_WORD(acc);
} }
/*LWIP_DEBUGF(INET_DEBUG, ("inet_chksum_pseudo(): wrapped lwip_chksum()=%"X32_F" \n", acc));*/ /*LWIP_DEBUGF(INET_DEBUG, ("inet_chksum_pseudo(): wrapped lwip_chksum()=%"X32_F" \n", acc));*/
} }
if (swapped) { if (swapped) {
acc = ((acc & 0xff) << 8) | ((acc & 0xff00UL) >> 8); acc = SWAP_BYTES_IN_WORD(acc);
} }
acc += (src->addr & 0xffffUL); acc += (src->addr & 0xffffUL);
acc += ((src->addr >> 16) & 0xffffUL); acc += ((src->addr >> 16) & 0xffffUL);
@ -360,9 +370,10 @@ inet_chksum_pseudo_partial(struct pbuf *p,
acc += (u32_t)htons((u16_t)proto); acc += (u32_t)htons((u16_t)proto);
acc += (u32_t)htons(proto_len); acc += (u32_t)htons(proto_len);
while ((acc >> 16) != 0) { /* Fold 32-bit sum to 16 bits
acc = (acc & 0xffffUL) + (acc >> 16); calling this twice is probably faster than if statements... */
} acc = FOLD_U32T(acc);
acc = FOLD_U32T(acc);
LWIP_DEBUGF(INET_DEBUG, ("inet_chksum_pseudo(): pbuf chain lwip_chksum()=%"X32_F"\n", acc)); LWIP_DEBUGF(INET_DEBUG, ("inet_chksum_pseudo(): pbuf chain lwip_chksum()=%"X32_F"\n", acc));
return (u16_t)~(acc & 0xffffUL); return (u16_t)~(acc & 0xffffUL);
} }
@ -380,13 +391,7 @@ inet_chksum_pseudo_partial(struct pbuf *p,
u16_t u16_t
inet_chksum(void *dataptr, u16_t len) inet_chksum(void *dataptr, u16_t len)
{ {
u32_t acc; return ~LWIP_CHKSUM(dataptr, len);
acc = LWIP_CHKSUM(dataptr, len);
while ((acc >> 16) != 0) {
acc = (acc & 0xffff) + (acc >> 16);
}
return (u16_t)~(acc & 0xffff);
} }
/** /**
@ -407,17 +412,15 @@ inet_chksum_pbuf(struct pbuf *p)
swapped = 0; swapped = 0;
for(q = p; q != NULL; q = q->next) { for(q = p; q != NULL; q = q->next) {
acc += LWIP_CHKSUM(q->payload, q->len); acc += LWIP_CHKSUM(q->payload, q->len);
while ((acc >> 16) != 0) { acc = FOLD_U32T(acc);
acc = (acc & 0xffffUL) + (acc >> 16);
}
if (q->len % 2 != 0) { if (q->len % 2 != 0) {
swapped = 1 - swapped; swapped = 1 - swapped;
acc = (acc & 0x00ffUL << 8) | (acc & 0xff00UL >> 8); acc = SWAP_BYTES_IN_WORD(acc);
} }
} }
if (swapped) { if (swapped) {
acc = ((acc & 0x00ffUL) << 8) | ((acc & 0xff00UL) >> 8); acc = SWAP_BYTES_IN_WORD(acc);
} }
return (u16_t)~(acc & 0xffffUL); return (u16_t)~(acc & 0xffffUL);
} }