Checked in some ideas of patch #6460 (loop optimizations) and created defines for swapping bytes and folding u32 to u16.

This commit is contained in:
goldsimon 2008-06-17 20:06:25 +00:00
parent e0aaa87b1f
commit ef3666ef26
2 changed files with 56 additions and 49 deletions

View File

@ -19,7 +19,7 @@ HISTORY
++ New features:
2008-06-12 Simon Goldschmidt (patch by Luca Ceresoli)
2008-06-17 Simon Goldschmidt (patch by Luca Ceresoli)
* netif.c, loopif.c, ip.c, netif.h, loopif.h, opt.h: Checked in slightly
modified version of patch # 6370: Moved loopif code to netif.c so that
loopback traffic is supported on all netifs (all local IPs).
@ -27,6 +27,10 @@ HISTORY
++ Bugfixes:
2008-06-17 Simon Goldschmidt
* inet_chksum.c: Checked in some ideas of patch #6460 (loop optimizations)
and created defines for swapping bytes and folding u32 to u16.
2008-05-30 Kieran Mansley
* tcp_in.c Remove redundant "if" statement, and use real rcv_wnd
rather than rcv_ann_wnd when deciding if packets are in-window.

View File

@ -41,8 +41,6 @@
#include "lwip/inet_chksum.h"
#include "lwip/inet.h"
#include <string.h>
/* These are some reference implementations of the checksum algorithm, with the
* aim of being simple, correct and fully portable. Checksumming is the
* first thing you would want to optimize for your platform. If you create
@ -65,6 +63,11 @@
# define LWIP_CHKSUM_ALGORITHM 0
#endif
/** Like the name says... */
#define SWAP_BYTES_IN_WORD(w) ((w & 0xff) << 8) | ((w & 0xff00) >> 8)
/** Split an u32_t in two u16_ts and add them up */
#define FOLD_U32T(u) ((u >> 16) + (u & 0x0000ffffUL))
#if (LWIP_CHKSUM_ALGORITHM == 1) /* Version #1 */
/**
* lwip checksum
@ -86,8 +89,7 @@ lwip_standard_chksum(void *dataptr, u16_t len)
acc = 0;
/* dataptr may be at odd or even addresses */
octetptr = (u8_t*)dataptr;
while (len > 1)
{
while (len > 1) {
/* declare first octet as most significant
thus assume network order, ignoring host order */
src = (*octetptr) << 8;
@ -98,8 +100,7 @@ lwip_standard_chksum(void *dataptr, u16_t len)
acc += src;
len -= 2;
}
if (len > 0)
{
if (len > 0) {
/* accumulate remaining octet */
src = (*octetptr) << 8;
acc += src;
@ -154,19 +155,22 @@ lwip_standard_chksum(void *dataptr, int len)
}
/* Consume left-over byte, if any */
if (len > 0)
if (len > 0) {
((u8_t *)&t)[0] = *(u8_t *)ps;;
}
/* Add end bytes */
sum += t;
/* Fold 32-bit sum to 16 bits */
while ((sum >> 16) != 0)
sum = (sum & 0xffff) + (sum >> 16);
/* Fold 32-bit sum to 16 bits
calling this twice is propably faster than if statements... */
sum = FOLD_U32T(sum);
sum = FOLD_U32T(sum);
/* Swap if alignment was odd */
if (odd)
sum = ((sum & 0xff) << 8) | ((sum & 0xff00) >> 8);
if (odd) {
sum = SWAP_BYTES_IN_WORD(sum);
}
return sum;
}
@ -211,18 +215,20 @@ lwip_standard_chksum(void *dataptr, int len)
while (len > 7) {
tmp = sum + *pl++; /* ping */
if (tmp < sum)
if (tmp < sum) {
tmp++; /* add back carry */
}
sum = tmp + *pl++; /* pong */
if (sum < tmp)
if (sum < tmp) {
sum++; /* add back carry */
}
len -= 8;
}
/* make room in upper bits */
sum = (sum >> 16) + (sum & 0xffff);
sum = FOLD_U32T(sum);
ps = (u16_t *)pl;
@ -233,16 +239,20 @@ lwip_standard_chksum(void *dataptr, int len)
}
/* dangling tail byte remaining? */
if (len > 0) /* include odd byte */
if (len > 0) { /* include odd byte */
((u8_t *)&t)[0] = *(u8_t *)ps;
}
sum += t; /* add end bytes */
while ((sum >> 16) != 0) /* combine halves */
sum = (sum >> 16) + (sum & 0xffff);
/* Fold 32-bit sum to 16 bits
calling this twice is propably faster than if statements... */
sum = FOLD_U32T(sum);
sum = FOLD_U32T(sum);
if (odd)
sum = ((sum & 0xff) << 8) | ((sum & 0xff00) >> 8);
if (odd) {
sum = SWAP_BYTES_IN_WORD(sum);
}
return sum;
}
@ -277,18 +287,18 @@ inet_chksum_pseudo(struct pbuf *p,
(void *)q, (void *)q->next));
acc += LWIP_CHKSUM(q->payload, q->len);
/*LWIP_DEBUGF(INET_DEBUG, ("inet_chksum_pseudo(): unwrapped lwip_chksum()=%"X32_F" \n", acc));*/
while ((acc >> 16) != 0) {
acc = (acc & 0xffffUL) + (acc >> 16);
}
/* just executing this next line is probably faster that the if statement needed
to check whether we really need to execute it, and does no harm */
acc = FOLD_U32T(acc);
if (q->len % 2 != 0) {
swapped = 1 - swapped;
acc = ((acc & 0xff) << 8) | ((acc & 0xff00UL) >> 8);
acc = SWAP_BYTES_IN_WORD(acc);
}
/*LWIP_DEBUGF(INET_DEBUG, ("inet_chksum_pseudo(): wrapped lwip_chksum()=%"X32_F" \n", acc));*/
}
if (swapped) {
acc = ((acc & 0xff) << 8) | ((acc & 0xff00UL) >> 8);
acc = SWAP_BYTES_IN_WORD(acc);
}
acc += (src->addr & 0xffffUL);
acc += ((src->addr >> 16) & 0xffffUL);
@ -297,9 +307,10 @@ inet_chksum_pseudo(struct pbuf *p,
acc += (u32_t)htons((u16_t)proto);
acc += (u32_t)htons(proto_len);
while ((acc >> 16) != 0) {
acc = (acc & 0xffffUL) + (acc >> 16);
}
/* Fold 32-bit sum to 16 bits
calling this twice is propably faster than if statements... */
acc = FOLD_U32T(acc);
acc = FOLD_U32T(acc);
LWIP_DEBUGF(INET_DEBUG, ("inet_chksum_pseudo(): pbuf chain lwip_chksum()=%"X32_F"\n", acc));
return (u16_t)~(acc & 0xffffUL);
}
@ -340,18 +351,17 @@ inet_chksum_pseudo_partial(struct pbuf *p,
chksum_len -= chklen;
LWIP_ASSERT("delete me", chksum_len < 0x7fff);
/*LWIP_DEBUGF(INET_DEBUG, ("inet_chksum_pseudo(): unwrapped lwip_chksum()=%"X32_F" \n", acc));*/
while ((acc >> 16) != 0) {
acc = (acc & 0xffffUL) + (acc >> 16);
}
/* fold the upper bit down */
acc = FOLD_U32T(acc);
if (q->len % 2 != 0) {
swapped = 1 - swapped;
acc = ((acc & 0xff) << 8) | ((acc & 0xff00UL) >> 8);
acc = SWAP_BYTES_IN_WORD(acc);
}
/*LWIP_DEBUGF(INET_DEBUG, ("inet_chksum_pseudo(): wrapped lwip_chksum()=%"X32_F" \n", acc));*/
}
if (swapped) {
acc = ((acc & 0xff) << 8) | ((acc & 0xff00UL) >> 8);
acc = SWAP_BYTES_IN_WORD(acc);
}
acc += (src->addr & 0xffffUL);
acc += ((src->addr >> 16) & 0xffffUL);
@ -360,9 +370,10 @@ inet_chksum_pseudo_partial(struct pbuf *p,
acc += (u32_t)htons((u16_t)proto);
acc += (u32_t)htons(proto_len);
while ((acc >> 16) != 0) {
acc = (acc & 0xffffUL) + (acc >> 16);
}
/* Fold 32-bit sum to 16 bits
calling this twice is propably faster than if statements... */
acc = FOLD_U32T(acc);
acc = FOLD_U32T(acc);
LWIP_DEBUGF(INET_DEBUG, ("inet_chksum_pseudo(): pbuf chain lwip_chksum()=%"X32_F"\n", acc));
return (u16_t)~(acc & 0xffffUL);
}
@ -380,13 +391,7 @@ inet_chksum_pseudo_partial(struct pbuf *p,
u16_t
inet_chksum(void *dataptr, u16_t len)
{
u32_t acc;
acc = LWIP_CHKSUM(dataptr, len);
while ((acc >> 16) != 0) {
acc = (acc & 0xffff) + (acc >> 16);
}
return (u16_t)~(acc & 0xffff);
return ~LWIP_CHKSUM(dataptr, len);
}
/**
@ -407,17 +412,15 @@ inet_chksum_pbuf(struct pbuf *p)
swapped = 0;
for(q = p; q != NULL; q = q->next) {
acc += LWIP_CHKSUM(q->payload, q->len);
while ((acc >> 16) != 0) {
acc = (acc & 0xffffUL) + (acc >> 16);
}
acc = FOLD_U32T(acc);
if (q->len % 2 != 0) {
swapped = 1 - swapped;
acc = (acc & 0x00ffUL << 8) | (acc & 0xff00UL >> 8);
acc = SWAP_BYTES_IN_WORD(acc);
}
}
if (swapped) {
acc = ((acc & 0x00ffUL) << 8) | ((acc & 0xff00UL) >> 8);
acc = SWAP_BYTES_IN_WORD(acc);
}
return (u16_t)~(acc & 0xffffUL);
}