From c26875e70d120f564f4e68c1dc79d9e7e3642b70 Mon Sep 17 00:00:00 2001 From: hathach Date: Mon, 26 Apr 2021 17:42:49 +0700 Subject: [PATCH] add TUP_MCU_STRICT_ALIGN macro that manually pick bytes for lpc55 port1 that is m4 but cannot unaligned acces on usb ram --- src/class/msc/msc_device.c | 19 +++--------- src/common/tusb_common.h | 63 ++++++++++++++++++++++++++++++++------ src/tusb_option.h | 11 +++++++ 3 files changed, 69 insertions(+), 24 deletions(-) diff --git a/src/class/msc/msc_device.c b/src/class/msc/msc_device.c index 194f4d3cb..539941935 100644 --- a/src/class/msc/msc_device.c +++ b/src/class/msc/msc_device.c @@ -77,28 +77,19 @@ static void proc_write10_cmd(uint8_t rhport, mscd_interface_t* p_msc); static inline uint32_t rdwr10_get_lba(uint8_t const command[]) { - // read10 & write10 has the same format - scsi_write10_t* p_rdwr10 = (scsi_write10_t*) command; + // use offsetof to avoid pointer to the odd/unaligned address + uint32_t const lba = tu_unaligned_read32(command + offsetof(scsi_write10_t, lba)); - // copy first to prevent mis-aligned access - uint32_t lba; - // use offsetof to avoid pointer to the odd/misaligned address - memcpy(&lba, (uint8_t*) p_rdwr10 + offsetof(scsi_write10_t, lba), 4); - - // lba is in Big Endian format + // lba is in Big Endian return tu_ntohl(lba); } static inline uint16_t rdwr10_get_blockcount(uint8_t const command[]) { - // read10 & write10 has the same format - scsi_write10_t* p_rdwr10 = (scsi_write10_t*) command; - - // copy first to prevent mis-aligned access - uint16_t block_count; // use offsetof to avoid pointer to the odd/misaligned address - memcpy(&block_count, (uint8_t*) p_rdwr10 + offsetof(scsi_write10_t, block_count), 2); + uint16_t const block_count = tu_unaligned_read16(command + offsetof(scsi_write10_t, block_count)); + // block count is in Big Endian return tu_ntohs(block_count); } diff --git a/src/common/tusb_common.h b/src/common/tusb_common.h index 705ff867c..2cc12c6be 100644 --- a/src/common/tusb_common.h +++ b/src/common/tusb_common.h @@ -47,13 +47,13 @@ #define U16_TO_U8S_BE(u16) TU_U16_HIGH(u16), TU_U16_LOW(u16) #define U16_TO_U8S_LE(u16) TU_U16_LOW(u16), TU_U16_HIGH(u16) -#define U32_B1_U8(u32) ((uint8_t) ((((uint32_t) u32) >> 24) & 0x000000ff)) // MSB -#define U32_B2_U8(u32) ((uint8_t) ((((uint32_t) u32) >> 16) & 0x000000ff)) -#define U32_B3_U8(u32) ((uint8_t) ((((uint32_t) u32) >> 8) & 0x000000ff)) -#define U32_B4_U8(u32) ((uint8_t) (((uint32_t) u32) & 0x000000ff)) // LSB +#define TU_U32_BYTE3(u32) ((uint8_t) ((((uint32_t) u32) >> 24) & 0x000000ff)) // MSB +#define TU_U32_BYTE2(u32) ((uint8_t) ((((uint32_t) u32) >> 16) & 0x000000ff)) +#define TU_U32_BYTE1(u32) ((uint8_t) ((((uint32_t) u32) >> 8) & 0x000000ff)) +#define TU_U32_BYTE0(u32) ((uint8_t) (((uint32_t) u32) & 0x000000ff)) // LSB -#define U32_TO_U8S_BE(u32) U32_B1_U8(u32), U32_B2_U8(u32), U32_B3_U8(u32), U32_B4_U8(u32) -#define U32_TO_U8S_LE(u32) U32_B4_U8(u32), U32_B3_U8(u32), U32_B2_U8(u32), U32_B1_U8(u32) +#define U32_TO_U8S_BE(u32) TU_U32_BYTE3(u32), TU_U32_BYTE2(u32), TU_U32_BYTE1(u32), TU_U32_BYTE0(u32) +#define U32_TO_U8S_LE(u32) TU_U32_BYTE0(u32), TU_U32_BYTE1(u32), TU_U32_BYTE2(u32), TU_U32_BYTE3(u32) #define TU_BIT(n) (1U << (n)) @@ -81,9 +81,9 @@ #define tu_varclr(_var) tu_memclr(_var, sizeof(*(_var))) //------------- Bytes -------------// -TU_ATTR_ALWAYS_INLINE static inline uint32_t tu_u32(uint8_t b1, uint8_t b2, uint8_t b3, uint8_t b4) +TU_ATTR_ALWAYS_INLINE static inline uint32_t tu_u32(uint8_t b3, uint8_t b2, uint8_t b1, uint8_t b0) { - return ( ((uint32_t) b1) << 24) | ( ((uint32_t) b2) << 16) | ( ((uint32_t) b3) << 8) | b4; + return ( ((uint32_t) b3) << 24) | ( ((uint32_t) b2) << 16) | ( ((uint32_t) b1) << 8) | b0; } TU_ATTR_ALWAYS_INLINE static inline uint16_t tu_u16(uint8_t high, uint8_t low) @@ -91,8 +91,13 @@ TU_ATTR_ALWAYS_INLINE static inline uint16_t tu_u16(uint8_t high, uint8_t low) return (uint16_t) ((((uint16_t) high) << 8) | low); } -TU_ATTR_ALWAYS_INLINE static inline uint8_t tu_u16_high(uint16_t u16) { return (uint8_t) (((uint16_t) (u16 >> 8)) & 0x00ff); } -TU_ATTR_ALWAYS_INLINE static inline uint8_t tu_u16_low (uint16_t u16) { return (uint8_t) (u16 & 0x00ff); } +TU_ATTR_ALWAYS_INLINE static inline uint8_t tu_u32_byte3(uint32_t u32) { return TU_U32_BYTE3(u32); } +TU_ATTR_ALWAYS_INLINE static inline uint8_t tu_u32_byte2(uint32_t u32) { return TU_U32_BYTE2(u32); } +TU_ATTR_ALWAYS_INLINE static inline uint8_t tu_u32_byte1(uint32_t u32) { return TU_U32_BYTE1(u32); } +TU_ATTR_ALWAYS_INLINE static inline uint8_t tu_u32_byte0(uint32_t u32) { return TU_U32_BYTE0(u32); } + +TU_ATTR_ALWAYS_INLINE static inline uint8_t tu_u16_high(uint16_t u16) { return TU_U16_HIGH(u16); } +TU_ATTR_ALWAYS_INLINE static inline uint8_t tu_u16_low (uint16_t u16) { return TU_U16_LOW(u16); } //------------- Bits -------------// TU_ATTR_ALWAYS_INLINE static inline uint32_t tu_bit_set (uint32_t value, uint8_t pos) { return value | TU_BIT(pos); } @@ -141,6 +146,8 @@ static inline uint8_t tu_log2(uint32_t value) //------------- Unaligned Access -------------// #if TUP_ARCH_STRICT_ALIGN +// Rely on compiler to generate correct code for unaligned access + typedef struct { uint16_t val; } TU_ATTR_PACKED tu_unaligned_uint16_t; typedef struct { uint32_t val; } TU_ATTR_PACKED tu_unaligned_uint32_t; @@ -168,8 +175,44 @@ TU_ATTR_ALWAYS_INLINE static inline void tu_unaligned_write16(void* mem, uint16_ ua16->val = value; } +#elif TUP_MCU_STRICT_ALIGN + +// MCU such as LPC_IP3511 Highspeed cannot access unaligned memory on USB_RAM although it is ARM M4. +// We have to manually pick up bytes since tu_unaligned_uint32_t will still generate unaligned code +// NOTE: volatile cast to memory to prevent compiler to optimize and generate unaligned code +// TODO Big Endian may need minor changes +TU_ATTR_ALWAYS_INLINE static inline uint32_t tu_unaligned_read32(const void* mem) +{ + volatile uint8_t const* buf8 = (uint8_t const*) mem; + return tu_u32(buf8[3], buf8[2], buf8[1], buf8[0]); +} + +TU_ATTR_ALWAYS_INLINE static inline void tu_unaligned_write32(void* mem, uint32_t value) +{ + volatile uint8_t* buf8 = (uint8_t*) mem; + buf8[0] = tu_u32_byte0(value); + buf8[1] = tu_u32_byte1(value); + buf8[2] = tu_u32_byte2(value); + buf8[3] = tu_u32_byte3(value); +} + +TU_ATTR_ALWAYS_INLINE static inline uint16_t tu_unaligned_read16(const void* mem) +{ + volatile uint8_t const* buf8 = (uint8_t const*) mem; + return tu_u16(buf8[1], buf8[0]); +} + +TU_ATTR_ALWAYS_INLINE static inline void tu_unaligned_write16(void* mem, uint16_t value) +{ + volatile uint8_t* buf8 = (uint8_t*) mem; + buf8[0] = tu_u16_low(value); + buf8[1] = tu_u16_high(value); +} + + #else +// MCU that could access unaligned memory natively TU_ATTR_ALWAYS_INLINE static inline uint32_t tu_unaligned_read32 (const void* mem ) { return *((uint32_t*) mem); } TU_ATTR_ALWAYS_INLINE static inline uint16_t tu_unaligned_read16 (const void* mem ) { return *((uint16_t*) mem); } diff --git a/src/tusb_option.h b/src/tusb_option.h index 9fcc6c565..b59c7cb22 100644 --- a/src/tusb_option.h +++ b/src/tusb_option.h @@ -283,6 +283,7 @@ // TUP_ARCH_STRICT_ALIGN if arch cannot access unaligned memory + // ARMv7+ (M3-M7, M23-M33) can access unaligned memory #if (defined(__ARM_ARCH) && (__ARM_ARCH >= 7)) #define TUP_ARCH_STRICT_ALIGN 0 @@ -290,6 +291,16 @@ #define TUP_ARCH_STRICT_ALIGN 1 #endif +// TUP_MCU_STRICT_ALIGN will overwrite TUP_ARCH_STRICT_ALIGN. +// In case TUP_MCU_STRICT_ALIGN = 1 and TUP_ARCH_STRICT_ALIGN =0, we will not reply on compiler +// to generate unaligned access code. +// LPC_IP3511 Highspeed cannot access unaligned memory on USB_RAM +#if TUD_OPT_HIGH_SPEED && (CFG_TUSB_MCU == OPT_MCU_LPC54XXX || CFG_TUSB_MCU == OPT_MCU_LPC55XX) + #define TUP_MCU_STRICT_ALIGN 1 +#else + #define TUP_MCU_STRICT_ALIGN 0 +#endif + //------------------------------------------------------------------ // Configuration Validation //------------------------------------------------------------------