Add XIP cache maintenance API (fixes #2005) (#2013)

* Add XIP cache maintenance API (fixes #2005)

Also add a cache clean to hardware_flash implementations, to avoid
losing pending writes on the subsequent invalidate.

* Fix comment typos, add to docs index, remove unnecessary defaulting of XIP_CACHE assertions on FLASH

* Fix sense of unsigned wrapping test

* update bazel build

---------

Co-authored-by: graham sanderson <graham.sanderson@raspberrypi.com>
This commit is contained in:
Luke Wren 2024-11-11 18:09:30 +00:00 committed by GitHub
parent 09a9379ea9
commit f9eb48bd91
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 348 additions and 0 deletions

View File

@ -46,6 +46,7 @@
* \cond hardware_uart \defgroup hardware_uart hardware_uart \endcond
* \cond hardware_vreg \defgroup hardware_vreg hardware_vreg \endcond
* \cond hardware_watchdog \defgroup hardware_watchdog hardware_watchdog \endcond
* \cond hardware_xip_cache \defgroup hardware_xip_cache hardware_xip_cache \endcond
* \cond hardware_xosc \defgroup hardware_xosc hardware_xosc \endcond
* \cond hardware_powman hardware_powman
* \cond hardware_hazard3 hardware_hazard3

View File

@ -60,6 +60,7 @@ pico_add_subdirectory(rp2_common/hardware_timer)
pico_add_subdirectory(rp2_common/hardware_uart)
pico_add_subdirectory(rp2_common/hardware_vreg)
pico_add_subdirectory(rp2_common/hardware_watchdog)
pico_add_subdirectory(rp2_common/hardware_xip_cache)
pico_add_subdirectory(rp2_common/hardware_xosc)
if (PICO_RP2350 OR PICO_COMBINED_DOCS)

View File

@ -11,6 +11,7 @@ cc_library(
target_compatible_with = compatible_with_rp2(),
deps = [
"//src/rp2_common:hardware_structs",
"//src/rp2_common/hardware_xip_cache",
"//src/rp2_common:pico_platform",
"//src/rp2_common/pico_bootrom",
"//src/rp2_common/pico_multicore",

View File

@ -1,2 +1,3 @@
pico_simple_hardware_target(flash)
pico_mirrored_target_link_libraries(hardware_flash INTERFACE pico_bootrom)
pico_mirrored_target_link_libraries(hardware_flash INTERFACE hardware_xip_cache)

View File

@ -13,6 +13,7 @@
#else
#include "hardware/structs/qmi.h"
#endif
#include "hardware/xip_cache.h"
#define FLASH_BLOCK_ERASE_CMD 0xd8
@ -84,6 +85,8 @@ void __no_inline_not_in_flash_func(flash_range_erase)(uint32_t flash_offs, size_
rom_flash_flush_cache_fn flash_flush_cache_func = (rom_flash_flush_cache_fn)rom_func_lookup_inline(ROM_FUNC_FLASH_FLUSH_CACHE);
assert(connect_internal_flash_func && flash_exit_xip_func && flash_range_erase_func && flash_flush_cache_func);
flash_init_boot2_copyout();
// Commit any pending writes to external RAM, to avoid losing them in the subsequent flush:
xip_cache_clean_all();
// No flash accesses after this point
__compiler_memory_barrier();
@ -112,6 +115,7 @@ void __no_inline_not_in_flash_func(flash_range_program)(uint32_t flash_offs, con
rom_flash_flush_cache_fn flash_flush_cache_func = (rom_flash_flush_cache_fn)rom_func_lookup_inline(ROM_FUNC_FLASH_FLUSH_CACHE);
assert(connect_internal_flash_func && flash_exit_xip_func && flash_range_program_func && flash_flush_cache_func);
flash_init_boot2_copyout();
xip_cache_clean_all();
__compiler_memory_barrier();
@ -152,6 +156,8 @@ void __no_inline_not_in_flash_func(flash_do_cmd)(const uint8_t *txbuf, uint8_t *
rom_flash_flush_cache_fn flash_flush_cache_func = (rom_flash_flush_cache_fn)rom_func_lookup_inline(ROM_FUNC_FLASH_FLUSH_CACHE);
assert(connect_internal_flash_func && flash_exit_xip_func && flash_flush_cache_func);
flash_init_boot2_copyout();
xip_cache_clean_all();
__compiler_memory_barrier();
connect_internal_flash_func();
flash_exit_xip_func();

View File

@ -0,0 +1,14 @@
load("//bazel:defs.bzl", "compatible_with_rp2")
package(default_visibility = ["//visibility:public"])
# Bazel target for the XIP cache maintenance library: a single source file
# (xip_cache.c) and one public header exported under the "include" directory.
# Only marked compatible with the targets selected by compatible_with_rp2().
cc_library(
name = "hardware_xip_cache",
srcs = ["xip_cache.c"],
hdrs = ["include/hardware/xip_cache.h"],
includes = ["include"],
target_compatible_with = compatible_with_rp2(),
deps = [
# xip_cache.c includes hardware/sync.h for its barrier macros
"//src/rp2_common/hardware_sync",
],
)

View File

@ -0,0 +1,3 @@
# NOTE(review): pico_simple_hardware_target() is defined elsewhere; presumably it declares the
# hardware_xip_cache target that the next line refers to - confirm against the SDK's CMake helpers.
pico_simple_hardware_target(xip_cache)
# xip_cache.c includes hardware/sync.h (for its barrier macros), hence the hardware_sync link
pico_mirrored_target_link_libraries(hardware_xip_cache INTERFACE hardware_sync)

View File

@ -0,0 +1,210 @@
/*
* Copyright (c) 2024 Raspberry Pi Ltd.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
#ifndef _HARDWARE_XIP_CACHE_H
#define _HARDWARE_XIP_CACHE_H
#include "pico.h"
#include "hardware/regs/addressmap.h"
/** \file xip_cache.h
* \defgroup hardware_xip_cache hardware_xip_cache
*
* \brief Low-level cache maintenance operations for the XIP cache
*
* These functions apply some maintenance operation to either the entire cache contents, or a range
* of offsets within the downstream address space. Offsets start from 0 (indicating the first byte
* of flash), so pointers should have XIP_BASE subtracted before passing into one of these
* functions.
*
* \if rp2040-specific
* The only valid cache maintenance operation on RP2040 is "invalidate", which tells the cache to
* forget everything it knows about some address. This is necessary after a programming operation,
* because the cache does not automatically know about any serial programming operations performed
* on the external flash device, and could return stale data.
* \endif
*
* \if rp2350-specific
* On RP2350, the three types of operation are:
*
* * Invalidate: tell the cache to forget everything it knows about some address. The next access to
* that address will fetch from downstream memory.
*
* * Clean: if the addressed cache line contains data not yet written to external memory, then write
* that data out now, and mark the line as "clean" (i.e. not containing uncommitted write data)
*
* * Pin: mark an address as always being resident in the cache. This persists until the line is
* invalidated, and can be used to allocate part of the cache for cache-as-SRAM use.
*
* When using both external flash and external RAM (e.g. PSRAM), a simple way to maintain coherence
* over flash programming operations is to:
*
* 1. Clean the entire cache (e.g. using xip_cache_clean_all())
*
* 2. Erase + program the flash using serial SPI commands
*
* 3. Invalidate ("flush") the entire cache (e.g. using xip_cache_invalidate_all())
*
* The invalidate ensures the programming is visible to subsequent reads. The clean ensures that the
* invalidate does not discard any cached PSRAM write data.
*
* \endif
*
*/
// PICO_CONFIG: PARAM_ASSERTIONS_ENABLED_HARDWARE_XIP_CACHE, Enable/disable assertions in the hardware_xip_cache module, type=bool, default=0, group=hardware_xip_cache
#ifndef PARAM_ASSERTIONS_ENABLED_HARDWARE_XIP_CACHE
#define PARAM_ASSERTIONS_ENABLED_HARDWARE_XIP_CACHE 0
#endif
#define XIP_CACHE_LINE_SIZE _u(8)
#define XIP_CACHE_SIZE (_u(16) * _u(1024))
#if PICO_RP2040
#define XIP_CACHE_ADDRESS_SPACE_SIZE (_u(16) * _u(1024) * _u(1024))
#else
#define XIP_CACHE_ADDRESS_SPACE_SIZE (XIP_END - XIP_BASE)
#endif
// A read-only cache never requires cleaning (you can still call the functions, they are just no-ops)
#if PICO_RP2040
#define XIP_CACHE_IS_READ_ONLY 1
#else
#define XIP_CACHE_IS_READ_ONLY 0
#endif
#ifndef __ASSEMBLER__
#ifdef __cplusplus
extern "C" {
#endif
/*! \brief Invalidate the cache for the entire XIP address space
* \ingroup hardware_xip_cache
*
* Invalidation ensures that subsequent reads will fetch data from the downstream memory, rather
* than using (potentially stale) cached data.
*
* This function is faster than calling xip_cache_invalidate_range() for the entire address space,
* because it iterates over cachelines instead of addresses.
*
* @note Any pending write data held in the cache is lost: you can force the cache to commit these
* writes first, by calling xip_cache_clean_all()
*
* @note Unlike flash_flush_cache(), this function affects *only* the cache line state.
* flash_flush_cache() calls a ROM API which can have other effects on some platforms, like
* cleaning up the bootrom's QSPI GPIO setup on RP2040. Prefer this function for general cache
* maintenance use, and prefer flash_flush_cache in sequences of ROM flash API calls.
*/
void xip_cache_invalidate_all(void);
/*! \brief Invalidate a range of offsets within the XIP address space
* \ingroup hardware_xip_cache
*
* \param start_offset The first offset to be invalidated. Offset 0 means the first byte of XIP
* memory (e.g. flash). Pointers must have XIP_BASE subtracted before passing into this function.
* Must be 4-byte-aligned on RP2040. Must be aligned to the start of a cache line
* (XIP_CACHE_LINE_SIZE) on other platforms.
*
* \param size_bytes The number of bytes to invalidate. Must be a multiple of 4 bytes on RP2040.
* Must be a multiple of XIP_CACHE_LINE_SIZE on other platforms.
*
* Invalidation ensures that subsequent reads will fetch data from the downstream memory, rather
* than using (potentially stale) cached data.
* @note Any pending write data held in the cache is lost: you can force the cache to commit these
* writes first, by calling xip_cache_clean_range() with the same parameters. Generally this is
* not necessary because invalidation is used with flash (write-behind via programming), and
* cleaning is used with PSRAM (writing through the cache).
*
*/
void xip_cache_invalidate_range(uintptr_t start_offset, uintptr_t size_bytes);
#if !XIP_CACHE_IS_READ_ONLY
/*! \brief Clean the cache for the entire XIP address space
* \ingroup hardware_xip_cache
*
* This causes the cache to write out all pending write data to the downstream memory. For example,
* when suspending the system with state retained in external PSRAM, this ensures all data has made
* it out to external PSRAM before powering down.
*
* This function is faster than calling xip_cache_clean_range() for the entire address space,
* because it iterates over cachelines instead of addresses.
*
* \if rp2040-specific
* On RP2040 this is a no-op, as the XIP cache is read-only. This is indicated by the
* XIP_CACHE_IS_READ_ONLY macro.
* \endif
*
* \if rp2350-specific
* On RP2350, due to the workaround applied for RP2350-E11, this function also effectively
* invalidates all cache lines after cleaning them. The next access to each line will miss. Avoid
* this by calling xip_cache_clean_range() which does not suffer this issue.
* \endif
*
*/
void xip_cache_clean_all(void);
/*! \brief Clean a range of offsets within the XIP address space
* \ingroup hardware_xip_cache
*
* This causes the cache to write out pending write data at these offsets to the downstream memory.
*
* \if rp2040-specific
* On RP2040 this is a no-op, as the XIP cache is read-only. This is indicated by the
* XIP_CACHE_IS_READ_ONLY macro.
* \endif
*
* \param start_offset The first offset to be cleaned. Offset 0 means the first byte of XIP
* memory (e.g. flash). Pointers must have XIP_BASE subtracted before passing into this function.
* Must be aligned to the start of a cache line (XIP_CACHE_LINE_SIZE).
*
* \param size_bytes The number of bytes to clean. Must be a multiple of XIP_CACHE_LINE_SIZE.
*/
void xip_cache_clean_range(uintptr_t start_offset, uintptr_t size_bytes);
#else
// Stub these out inline to avoid generating a call to an empty function when they are no-ops
// Read-only cache: there is never any dirty data to write back, so cleaning a range does nothing.
// The arguments are accepted (and discarded) so callers need no platform-specific code.
static inline void xip_cache_clean_range(uintptr_t start_offset, uintptr_t size_bytes) {
    (void)size_bytes;
    (void)start_offset;
}

// Read-only cache: cleaning the whole cache is likewise a no-op.
static inline void xip_cache_clean_all(void) {
}
#endif
#if !PICO_RP2040
/*! \brief Pin a range of offsets within the XIP address space
* \ingroup hardware_xip_cache
*
* Pinning a line at an address allocates the line exclusively for use at that address. This means
* that all subsequent accesses to that address will hit the cache, and will not go to downstream
* memory. This persists until one of two things happens:
*
* * The line is invalidated, e.g. via xip_cache_invalidate_all()
*
* * The same line is pinned at a different address (note lines are selected by address modulo
* XIP_CACHE_SIZE)
*
* \param start_offset The first offset to be pinned. Offset 0 means the first byte of XIP
* memory (e.g. flash). Pointers must have XIP_BASE subtracted before passing into this function.
* Must be aligned to the start of a cache line (XIP_CACHE_LINE_SIZE).
*
* \param size_bytes The number of bytes to pin. Must be a multiple of XIP_CACHE_LINE_SIZE.
*
*/
void xip_cache_pin_range(uintptr_t start_offset, uintptr_t size_bytes);
#endif
#ifdef __cplusplus
}
#endif
#endif // !__ASSEMBLER__
#endif // !_HARDWARE_XIP_CACHE_H

View File

@ -0,0 +1,109 @@
#include "hardware/xip_cache.h"
#include "hardware/structs/xip.h"
// For barrier macros:
#include "hardware/sync.h"
// Implementation-private constants (exporting these would create a compatibility headache as they
// don't exist on all platforms; all of these operations are exposed through APIs anyways)
#if !PICO_RP2040
// Maintenance operation codes. xip_cache_maintain() adds the chosen code to the address it writes
// within the maintenance window, which is how the operation is selected for that cache line.
typedef enum {
    XIP_CACHE_INVALIDATE_BY_SET_WAY = 0x0,
    XIP_CACHE_CLEAN_BY_SET_WAY      = 0x1,
    XIP_CACHE_INVALIDATE_BY_ADDRESS = 0x2,
    XIP_CACHE_CLEAN_BY_ADDRESS      = 0x3,
    XIP_CACHE_PIN_AT_ADDRESS        = 0x7,
    // Largest code accepted by the parameter check in xip_cache_maintain()
    XIP_CACHE_OP_MAX                = XIP_CACHE_PIN_AT_ADDRESS
} cache_op_t;
#endif
// Used to ensure subsequent accesses observe the new state of the maintained cache lines.
// Invokes __dsb() then __isb() (the barrier macros from hardware/sync.h); wrapped in
// do { } while (0) so the macro can be used like a single statement. Every maintenance routine in
// this file issues it after its final maintenance write.
#define __post_maintenance_barrier() do {__dsb(); __isb();} while (0)
// All functions in this file are marked non-flash, even though they themselves may be executed
// safely from flash, because they are likely to be called during a flash programming operation
// (which makes flash execution momentarily unsafe)
__always_inline static void check_xip_offset_range(uintptr_t start_offset, uintptr_t size_bytes) {
    // Inputs are offsets rather than addresses (matching the flash API), so the valid range begins
    // at 0 and extends up to XIP_CACHE_ADDRESS_SPACE_SIZE.
    const uintptr_t end_offset = start_offset + size_bytes;
    // Keep the compiler quiet when parameter assertions are compiled out:
    (void)start_offset;
    (void)end_offset;
    // Both ends of the range must lie within the XIP address space...
    valid_params_if(HARDWARE_XIP_CACHE, start_offset <= XIP_CACHE_ADDRESS_SPACE_SIZE);
    valid_params_if(HARDWARE_XIP_CACHE, end_offset <= XIP_CACHE_ADDRESS_SPACE_SIZE);
    // ...and the end must not have wrapped around past zero (unsigned overflow of start + size)
    valid_params_if(HARDWARE_XIP_CACHE, end_offset >= start_offset);
}
#if !PICO_RP2040
// Generic code for RP2350-style caches: apply a maintenance operation to a range of offsets
// Apply maintenance operation `op` to every cache line in [start_offset, start_offset + size_bytes).
// Both start_offset and size_bytes must be multiples of XIP_CACHE_LINE_SIZE (checked only when
// parameter assertions are enabled for this module).
static void __no_inline_not_in_flash_func(xip_cache_maintain)(uintptr_t start_offset, uintptr_t size_bytes, cache_op_t op) {
    const uintptr_t line_mask = XIP_CACHE_LINE_SIZE - 1u;
    (void)line_mask;

    check_xip_offset_range(start_offset, size_bytes);
    valid_params_if(HARDWARE_XIP_CACHE, (start_offset & line_mask) == 0);
    valid_params_if(HARDWARE_XIP_CACHE, (size_bytes & line_mask) == 0);
    valid_params_if(HARDWARE_XIP_CACHE, (uint)op <= (uint)XIP_CACHE_OP_MAX);

    // One byte-sized write per cache line into the maintenance window; the operation code is
    // carried in the low bits of the written address, the line's offset in the rest.
    const uintptr_t op_window = XIP_MAINTENANCE_BASE + (uintptr_t)op;
    const uintptr_t end_offset = start_offset + size_bytes;
    uintptr_t line_offset = start_offset;
    while (line_offset < end_offset) {
        *(io_wo_8 *)(op_window + line_offset) = 0;
        line_offset += XIP_CACHE_LINE_SIZE;
    }

    __post_maintenance_barrier();
}
#endif
void __no_inline_not_in_flash_func(xip_cache_invalidate_all)(void) {
#if !PICO_RP2040
    // Issue the set/way invalidate over one cache's worth of offsets, taken from the very top of
    // the XIP address space.
    const uintptr_t first_offset = XIP_CACHE_ADDRESS_SPACE_SIZE - XIP_CACHE_SIZE;
    xip_cache_maintain(first_offset, XIP_CACHE_SIZE, XIP_CACHE_INVALIDATE_BY_SET_WAY);
#else
    // RP2040: trigger a flush through the XIP control register...
    xip_ctrl_hw->flush = 1;
    // ...then read the register back, which waits for the flush to complete
    (void)xip_ctrl_hw->flush;
    __post_maintenance_barrier();
#endif
}
void __no_inline_not_in_flash_func(xip_cache_invalidate_range)(uintptr_t start_offset, uintptr_t size_bytes) {
#if !PICO_RP2040
    xip_cache_maintain(start_offset, size_bytes, XIP_CACHE_INVALIDATE_BY_ADDRESS);
#else
    // RP2040's cache keeps two valid flags per cache line, one per half-line ("sector"), and both
    // need clearing. Hence the 4-byte granularity of the checks and of the loop below.
    check_xip_offset_range(start_offset, size_bytes);
    valid_params_if(HARDWARE_XIP_CACHE, (start_offset & 3u) == 0);
    valid_params_if(HARDWARE_XIP_CACHE, (size_bytes & 3u) == 0);

    // On RP2040, writing to a sector's normal cached+allocating address invalidates that sector
    const uintptr_t end_offset = start_offset + size_bytes;
    uintptr_t sector_offset = start_offset;
    while (sector_offset < end_offset) {
        *(io_wo_32 *)(XIP_BASE + sector_offset) = 0;
        sector_offset += 4u;
    }

    __post_maintenance_barrier();
#endif
}
#if !XIP_CACHE_IS_READ_ONLY
void __no_inline_not_in_flash_func(xip_cache_clean_all)(void) {
    // RP2350-E11 workaround: the set/way clean is issued at addresses outside of the downstream QMI
    // address range. Because of the erroneous tag update, the net effect is a clean+invalidate
    // (pinned lines are left untouched), so the next access to a cleaned address will miss.
    const uintptr_t first_offset = XIP_END - XIP_BASE - XIP_CACHE_SIZE;
    const uintptr_t span_bytes = XIP_CACHE_SIZE;
    xip_cache_maintain(first_offset, span_bytes, XIP_CACHE_CLEAN_BY_SET_WAY);
}
#endif
#if !XIP_CACHE_IS_READ_ONLY
void __no_inline_not_in_flash_func(xip_cache_clean_range)(uintptr_t start_offset, uintptr_t size_bytes) {
xip_cache_maintain(start_offset, size_bytes, XIP_CACHE_CLEAN_BY_ADDRESS);
}
#endif
#if !PICO_RP2040
void __no_inline_not_in_flash_func(xip_cache_pin_range)(uintptr_t start_offset, uintptr_t size_bytes) {
valid_params_if(HARDWARE_XIP_CACHE, size_bytes <= XIP_CACHE_SIZE);
xip_cache_maintain(start_offset, size_bytes, XIP_CACHE_PIN_AT_ADDRESS);
}
#endif

View File

@ -60,6 +60,7 @@ cc_library(
"//src/rp2_common/hardware_uart",
"//src/rp2_common/hardware_vreg",
"//src/rp2_common/hardware_watchdog",
"//src/rp2_common/hardware_xip_cache",
"//src/rp2_common/hardware_xosc",
"//src/rp2_common/pico_aon_timer",
"//src/rp2_common/pico_bootrom",

View File

@ -24,6 +24,7 @@ set(KITCHEN_SINK_LIBS
hardware_uart
hardware_vreg
hardware_watchdog
hardware_xip_cache
hardware_xosc
pico_aon_timer
pico_bit_ops