Mirror of https://github.com/rt64/rt64.git, synced 2024-12-26 03:15:44 +00:00
Fix TLUT masking to the lower half of TMEM. (#91)
commit c7e270cf15
parent 180f663938
@@ -6,7 +6,7 @@

 namespace RT64 {
     struct TMEMHasher {
-        static const uint32_t CurrentHashVersion = 2;
+        static const uint32_t CurrentHashVersion = 3;

         static bool needsToHashRowsIndividually(const LoadTile &loadTile, uint32_t width) {
             // When using 32-bit formats, TMEM contents are split in half in the lower and upper half, so the size per row is effectively
@@ -24,10 +24,18 @@ namespace RT64 {
             XXH3_state_t xxh3;
             XXH3_64bits_reset(&xxh3);
             const bool RGBA32 = (loadTile.siz == G_IM_SIZ_32b) && (loadTile.fmt == G_IM_FMT_RGBA);
-            const uint32_t tmemSize = RGBA32 ? (TMEMBytes >> 1) : TMEMBytes;
+            const bool usesTLUT = tlut > 0;
+            bool halfTMEM = RGBA32;
+
+            // Version 3 fixes an error where using TLUT did not mask the address to the lower half of TMEM.
+            if ((version >= 3) && usesTLUT) {
+                halfTMEM = true;
+            }
+
+            const uint32_t tmemSize = halfTMEM ? (TMEMBytes >> 1) : TMEMBytes;
             const uint32_t drawBytesPerRow = std::max(uint32_t(width) << (RGBA32 ? G_IM_SIZ_16b : loadTile.siz) >> 1U, 1U);
             const uint32_t drawBytesTotal = (loadTile.line << 3) * (height - 1) + drawBytesPerRow;
-            const uint32_t tmemMask = RGBA32 ? TMEMMask16 : TMEMMask8;
+            const uint32_t tmemMask = halfTMEM ? TMEMMask16 : TMEMMask8;
             const uint32_t tmemAddress = (loadTile.tmem << 3) & tmemMask;
             auto hashTMEM = [&](uint32_t tmemBaseAddress, uint32_t tmemOrAddress, uint32_t byteCount) {
                 // Too many bytes to hash in a single step. Wrap around TMEM and hash the rest.
@@ -64,7 +72,7 @@ namespace RT64 {
             }

             // If TLUT is active, we also hash the corresponding palette bytes.
-            if (tlut > 0) {
+            if (usesTLUT) {
                 const bool CI4 = (loadTile.siz == G_IM_SIZ_4b);
                 const int32_t paletteOffset = CI4 ? (loadTile.palette << 7) : 0;
                 const int32_t bytesToHash = CI4 ? 0x80 : 0x800;
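For reference, a minimal standalone C++ sketch of the masking decision introduced by the hasher change above. The TMEM constant values are assumptions here (TMEM holds 4096 bytes on the RDP, so the full byte mask is taken as 0xFFF and the half mask as 0x7FF); the repository's actual definitions are not part of this diff.

    #include <cstdint>

    // Assumed constants: TMEM holds 4096 bytes; the "16" mask restricts addressing
    // to the lower 2048 bytes. The real definitions are not shown in this diff.
    constexpr uint32_t TMEMBytes  = 0x1000;
    constexpr uint32_t TMEMMask8  = 0xFFF;
    constexpr uint32_t TMEMMask16 = 0x7FF;

    // Hypothetical helper mirroring the version-gated logic above: from hash
    // version 3 onward, a TLUT load is also limited to the lower half of TMEM,
    // the same way RGBA32 loads already were.
    struct TMEMRange {
        uint32_t size;
        uint32_t mask;
    };

    inline TMEMRange tmemRangeForLoad(bool isRGBA32, bool usesTLUT, uint32_t version) {
        bool halfTMEM = isRGBA32;
        if ((version >= 3) && usesTLUT) {
            halfTMEM = true;
        }
        return TMEMRange{
            halfTMEM ? (TMEMBytes >> 1) : TMEMBytes,
            halfTMEM ? TMEMMask16 : TMEMMask8
        };
    }

With these assumed masks, a TLUT load whose TMEM address points into the upper half is wrapped back into the lower half once the mask is applied, so the bytes that get hashed match the bytes the sampler actually reads.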
@@ -168,6 +168,7 @@ float4 sampleTMEM(int2 texelInt, uint siz, uint fmt, uint address, uint stride,
     const bool oddRow = (texelInt.y & 1);
     const bool oddColumn = (texelInt.x & 1);
     const bool isRgba32 = and(fmt == G_IM_FMT_RGBA, siz == G_IM_SIZ_32b);
+    const bool usesTlut = tlut > 0;
     // Determine the left shift to use to calculate the TMEM address. Effectively log2 of the pixel stride in half-bytes.
     // 4-bit (siz 0) -> 0
     // 8-bit (siz 1) -> 1
@@ -176,8 +177,8 @@ float4 sampleTMEM(int2 texelInt, uint siz, uint fmt, uint address, uint stride,
     // RGBA32 (siz 3) -> 2 (32-bit RGBA textures sample both halves of TMEM, so their pixel stride is only 16 bits).
     const uint tmemShift = select_uint(isRgba32, 2, siz);

-    // Determin the TMEM address mask. Each sample in RGBA32 only addresses half of TMEM.
-    const uint addressMask = select_uint(isRgba32, RDP_TMEM_MASK16, RDP_TMEM_MASK8);
+    // Determine the TMEM address mask. When using RGBA32 or TLUT, each sample only addresses half of TMEM.
+    const uint addressMask = select_uint(or(isRgba32, usesTlut), RDP_TMEM_MASK16, RDP_TMEM_MASK8);

     // Load the two low samples for most formats.
     const uint pixelAddress = texelInt.y * stride + ((texelInt.x << tmemShift) >> 1);
@@ -188,7 +189,7 @@ float4 sampleTMEM(int2 texelInt, uint siz, uint fmt, uint address, uint stride,
     const uint pixelShift = select_uint(oddColumn, 0, 4);
     const uint pixelValue4bit = (pixelValue0 >> pixelShift) & 0xF;

-    if (tlut > 0) {
+    if (usesTlut) {
         // Determine the palette index and load the value from the palette.
         const uint paletteAddress = select_uint(siz == G_IM_SIZ_4b,
             RDP_TMEM_PALETTE + (palette << 7) + ((pixelValue4bit) << 3),
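The shader change applies the same idea on the sampling side. Below is a C++ sketch of the address selection, again with assumed constant values (full mask 0xFFF, half mask 0x7FF, palette region starting at byte 0x800 in the upper half of TMEM). How addressMask is applied to the final fetch address is not visible in this diff, so that last step is an assumption as well.

    #include <cstdint>

    // Assumed values for the shader defines; not taken from this diff.
    constexpr uint32_t RDP_TMEM_MASK8   = 0xFFF;  // full 4 KiB of TMEM
    constexpr uint32_t RDP_TMEM_MASK16  = 0x7FF;  // lower half only
    constexpr uint32_t RDP_TMEM_PALETTE = 0x800;  // assumed start of the TLUT region

    // Hypothetical helper: byte address of a texel fetch. RGBA32 and TLUT formats
    // are masked to the lower half of TMEM; applying the mask to
    // (address + pixelAddress) is an assumption, since the diff only shows how
    // addressMask is chosen.
    inline uint32_t texelFetchAddress(int x, int y, uint32_t siz, bool isRgba32,
                                      bool usesTlut, uint32_t address, uint32_t stride) {
        const uint32_t tmemShift = isRgba32 ? 2u : siz;
        const uint32_t addressMask = (isRgba32 || usesTlut) ? RDP_TMEM_MASK16 : RDP_TMEM_MASK8;
        const uint32_t pixelAddress = uint32_t(y) * stride + ((uint32_t(x) << tmemShift) >> 1);
        return (address + pixelAddress) & addressMask;
    }

    // Byte address of a CI4 palette entry, following the expression shown in the
    // diff: each palette is 16 entries spaced 8 bytes apart (128 bytes per palette).
    inline uint32_t ci4PaletteAddress(uint32_t palette, uint32_t index4bit) {
        return RDP_TMEM_PALETTE + (palette << 7) + (index4bit << 3);
    }

Under these assumptions the palette occupies the upper half of TMEM, so a color-index fetch that is not masked to the lower half could read palette bytes instead of texture bytes, and the hash computed on the CPU side would not cover the data the shader actually samples.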