Fix TLUT masking to the lower half of TMEM. (#91)

This commit is contained in:
Darío 2024-11-05 21:40:11 -03:00 committed by GitHub
parent 180f663938
commit c7e270cf15
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 16 additions and 7 deletions

View File

@ -6,7 +6,7 @@
namespace RT64 {
struct TMEMHasher {
static const uint32_t CurrentHashVersion = 2;
static const uint32_t CurrentHashVersion = 3;
static bool needsToHashRowsIndividually(const LoadTile &loadTile, uint32_t width) {
// When using 32-bit formats, TMEM contents are split in half in the lower and upper half, so the size per row is effectively
@ -24,10 +24,18 @@ namespace RT64 {
XXH3_state_t xxh3;
XXH3_64bits_reset(&xxh3);
const bool RGBA32 = (loadTile.siz == G_IM_SIZ_32b) && (loadTile.fmt == G_IM_FMT_RGBA);
const uint32_t tmemSize = RGBA32 ? (TMEMBytes >> 1) : TMEMBytes;
const bool usesTLUT = tlut > 0;
bool halfTMEM = RGBA32;
// Version 3 fixes an error where using TLUT did not mask the address to the lower half of TMEM.
if ((version >= 3) && usesTLUT) {
halfTMEM = true;
}
const uint32_t tmemSize = halfTMEM ? (TMEMBytes >> 1) : TMEMBytes;
const uint32_t drawBytesPerRow = std::max(uint32_t(width) << (RGBA32 ? G_IM_SIZ_16b : loadTile.siz) >> 1U, 1U);
const uint32_t drawBytesTotal = (loadTile.line << 3) * (height - 1) + drawBytesPerRow;
const uint32_t tmemMask = RGBA32 ? TMEMMask16 : TMEMMask8;
const uint32_t tmemMask = halfTMEM ? TMEMMask16 : TMEMMask8;
const uint32_t tmemAddress = (loadTile.tmem << 3) & tmemMask;
auto hashTMEM = [&](uint32_t tmemBaseAddress, uint32_t tmemOrAddress, uint32_t byteCount) {
// Too many bytes to hash in a single step. Wrap around TMEM and hash the rest.
@ -64,7 +72,7 @@ namespace RT64 {
}
// If TLUT is active, we also hash the corresponding palette bytes.
if (tlut > 0) {
if (usesTLUT) {
const bool CI4 = (loadTile.siz == G_IM_SIZ_4b);
const int32_t paletteOffset = CI4 ? (loadTile.palette << 7) : 0;
const int32_t bytesToHash = CI4 ? 0x80 : 0x800;

View File

@ -168,6 +168,7 @@ float4 sampleTMEM(int2 texelInt, uint siz, uint fmt, uint address, uint stride,
const bool oddRow = (texelInt.y & 1);
const bool oddColumn = (texelInt.x & 1);
const bool isRgba32 = and(fmt == G_IM_FMT_RGBA, siz == G_IM_SIZ_32b);
const bool usesTlut = tlut > 0;
// Determine the left shift to use to calculate the TMEM address. Effectively log2 of the pixel stride in half-bytes.
// 4-bit (siz 0) -> 0
// 8-bit (siz 1) -> 1
@ -176,8 +177,8 @@ float4 sampleTMEM(int2 texelInt, uint siz, uint fmt, uint address, uint stride,
// RGBA32 (siz 3) -> 2 (32-bit RGBA textures sample both halves of TMEM, so their pixel stride is only 16 bits).
const uint tmemShift = select_uint(isRgba32, 2, siz);
// Determine the TMEM address mask. Each sample in RGBA32 only addresses half of TMEM.
const uint addressMask = select_uint(isRgba32, RDP_TMEM_MASK16, RDP_TMEM_MASK8);
// Determine the TMEM address mask. When using RGBA32 or TLUT, each sample only addresses half of TMEM.
const uint addressMask = select_uint(or(isRgba32, usesTlut), RDP_TMEM_MASK16, RDP_TMEM_MASK8);
// Load the two low samples for most formats.
const uint pixelAddress = texelInt.y * stride + ((texelInt.x << tmemShift) >> 1);
@ -188,7 +189,7 @@ float4 sampleTMEM(int2 texelInt, uint siz, uint fmt, uint address, uint stride,
const uint pixelShift = select_uint(oddColumn, 0, 4);
const uint pixelValue4bit = (pixelValue0 >> pixelShift) & 0xF;
if (tlut > 0) {
if (usesTlut) {
// Determine the palette index and load the value from the palette.
const uint paletteAddress = select_uint(siz == G_IM_SIZ_4b,
RDP_TMEM_PALETTE + (palette << 7) + ((pixelValue4bit) << 3),