diff --git a/Source/Core/VideoBackends/Software/TextureSampler.cpp b/Source/Core/VideoBackends/Software/TextureSampler.cpp index 9629a7600b..cc90f11ae2 100644 --- a/Source/Core/VideoBackends/Software/TextureSampler.cpp +++ b/Source/Core/VideoBackends/Software/TextureSampler.cpp @@ -106,6 +106,7 @@ void SampleMip(s32 s, s32 t, s32 mip, bool linear, u8 texmap, u8 *sample) TexMode0& tm0 = texUnit.texMode0[subTexmap]; TexImage0& ti0 = texUnit.texImage0[subTexmap]; TexTLUT& texTlut = texUnit.texTlut[subTexmap]; + TlutFormat tlutfmt = (TlutFormat) texTlut.tlut_format; u8 *imageSrc, *imageSrcOdd = nullptr; if (texUnit.texImage1[subTexmap].image_type) @@ -124,6 +125,7 @@ void SampleMip(s32 s, s32 t, s32 mip, bool linear, u8 texmap, u8 *sample) int imageHeight = ti0.height; int tlutAddress = texTlut.tmem_offset << 9; + const u8* tlut = &texMem[tlutAddress]; // reduce sample location and texture size to mip level // move texture pointer to mip location @@ -179,20 +181,18 @@ void SampleMip(s32 s, s32 t, s32 mip, bool linear, u8 texmap, u8 *sample) WrapCoord(imageSPlus1, tm0.wrap_s, imageWidth); WrapCoord(imageTPlus1, tm0.wrap_t, imageHeight); - TlutFormat tlutfmt = (TlutFormat) texTlut.tlut_format; - if (!(ti0.format == GX_TF_RGBA8 && texUnit.texImage1[subTexmap].image_type)) { - TexDecoder_DecodeTexel(sampledTex, imageSrc, imageS, imageT, imageWidth, ti0.format, tlutAddress, tlutfmt); + TexDecoder_DecodeTexel(sampledTex, imageSrc, imageS, imageT, imageWidth, ti0.format, tlut, tlutfmt); SetTexel(sampledTex, texel, (128 - fractS) * (128 - fractT)); - TexDecoder_DecodeTexel(sampledTex, imageSrc, imageSPlus1, imageT, imageWidth, ti0.format, tlutAddress, tlutfmt); + TexDecoder_DecodeTexel(sampledTex, imageSrc, imageSPlus1, imageT, imageWidth, ti0.format, tlut, tlutfmt); AddTexel(sampledTex, texel, (fractS) * (128 - fractT)); - TexDecoder_DecodeTexel(sampledTex, imageSrc, imageS, imageTPlus1, imageWidth, ti0.format, tlutAddress, tlutfmt); + TexDecoder_DecodeTexel(sampledTex, imageSrc, imageS, imageTPlus1, imageWidth, ti0.format, tlut, tlutfmt); AddTexel(sampledTex, texel, (128 - fractS) * (fractT)); - TexDecoder_DecodeTexel(sampledTex, imageSrc, imageSPlus1, imageTPlus1, imageWidth, ti0.format, tlutAddress, tlutfmt); + TexDecoder_DecodeTexel(sampledTex, imageSrc, imageSPlus1, imageTPlus1, imageWidth, ti0.format, tlut, tlutfmt); AddTexel(sampledTex, texel, (fractS) * (fractT)); } else @@ -226,7 +226,7 @@ void SampleMip(s32 s, s32 t, s32 mip, bool linear, u8 texmap, u8 *sample) WrapCoord(imageT, tm0.wrap_t, imageHeight); if (!(ti0.format == GX_TF_RGBA8 && texUnit.texImage1[subTexmap].image_type)) - TexDecoder_DecodeTexel(sample, imageSrc, imageS, imageT, imageWidth, ti0.format, tlutAddress, (TlutFormat) texTlut.tlut_format); + TexDecoder_DecodeTexel(sample, imageSrc, imageS, imageT, imageWidth, ti0.format, tlut, tlutfmt); else TexDecoder_DecodeTexelRGBA8FromTmem(sample, imageSrc, imageSrcOdd, imageS, imageT, imageWidth); } diff --git a/Source/Core/VideoCommon/TextureCacheBase.cpp b/Source/Core/VideoCommon/TextureCacheBase.cpp index cbfdb47d62..c0b46d92b6 100644 --- a/Source/Core/VideoCommon/TextureCacheBase.cpp +++ b/Source/Core/VideoCommon/TextureCacheBase.cpp @@ -490,7 +490,8 @@ TextureCache::TCacheEntryBase* TextureCache::Load(unsigned int const stage, { if (!(texformat == GX_TF_RGBA8 && from_tmem)) { - pcfmt = TexDecoder_Decode(temp, src_data, expandedWidth, expandedHeight, texformat, tlutaddr, (TlutFormat) tlutfmt); + const u8* tlut = &texMem[tlutaddr]; + pcfmt = TexDecoder_Decode(temp, src_data, expandedWidth, expandedHeight, texformat, tlut, (TlutFormat) tlutfmt); } else { @@ -566,7 +567,8 @@ TextureCache::TCacheEntryBase* TextureCache::Load(unsigned int const stage, const u8*& mip_src_data = from_tmem ? ((level % 2) ? ptr_odd : ptr_even) : src_data; - TexDecoder_Decode(temp, mip_src_data, expanded_mip_width, expanded_mip_height, texformat, tlutaddr, (TlutFormat) tlutfmt); + const u8* tlut = &texMem[tlutaddr]; + TexDecoder_Decode(temp, mip_src_data, expanded_mip_width, expanded_mip_height, texformat, tlut, (TlutFormat) tlutfmt); mip_src_data += TexDecoder_GetTextureSizeInBytes(expanded_mip_width, expanded_mip_height, texformat); entry->Load(mip_width, mip_height, expanded_mip_width, level); diff --git a/Source/Core/VideoCommon/TextureDecoder.h b/Source/Core/VideoCommon/TextureDecoder.h index 9838881ce6..be566fcf16 100644 --- a/Source/Core/VideoCommon/TextureDecoder.h +++ b/Source/Core/VideoCommon/TextureDecoder.h @@ -78,12 +78,12 @@ enum PC_TexFormat PC_TEX_FMT_DXT1, }; -PC_TexFormat TexDecoder_Decode(u8 *dst, const u8 *src, int width, int height, int texformat, int tlutaddr, TlutFormat tlutfmt); +PC_TexFormat TexDecoder_Decode(u8 *dst, const u8 *src, int width, int height, int texformat, const u8* tlut, TlutFormat tlutfmt); PC_TexFormat TexDecoder_DecodeRGBA8FromTmem(u8* dst, const u8 *src_ar, const u8 *src_gb, int width, int height); -void TexDecoder_DecodeTexel(u8 *dst, const u8 *src, int s, int t, int imageWidth, int texformat, int tlutaddr, TlutFormat tlutfmt); +void TexDecoder_DecodeTexel(u8 *dst, const u8 *src, int s, int t, int imageWidth, int texformat, const u8* tlut, TlutFormat tlutfmt); void TexDecoder_DecodeTexelRGBA8FromTmem(u8 *dst, const u8 *src_ar, const u8* src_gb, int s, int t, int imageWidth); void TexDecoder_SetTexFmtOverlayOptions(bool enable, bool center); /* Internal method, implemented by TextureDecoder_Generic and TextureDecoder_x64. */ -PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int height, int texformat, int tlutaddr, TlutFormat tlutfmt); +PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int height, int texformat, const u8* tlut, TlutFormat tlutfmt); diff --git a/Source/Core/VideoCommon/TextureDecoder_Common.cpp b/Source/Core/VideoCommon/TextureDecoder_Common.cpp index 4cb8167104..cb71c3613f 100644 --- a/Source/Core/VideoCommon/TextureDecoder_Common.cpp +++ b/Source/Core/VideoCommon/TextureDecoder_Common.cpp @@ -242,9 +242,9 @@ static void TexDecoder_DrawOverlay(u8 *dst, int width, int height, int texformat } } -PC_TexFormat TexDecoder_Decode(u8 *dst, const u8 *src, int width, int height, int texformat, int tlutaddr, TlutFormat tlutfmt) +PC_TexFormat TexDecoder_Decode(u8 *dst, const u8 *src, int width, int height, int texformat, const u8* tlut, TlutFormat tlutfmt) { - PC_TexFormat pc_texformat = _TexDecoder_DecodeImpl((u32*)dst, src, width, height, texformat, tlutaddr, tlutfmt); + PC_TexFormat pc_texformat = _TexDecoder_DecodeImpl((u32*)dst, src, width, height, texformat, tlut, tlutfmt); if (TexFmt_Overlay_Enable && pc_texformat != PC_TEX_FMT_NONE) TexDecoder_DrawOverlay(dst, width, height, texformat, pc_texformat); @@ -301,7 +301,7 @@ struct DXTBlock u8 lines[4]; }; -void TexDecoder_DecodeTexel(u8 *dst, const u8 *src, int s, int t, int imageWidth, int texformat, int tlutaddr, TlutFormat tlutfmt) +void TexDecoder_DecodeTexel(u8 *dst, const u8 *src, int s, int t, int imageWidth, int texformat, const u8* tlut_, TlutFormat tlutfmt) { /* General formula for computing texture offset // @@ -330,7 +330,7 @@ void TexDecoder_DecodeTexel(u8 *dst, const u8 *src, int s, int t, int imageWidth u32 offset = base + (blkOff >> 1); u8 val = (*(src + offset) >> rs) & 0xF; - u16 *tlut = (u16*)(texMem + tlutaddr); + u16 *tlut = (u16*) tlut_; switch (tlutfmt) { @@ -395,7 +395,7 @@ void TexDecoder_DecodeTexel(u8 *dst, const u8 *src, int s, int t, int imageWidth u32 blkOff = (blkT << 3) + blkS; u8 val = *(src + base + blkOff); - u16 *tlut = (u16*)(texMem + tlutaddr); + u16 *tlut = (u16*) tlut_; switch (tlutfmt) { @@ -460,7 +460,7 @@ void TexDecoder_DecodeTexel(u8 *dst, const u8 *src, int s, int t, int imageWidth const u16* valAddr = (u16*)(src + offset); u16 val = Common::swap16(*valAddr) & 0x3FFF; - u16 *tlut = (u16*)(texMem + tlutaddr); + u16 *tlut = (u16*) tlut_; switch (tlutfmt) { diff --git a/Source/Core/VideoCommon/TextureDecoder_Generic.cpp b/Source/Core/VideoCommon/TextureDecoder_Generic.cpp index 3dff970afa..41144b79b5 100644 --- a/Source/Core/VideoCommon/TextureDecoder_Generic.cpp +++ b/Source/Core/VideoCommon/TextureDecoder_Generic.cpp @@ -60,9 +60,9 @@ struct DXTBlock u8 lines[4]; }; -static inline void decodebytesC4_5A3_To_rgba32(u32 *dst, const u8 *src, int tlutaddr) +static inline void decodebytesC4_5A3_To_rgba32(u32 *dst, const u8 *src, const u8* tlut_) { - u16 *tlut = (u16*)(texMem + tlutaddr); + const u16* tlut = (u16*) tlut_; for (int x = 0; x < 4; x++) { u8 val = src[x]; @@ -71,9 +71,9 @@ static inline void decodebytesC4_5A3_To_rgba32(u32 *dst, const u8 *src, int tlut } } -static inline void decodebytesC4IA8_To_RGBA(u32* dst, const u8* src, int tlutaddr) +static inline void decodebytesC4IA8_To_RGBA(u32* dst, const u8* src, const u8* tlut_) { - u16* tlut = (u16*)(texMem+tlutaddr); + const u16* tlut = (u16*) tlut_; for (int x = 0; x < 4; x++) { u8 val = src[x]; @@ -82,9 +82,9 @@ static inline void decodebytesC4IA8_To_RGBA(u32* dst, const u8* src, int tlutadd } } -static inline void decodebytesC4RGB565_To_RGBA(u32* dst, const u8* src, int tlutaddr) +static inline void decodebytesC4RGB565_To_RGBA(u32* dst, const u8* src, const u8* tlut_) { - u16* tlut = (u16*)(texMem+tlutaddr); + const u16* tlut = (u16*) tlut_; for (int x = 0; x < 4; x++) { u8 val = src[x]; @@ -93,9 +93,9 @@ static inline void decodebytesC4RGB565_To_RGBA(u32* dst, const u8* src, int tlut } } -static inline void decodebytesC8_5A3_To_RGBA32(u32 *dst, const u8 *src, int tlutaddr) +static inline void decodebytesC8_5A3_To_RGBA32(u32 *dst, const u8 *src, const u8* tlut_) { - u16 *tlut = (u16*)(texMem + tlutaddr); + const u16* tlut = (u16*) tlut_; for (int x = 0; x < 8; x++) { u8 val = src[x]; @@ -103,27 +103,27 @@ static inline void decodebytesC8_5A3_To_RGBA32(u32 *dst, const u8 *src, int tlut } } -static inline void decodebytesC8IA8_To_RGBA(u32* dst, const u8* src, int tlutaddr) +static inline void decodebytesC8IA8_To_RGBA(u32* dst, const u8* src, const u8* tlut_) { - u16* tlut = (u16*)(texMem + tlutaddr); + const u16* tlut = (u16*) tlut_; for (int x = 0; x < 8; x++) { *dst++ = decodeIA8Swapped(tlut[src[x]]); } } -static inline void decodebytesC8RGB565_To_RGBA(u32* dst, const u8* src, int tlutaddr) +static inline void decodebytesC8RGB565_To_RGBA(u32* dst, const u8* src, const u8* tlut_) { - u16* tlut = (u16*)(texMem + tlutaddr); + const u16* tlut = (u16*) tlut_; for (int x = 0; x < 8; x++) { *dst++ = decode565RGBA(Common::swap16(tlut[src[x]])); } } -static inline void decodebytesC14X2_5A3_To_RGBA(u32 *dst, const u16 *src, int tlutaddr) +static inline void decodebytesC14X2_5A3_To_RGBA(u32 *dst, const u16 *src, const u8* tlut_) { - u16 *tlut = (u16*)(texMem + tlutaddr); + const u16* tlut = (u16*) tlut_; for (int x = 0; x < 4; x++) { u16 val = Common::swap16(src[x]); @@ -131,9 +131,9 @@ static inline void decodebytesC14X2_5A3_To_RGBA(u32 *dst, const u16 *src, int tl } } -static inline void decodebytesC14X2IA8_To_RGBA(u32* dst, const u16* src, int tlutaddr) +static inline void decodebytesC14X2IA8_To_RGBA(u32* dst, const u16* src, const u8* tlut_) { - u16* tlut = (u16*)(texMem + tlutaddr); + const u16* tlut = (u16*) tlut_; for (int x = 0; x < 4; x++) { u16 val = Common::swap16(src[x]); @@ -141,9 +141,9 @@ static inline void decodebytesC14X2IA8_To_RGBA(u32* dst, const u16* src, int tlu } } -static inline void decodebytesC14X2rgb565_To_RGBA(u32* dst, const u16* src, int tlutaddr) +static inline void decodebytesC14X2rgb565_To_RGBA(u32* dst, const u16* src, const u8* tlut_) { - u16* tlut = (u16*)(texMem + tlutaddr); + const u16* tlut = (u16*) tlut_; for (int x = 0; x < 4; x++) { u16 val = Common::swap16(src[x]); @@ -246,7 +246,7 @@ static void decodeDXTBlockRGBA(u32 *dst, const DXTBlock *src, int pitch) // TODO: complete SSE2 optimization of less often used texture formats. // TODO: refactor algorithms using _mm_loadl_epi64 unaligned loads to prefer 128-bit aligned loads. -PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int height, int texformat, int tlutaddr, TlutFormat tlutfmt) +PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int height, int texformat, const u8* tlut, TlutFormat tlutfmt) { const int Wsteps4 = (width + 3) / 4; @@ -261,21 +261,21 @@ PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int he for (int y = 0; y < height; y += 8) for (int x = 0, yStep = (y / 8) * Wsteps8; x < width; x += 8,yStep++) for (int iy = 0, xStep = 8 * yStep; iy < 8; iy++,xStep++) - decodebytesC4_5A3_To_rgba32(dst + (y + iy) * width + x, src + 4 * xStep, tlutaddr); + decodebytesC4_5A3_To_rgba32(dst + (y + iy) * width + x, src + 4 * xStep, tlut); } else if (tlutfmt == GX_TL_IA8) { for (int y = 0; y < height; y += 8) for (int x = 0, yStep = (y / 8) * Wsteps8; x < width; x += 8,yStep++) for (int iy = 0, xStep = 8 * yStep; iy < 8; iy++,xStep++) - decodebytesC4IA8_To_RGBA(dst + (y + iy) * width + x, src + 4 * xStep, tlutaddr); + decodebytesC4IA8_To_RGBA(dst + (y + iy) * width + x, src + 4 * xStep, tlut); } else if (tlutfmt == GX_TL_RGB565) { for (int y = 0; y < height; y += 8) for (int x = 0, yStep = (y / 8) * Wsteps8; x < width; x += 8,yStep++) for (int iy = 0, xStep = 8 * yStep; iy < 8; iy++,xStep++) - decodebytesC4RGB565_To_RGBA(dst + (y + iy) * width + x, src + 4 * xStep, tlutaddr); + decodebytesC4RGB565_To_RGBA(dst + (y + iy) * width + x, src + 4 * xStep, tlut); } break; case GX_TF_I4: @@ -323,21 +323,21 @@ PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int he for (int y = 0; y < height; y += 4) for (int x = 0, yStep = (y / 4) * Wsteps8; x < width; x += 8, yStep++) for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++) - decodebytesC8_5A3_To_RGBA32((u32*)dst + (y + iy) * width + x, src + 8 * xStep, tlutaddr); + decodebytesC8_5A3_To_RGBA32((u32*)dst + (y + iy) * width + x, src + 8 * xStep, tlut); } else if (tlutfmt == GX_TL_IA8) { for (int y = 0; y < height; y += 4) for (int x = 0, yStep = (y / 4) * Wsteps8; x < width; x += 8, yStep++) for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++) - decodebytesC8IA8_To_RGBA(dst + (y + iy) * width + x, src + 8 * xStep, tlutaddr); + decodebytesC8IA8_To_RGBA(dst + (y + iy) * width + x, src + 8 * xStep, tlut); } else if (tlutfmt == GX_TL_RGB565) { for (int y = 0; y < height; y += 4) for (int x = 0, yStep = (y / 4) * Wsteps8; x < width; x += 8, yStep++) for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++) - decodebytesC8RGB565_To_RGBA(dst + (y + iy) * width + x, src + 8 * xStep, tlutaddr); + decodebytesC8RGB565_To_RGBA(dst + (y + iy) * width + x, src + 8 * xStep, tlut); } break; @@ -372,21 +372,21 @@ PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int he for (int y = 0; y < height; y += 4) for (int x = 0, yStep = (y / 4) * Wsteps4; x < width; x += 4, yStep++) for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++) - decodebytesC14X2_5A3_To_RGBA(dst + (y + iy) * width + x, (u16*)(src + 8 * xStep), tlutaddr); + decodebytesC14X2_5A3_To_RGBA(dst + (y + iy) * width + x, (u16*)(src + 8 * xStep), tlut); } else if (tlutfmt == GX_TL_IA8) { for (int y = 0; y < height; y += 4) for (int x = 0, yStep = (y / 4) * Wsteps4; x < width; x += 4, yStep++) for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++) - decodebytesC14X2IA8_To_RGBA(dst + (y + iy) * width + x, (u16*)(src + 8 * xStep), tlutaddr); + decodebytesC14X2IA8_To_RGBA(dst + (y + iy) * width + x, (u16*)(src + 8 * xStep), tlut); } else if (tlutfmt == GX_TL_RGB565) { for (int y = 0; y < height; y += 4) for (int x = 0, yStep = (y / 4) * Wsteps4; x < width; x += 4, yStep++) for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++) - decodebytesC14X2rgb565_To_RGBA(dst + (y + iy) * width + x, (u16*)(src + 8 * xStep), tlutaddr); + decodebytesC14X2rgb565_To_RGBA(dst + (y + iy) * width + x, (u16*)(src + 8 * xStep), tlut); } break; case GX_TF_RGB565: diff --git a/Source/Core/VideoCommon/TextureDecoder_x64.cpp b/Source/Core/VideoCommon/TextureDecoder_x64.cpp index 867566032b..9603ec1695 100644 --- a/Source/Core/VideoCommon/TextureDecoder_x64.cpp +++ b/Source/Core/VideoCommon/TextureDecoder_x64.cpp @@ -83,9 +83,9 @@ struct DXTBlock u8 lines[4]; }; -static inline void decodebytesC4_5A3_To_rgba32(u32 *dst, const u8 *src, int tlutaddr) +static inline void decodebytesC4_5A3_To_rgba32(u32 *dst, const u8 *src, const u8* tlut_) { - u16 *tlut = (u16*)(texMem + tlutaddr); + const u16* tlut = (u16*) tlut_; for (int x = 0; x < 4; x++) { u8 val = src[x]; @@ -94,9 +94,9 @@ static inline void decodebytesC4_5A3_To_rgba32(u32 *dst, const u8 *src, int tlut } } -static inline void decodebytesC4IA8_To_RGBA(u32* dst, const u8* src, int tlutaddr) +static inline void decodebytesC4IA8_To_RGBA(u32* dst, const u8* src, const u8* tlut_) { - u16* tlut = (u16*)(texMem+tlutaddr); + const u16* tlut = (u16*) tlut_; for (int x = 0; x < 4; x++) { u8 val = src[x]; @@ -105,9 +105,9 @@ static inline void decodebytesC4IA8_To_RGBA(u32* dst, const u8* src, int tlutadd } } -static inline void decodebytesC4RGB565_To_RGBA(u32* dst, const u8* src, int tlutaddr) +static inline void decodebytesC4RGB565_To_RGBA(u32* dst, const u8* src, const u8* tlut_) { - u16* tlut = (u16*)(texMem+tlutaddr); + const u16* tlut = (u16*) tlut_; for (int x = 0; x < 4; x++) { u8 val = src[x]; @@ -116,9 +116,9 @@ static inline void decodebytesC4RGB565_To_RGBA(u32* dst, const u8* src, int tlut } } -static inline void decodebytesC8_5A3_To_RGBA32(u32 *dst, const u8 *src, int tlutaddr) +static inline void decodebytesC8_5A3_To_RGBA32(u32 *dst, const u8 *src, const u8* tlut_) { - u16 *tlut = (u16*)(texMem + tlutaddr); + const u16* tlut = (u16*) tlut_; for (int x = 0; x < 8; x++) { u8 val = src[x]; @@ -126,27 +126,27 @@ static inline void decodebytesC8_5A3_To_RGBA32(u32 *dst, const u8 *src, int tlut } } -static inline void decodebytesC8IA8_To_RGBA(u32* dst, const u8* src, int tlutaddr) +static inline void decodebytesC8IA8_To_RGBA(u32* dst, const u8* src, const u8* tlut_) { - u16* tlut = (u16*)(texMem + tlutaddr); + const u16* tlut = (u16*) tlut_; for (int x = 0; x < 8; x++) { *dst++ = decodeIA8Swapped(tlut[src[x]]); } } -static inline void decodebytesC8RGB565_To_RGBA(u32* dst, const u8* src, int tlutaddr) +static inline void decodebytesC8RGB565_To_RGBA(u32* dst, const u8* src, const u8* tlut_) { - u16* tlut = (u16*)(texMem + tlutaddr); + const u16* tlut = (u16*) tlut_; for (int x = 0; x < 8; x++) { *dst++ = decode565RGBA(Common::swap16(tlut[src[x]])); } } -static inline void decodebytesC14X2_5A3_To_RGBA(u32 *dst, const u16 *src, int tlutaddr) +static inline void decodebytesC14X2_5A3_To_RGBA(u32 *dst, const u16 *src, const u8* tlut_) { - u16 *tlut = (u16*)(texMem + tlutaddr); + const u16* tlut = (u16*) tlut_; for (int x = 0; x < 4; x++) { u16 val = Common::swap16(src[x]); @@ -154,9 +154,9 @@ static inline void decodebytesC14X2_5A3_To_RGBA(u32 *dst, const u16 *src, int tl } } -static inline void decodebytesC14X2IA8_To_RGBA(u32* dst, const u16* src, int tlutaddr) +static inline void decodebytesC14X2IA8_To_RGBA(u32* dst, const u16* src, const u8* tlut_) { - u16* tlut = (u16*)(texMem + tlutaddr); + const u16* tlut = (u16*) tlut_; for (int x = 0; x < 4; x++) { u16 val = Common::swap16(src[x]); @@ -164,9 +164,9 @@ static inline void decodebytesC14X2IA8_To_RGBA(u32* dst, const u16* src, int tlu } } -static inline void decodebytesC14X2rgb565_To_RGBA(u32* dst, const u16* src, int tlutaddr) +static inline void decodebytesC14X2rgb565_To_RGBA(u32* dst, const u16* src, const u8* tlut_) { - u16* tlut = (u16*)(texMem + tlutaddr); + const u16* tlut = (u16*) tlut_; for (int x = 0; x < 4; x++) { u16 val = Common::swap16(src[x]); @@ -287,7 +287,7 @@ static inline void SetOpenMPThreadCount(int width, int height) // TODO: complete SSE2 optimization of less often used texture formats. // TODO: refactor algorithms using _mm_loadl_epi64 unaligned loads to prefer 128-bit aligned loads. -PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int height, int texformat, int tlutaddr, TlutFormat tlutfmt) +PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int height, int texformat, const u8* tlut, TlutFormat tlutfmt) { SetOpenMPThreadCount(width, height); @@ -304,7 +304,7 @@ PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int he for (int y = 0; y < height; y += 8) for (int x = 0, yStep = (y / 8) * Wsteps8; x < width; x += 8,yStep++) for (int iy = 0, xStep = 8 * yStep; iy < 8; iy++,xStep++) - decodebytesC4_5A3_To_rgba32(dst + (y + iy) * width + x, src + 4 * xStep, tlutaddr); + decodebytesC4_5A3_To_rgba32(dst + (y + iy) * width + x, src + 4 * xStep, tlut); } else if (tlutfmt == GX_TL_IA8) { @@ -312,7 +312,7 @@ PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int he for (int y = 0; y < height; y += 8) for (int x = 0, yStep = (y / 8) * Wsteps8; x < width; x += 8,yStep++) for (int iy = 0, xStep = 8 * yStep; iy < 8; iy++,xStep++) - decodebytesC4IA8_To_RGBA(dst + (y + iy) * width + x, src + 4 * xStep, tlutaddr); + decodebytesC4IA8_To_RGBA(dst + (y + iy) * width + x, src + 4 * xStep, tlut); } else if (tlutfmt == GX_TL_RGB565) @@ -321,7 +321,7 @@ PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int he for (int y = 0; y < height; y += 8) for (int x = 0, yStep = (y / 8) * Wsteps8; x < width; x += 8,yStep++) for (int iy = 0, xStep = 8 * yStep; iy < 8; iy++,xStep++) - decodebytesC4RGB565_To_RGBA(dst + (y + iy) * width + x, src + 4 * xStep, tlutaddr); + decodebytesC4RGB565_To_RGBA(dst + (y + iy) * width + x, src + 4 * xStep, tlut); } break; case GX_TF_I4: @@ -562,7 +562,7 @@ PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int he for (int y = 0; y < height; y += 4) for (int x = 0, yStep = (y / 4) * Wsteps8; x < width; x += 8, yStep++) for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++) - decodebytesC8_5A3_To_RGBA32((u32*)dst + (y + iy) * width + x, src + 8 * xStep, tlutaddr); + decodebytesC8_5A3_To_RGBA32((u32*)dst + (y + iy) * width + x, src + 8 * xStep, tlut); } else if (tlutfmt == GX_TL_IA8) { @@ -570,7 +570,7 @@ PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int he for (int y = 0; y < height; y += 4) for (int x = 0, yStep = (y / 4) * Wsteps8; x < width; x += 8, yStep++) for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++) - decodebytesC8IA8_To_RGBA(dst + (y + iy) * width + x, src + 8 * xStep, tlutaddr); + decodebytesC8IA8_To_RGBA(dst + (y + iy) * width + x, src + 8 * xStep, tlut); } else if (tlutfmt == GX_TL_RGB565) @@ -579,7 +579,7 @@ PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int he for (int y = 0; y < height; y += 4) for (int x = 0, yStep = (y / 4) * Wsteps8; x < width; x += 8, yStep++) for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++) - decodebytesC8RGB565_To_RGBA(dst + (y + iy) * width + x, src + 8 * xStep, tlutaddr); + decodebytesC8RGB565_To_RGBA(dst + (y + iy) * width + x, src + 8 * xStep, tlut); } break; @@ -675,7 +675,7 @@ PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int he for (int y = 0; y < height; y += 4) for (int x = 0, yStep = (y / 4) * Wsteps4; x < width; x += 4, yStep++) for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++) - decodebytesC14X2_5A3_To_RGBA(dst + (y + iy) * width + x, (u16*)(src + 8 * xStep), tlutaddr); + decodebytesC14X2_5A3_To_RGBA(dst + (y + iy) * width + x, (u16*)(src + 8 * xStep), tlut); } else if (tlutfmt == GX_TL_IA8) { @@ -683,7 +683,7 @@ PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int he for (int y = 0; y < height; y += 4) for (int x = 0, yStep = (y / 4) * Wsteps4; x < width; x += 4, yStep++) for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++) - decodebytesC14X2IA8_To_RGBA(dst + (y + iy) * width + x, (u16*)(src + 8 * xStep), tlutaddr); + decodebytesC14X2IA8_To_RGBA(dst + (y + iy) * width + x, (u16*)(src + 8 * xStep), tlut); } else if (tlutfmt == GX_TL_RGB565) { @@ -691,7 +691,7 @@ PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int he for (int y = 0; y < height; y += 4) for (int x = 0, yStep = (y / 4) * Wsteps4; x < width; x += 4, yStep++) for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++) - decodebytesC14X2rgb565_To_RGBA(dst + (y + iy) * width + x, (u16*)(src + 8 * xStep), tlutaddr); + decodebytesC14X2rgb565_To_RGBA(dst + (y + iy) * width + x, (u16*)(src + 8 * xStep), tlut); } break; case GX_TF_RGB565: