diff --git a/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp b/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp index cdb7f75102..a5fa6de7a5 100644 --- a/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp +++ b/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp @@ -87,55 +87,21 @@ void PSTextureEncoder::Shutdown() SAFE_RELEASE(m_out); } -size_t PSTextureEncoder::Encode(u8* dst, unsigned int dstFormat, +void PSTextureEncoder::Encode(u8* dst, const TextureCache::TCacheEntryBase *texture_entry, PEControl::PixelFormat srcFormat, const EFBRectangle& srcRect, bool isIntensity, bool scaleByHalf) { if (!m_ready) // Make sure we initialized OK - return 0; - - // Clamp srcRect to 640x528. BPS: The Strike tries to encode an 800x600 - // texture, which is invalid. - EFBRectangle correctSrc = srcRect; - correctSrc.ClampUL(0, 0, EFB_WIDTH, EFB_HEIGHT); - - // Validate source rect size - if (correctSrc.GetWidth() <= 0 || correctSrc.GetHeight() <= 0) - return 0; + return; HRESULT hr; - unsigned int blockW = BLOCK_WIDTHS[dstFormat]; - unsigned int blockH = BLOCK_HEIGHTS[dstFormat]; - - // Round up source dims to multiple of block size - unsigned int actualWidth = correctSrc.GetWidth() / (scaleByHalf ? 2 : 1); - actualWidth = (actualWidth + blockW-1) & ~(blockW-1); - unsigned int actualHeight = correctSrc.GetHeight() / (scaleByHalf ? 
2 : 1); - actualHeight = (actualHeight + blockH-1) & ~(blockH-1); - - unsigned int numBlocksX = actualWidth/blockW; - unsigned int numBlocksY = actualHeight/blockH; - - unsigned int cacheLinesPerRow; - if (dstFormat == 0x6) // RGBA takes two cache lines per block; all others take one - cacheLinesPerRow = numBlocksX*2; - else - cacheLinesPerRow = numBlocksX; - _assert_msg_(VIDEO, cacheLinesPerRow*32 <= MAX_BYTES_PER_BLOCK_ROW, "cache lines per row sanity check"); - - unsigned int totalCacheLines = cacheLinesPerRow * numBlocksY; - _assert_msg_(VIDEO, totalCacheLines*32 <= MAX_BYTES_PER_ENCODE, "total encode size sanity check"); - - size_t encodeSize = 0; - // Reset API g_renderer->ResetAPIState(); // Set up all the state for EFB encoding - { - D3D11_VIEWPORT vp = CD3D11_VIEWPORT(0.f, 0.f, FLOAT(cacheLinesPerRow * 8), FLOAT(numBlocksY)); + D3D11_VIEWPORT vp = CD3D11_VIEWPORT(0.f, 0.f, FLOAT(texture_entry->CacheLinesPerRow() * 8), FLOAT(texture_entry->NumBlocksY())); D3D::context->RSSetViewports(1, &vp); EFBRectangle fullSrcRect; @@ -155,9 +121,9 @@ size_t PSTextureEncoder::Encode(u8* dst, unsigned int dstFormat, FramebufferManager::GetResolvedEFBColorTexture()->GetSRV(); EFBEncodeParams params; - params.SrcLeft = correctSrc.left; - params.SrcTop = correctSrc.top; - params.DestWidth = actualWidth; + params.SrcLeft = srcRect.left; + params.SrcTop = srcRect.top; + params.DestWidth = texture_entry->native_width; params.ScaleFactor = scaleByHalf ? 
2 : 1; D3D::context->UpdateSubresource(m_encodeParams, 0, nullptr, &params, 0, 0); D3D::stateman->SetPixelConstants(m_encodeParams); @@ -172,12 +138,12 @@ size_t PSTextureEncoder::Encode(u8* dst, unsigned int dstFormat, targetRect.AsRECT(), Renderer::GetTargetWidth(), Renderer::GetTargetHeight(), - SetStaticShader(dstFormat, srcFormat, isIntensity, scaleByHalf), + SetStaticShader(texture_entry->format, srcFormat, isIntensity, scaleByHalf), VertexShaderCache::GetSimpleVertexShader(), VertexShaderCache::GetSimpleInputLayout()); // Copy to staging buffer - D3D11_BOX srcBox = CD3D11_BOX(0, 0, 0, cacheLinesPerRow * 8, numBlocksY, 1); + D3D11_BOX srcBox = CD3D11_BOX(0, 0, 0, texture_entry->CacheLinesPerRow() * 8, texture_entry->NumBlocksY(), 1); D3D::context->CopySubresourceRegion(m_outStage, 0, 0, 0, 0, m_out, 0, &srcBox); // Transfer staging buffer to GameCube/Wii RAM @@ -186,16 +152,14 @@ size_t PSTextureEncoder::Encode(u8* dst, unsigned int dstFormat, CHECK(SUCCEEDED(hr), "map staging buffer (0x%x)", hr); u8* src = (u8*)map.pData; - for (unsigned int y = 0; y < numBlocksY; ++y) + for (unsigned int y = 0; y < texture_entry->NumBlocksY(); ++y) { - memcpy(dst, src, cacheLinesPerRow*32); - dst += bpmem.copyMipMapStrideChannels*32; + memcpy(dst, src, texture_entry->CacheLinesPerRow() * 32); + dst += texture_entry->memory_stride; src += map.RowPitch; } D3D::context->Unmap(m_outStage, 0); - - encodeSize = bpmem.copyMipMapStrideChannels*32 * numBlocksY; } // Restore API @@ -203,8 +167,6 @@ size_t PSTextureEncoder::Encode(u8* dst, unsigned int dstFormat, D3D::context->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV(), FramebufferManager::GetEFBDepthTexture()->GetDSV()); - - return encodeSize; } ID3D11PixelShader* PSTextureEncoder::SetStaticShader(unsigned int dstFormat, PEControl::PixelFormat srcFormat, diff --git a/Source/Core/VideoBackends/D3D/PSTextureEncoder.h b/Source/Core/VideoBackends/D3D/PSTextureEncoder.h index 03a501d61a..2857e9de7e 100644 --- 
a/Source/Core/VideoBackends/D3D/PSTextureEncoder.h +++ b/Source/Core/VideoBackends/D3D/PSTextureEncoder.h @@ -6,6 +6,8 @@ #include "VideoBackends/D3D/TextureEncoder.h" +#include "VideoCommon/TextureCacheBase.h" + struct ID3D11Texture2D; struct ID3D11RenderTargetView; struct ID3D11Buffer; @@ -29,7 +31,7 @@ public: void Init(); void Shutdown(); - size_t Encode(u8* dst, unsigned int dstFormat, + void Encode(u8* dst, const TextureCache::TCacheEntryBase *texture_entry, PEControl::PixelFormat srcFormat, const EFBRectangle& srcRect, bool isIntensity, bool scaleByHalf); diff --git a/Source/Core/VideoBackends/D3D/TextureCache.cpp b/Source/Core/VideoBackends/D3D/TextureCache.cpp index de204fa2af..603084dd33 100644 --- a/Source/Core/VideoBackends/D3D/TextureCache.cpp +++ b/Source/Core/VideoBackends/D3D/TextureCache.cpp @@ -2,7 +2,6 @@ // Licensed under GPLv2+ // Refer to the license.txt file included. -#include "Core/HW/Memmap.h" #include "VideoBackends/D3D/D3DBase.h" #include "VideoBackends/D3D/D3DShader.h" #include "VideoBackends/D3D/D3DState.h" @@ -185,7 +184,7 @@ TextureCache::TCacheEntryBase* TextureCache::CreateTexture(const TCacheEntryConf } } -void TextureCache::TCacheEntry::FromRenderTarget(u32 dstAddr, unsigned int dstFormat, +void TextureCache::TCacheEntry::FromRenderTarget(u8* dst, unsigned int dstFormat, u32 dstStride, PEControl::PixelFormat srcFormat, const EFBRectangle& srcRect, bool isIntensity, bool scaleByHalf, unsigned int cbufid, const float *colmat) @@ -226,10 +225,13 @@ void TextureCache::TCacheEntry::FromRenderTarget(u32 dstAddr, unsigned int dstFo // Create texture copy D3D::drawShadedTexQuad( - (srcFormat == PEControl::Z24) ? FramebufferManager::GetEFBDepthTexture()->GetSRV() : FramebufferManager::GetEFBColorTexture()->GetSRV(), - &sourcerect, Renderer::GetTargetWidth(), Renderer::GetTargetHeight(), - (srcFormat == PEControl::Z24) ? 
PixelShaderCache::GetDepthMatrixProgram(true) : PixelShaderCache::GetColorMatrixProgram(true), - VertexShaderCache::GetSimpleVertexShader(), VertexShaderCache::GetSimpleInputLayout(), GeometryShaderCache::GetCopyGeometryShader()); + (srcFormat == PEControl::Z24 ? FramebufferManager::GetEFBDepthTexture() : FramebufferManager::GetEFBColorTexture())->GetSRV(), + &sourcerect, Renderer::GetTargetWidth(), + Renderer::GetTargetHeight(), + srcFormat == PEControl::Z24 ? PixelShaderCache::GetDepthMatrixProgram(true) : PixelShaderCache::GetColorMatrixProgram(true), + VertexShaderCache::GetSimpleVertexShader(), + VertexShaderCache::GetSimpleInputLayout(), + GeometryShaderCache::GetCopyGeometryShader()); D3D::context->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV(), FramebufferManager::GetEFBDepthTexture()->GetDSV()); @@ -237,14 +239,7 @@ void TextureCache::TCacheEntry::FromRenderTarget(u32 dstAddr, unsigned int dstFo if (!g_ActiveConfig.bSkipEFBCopyToRam) { - u8* dst = Memory::GetPointer(dstAddr); - size_t encoded_size = g_encoder->Encode(dst, dstFormat, srcFormat, srcRect, isIntensity, scaleByHalf); - - size_in_bytes = (u32)encoded_size; - - TextureCache::MakeRangeDynamic(dstAddr, (u32)encoded_size); - - this->hash = GetHash64(dst, (int)encoded_size, g_ActiveConfig.iSafeTextureCache_ColorSamples); + g_encoder->Encode(dst, this, srcFormat, srcRect, isIntensity, scaleByHalf); } } @@ -342,7 +337,7 @@ void TextureCache::ConvertTexture(TCacheEntryBase* entry, TCacheEntryBase* uncon D3D::stateman->SetTexture(1, palette_buf_srv); // TODO: Add support for C14X2 format. (Different multiplier, more palette entries.) - float params[4] = { unconverted->format == 0 ? 15.f : 255.f }; + float params[4] = { (unconverted->format & 0xf) == 0 ? 
15.f : 255.f }; D3D::context->UpdateSubresource(palette_uniform, 0, nullptr, &params, 0, 0); D3D::stateman->SetPixelConstants(palette_uniform); diff --git a/Source/Core/VideoBackends/D3D/TextureCache.h b/Source/Core/VideoBackends/D3D/TextureCache.h index 608fa47044..4fed08ff65 100644 --- a/Source/Core/VideoBackends/D3D/TextureCache.h +++ b/Source/Core/VideoBackends/D3D/TextureCache.h @@ -34,7 +34,7 @@ private: void Load(unsigned int width, unsigned int height, unsigned int expanded_width, unsigned int levels) override; - void FromRenderTarget(u32 dstAddr, unsigned int dstFormat, + void FromRenderTarget(u8* dst, unsigned int dstFormat, u32 dstStride, PEControl::PixelFormat srcFormat, const EFBRectangle& srcRect, bool isIntensity, bool scaleByHalf, unsigned int cbufid, const float *colmat) override; diff --git a/Source/Core/VideoBackends/D3D/TextureEncoder.h b/Source/Core/VideoBackends/D3D/TextureEncoder.h index 7c298f3f81..445fffd218 100644 --- a/Source/Core/VideoBackends/D3D/TextureEncoder.h +++ b/Source/Core/VideoBackends/D3D/TextureEncoder.h @@ -5,96 +5,11 @@ #pragma once #include "VideoCommon/BPMemory.h" +#include "VideoCommon/TextureCacheBase.h" #include "VideoCommon/VideoCommon.h" namespace DX11 { - -// 4-bit format: 8x8 texels / cache line -// 8-bit format: 8x4 texels / cache line -// 16-bit format: 4x4 texels / cache line -// 32-bit format: 4x4 texels / 2 cache lines -// Compressed format: 8x8 texels / cache line - -// Document EFB encoding formats here with examples of where they are used. - -// Format: 0 - R4 -// Used in The Legend of Zelda: The Wind Waker for character shadows (srcFormat 1, -// isIntensity 1, scaleByHalf 1). - -// Format: 1 - R8 -// FIXME: Unseen. May or may not be a duplicate of format 8. - -// Format: 2 - A4 R4 -// FIXME: Unseen. - -// Format: 3 - A8 R8 -// FIXME: Unseen. - -// Format: 4 - R5 G6 B5 -// Used in Wind Waker for most render-to-texture effects like heat shimmer and -// depth-of-field. 
- -// Format: 5 - 1 R5 G5 B5 or 0 A3 R4 G4 B4 -// Used in Twilight Princess for character shadows. - -// Format: 6 - A8 R8 A8 R8 | G8 B8 G8 B8 -// Used in Twilight Princess for bloom effect. - -// Format: 7 - A8 -// Used in Metroid Prime 2 for the scan visor. - -// Format: 8 - R8 -// Used in Twilight Princess for the map. - -// Format: 9 - G8 -// FIXME: Unseen. - -// Format: A - B8 -// Used in Metroid Prime 2 for the scan visor. - -// Format: B - G8 R8 -// Used in Wind Waker for depth-of-field. Usually used with srcFormat 3 to -// render depth textures. The bytes are swapped, so games have to correct it -// in RAM before using it as a texture. - -// Format: C - B8 G8 -// FIXME: Unseen. - -const unsigned int BLOCK_WIDTHS[16] = { - 8, // R4 - 8, // R8 (FIXME: duplicate of R8 below?) - 8, // A4 R4 - 4, // A8 R8 - 4, // R5 G6 B5 - 4, // 1 R5 G5 B5 or 0 A3 R4 G4 B4 - 4, // A8 R8 A8 R8 | G8 B8 G8 B8 (two cache lines) - 8, // A8 - 8, // R8 (FIXME: duplicate of R8 above?) - 8, // G8 - 8, // B8 - 4, // G8 R8 - 4, // B8 G8 - 0, 0, 0 // Unknown formats -}; - -const unsigned int BLOCK_HEIGHTS[16] = { - 8, // R4 - 4, // R8 (FIXME: duplicate of R8 below?) - 4, // A4 R4 - 4, // A8 R8 - 4, // R5 G6 B5 - 4, // 1 R5 G5 B5 or 0 A3 R4 G4 B4 - 4, // A8 R8 A8 R8 | G8 B8 G8 B8 (two cache lines) - 4, // A8 - 4, // R8 (FIXME: duplicate of R8 above?) 
- 4, // G8 - 4, // B8 - 4, // G8 R8 - 4, // B8 G8 - 0, 0, 0 // Unknown formats -}; - // Maximum number of bytes that can occur in a texture block-row generated by // the encoder static const UINT MAX_BYTES_PER_BLOCK_ROW = (EFB_WIDTH/4)*64; @@ -111,7 +26,7 @@ public: virtual void Init() = 0; virtual void Shutdown() = 0; // Returns size in bytes of encoded block of memory - virtual size_t Encode(u8* dst, unsigned int dstFormat, + virtual void Encode(u8* dst, const TextureCache::TCacheEntryBase *texture_entry, PEControl::PixelFormat srcFormat, const EFBRectangle& srcRect, bool isIntensity, bool scaleByHalf) = 0; diff --git a/Source/Core/VideoBackends/OGL/TextureCache.cpp b/Source/Core/VideoBackends/OGL/TextureCache.cpp index 3fe28b07a1..86360976bf 100644 --- a/Source/Core/VideoBackends/OGL/TextureCache.cpp +++ b/Source/Core/VideoBackends/OGL/TextureCache.cpp @@ -213,7 +213,7 @@ void TextureCache::TCacheEntry::Load(unsigned int width, unsigned int height, TextureCache::SetStage(); } -void TextureCache::TCacheEntry::FromRenderTarget(u32 dstAddr, unsigned int dstFormat, +void TextureCache::TCacheEntry::FromRenderTarget(u8* dstPointer, unsigned int dstFormat, u32 dstStride, PEControl::PixelFormat srcFormat, const EFBRectangle& srcRect, bool isIntensity, bool scaleByHalf, unsigned int cbufid, const float *colmat) @@ -264,24 +264,14 @@ void TextureCache::TCacheEntry::FromRenderTarget(u32 dstAddr, unsigned int dstFo if (!g_ActiveConfig.bSkipEFBCopyToRam) { - int encoded_size = TextureConverter::EncodeToRamFromTexture( - dstAddr, + TextureConverter::EncodeToRamFromTexture( + dstPointer, + this, read_texture, srcFormat == PEControl::Z24, isIntensity, - dstFormat, scaleByHalf, - srcRect, - copyMipMapStrideChannels * 32); - - u8* dst = Memory::GetPointer(dstAddr); - u64 const new_hash = GetHash64(dst,encoded_size,g_ActiveConfig.iSafeTextureCache_ColorSamples); - - size_in_bytes = (u32)encoded_size; - - TextureCache::MakeRangeDynamic(dstAddr, encoded_size); - - hash = new_hash; + 
srcRect); } FramebufferManager::SetFramebuffer(0); @@ -570,7 +560,7 @@ void TextureCache::ConvertTexture(TCacheEntryBase* _entry, TCacheEntryBase* _unc memcpy(buffer.first, palette, size); s_palette_stream_buffer->Unmap(size); glUniform1i(s_palette_buffer_offset_uniform[format], buffer.second / 2); - glUniform1f(s_palette_multiplier_uniform[format], unconverted->format == 0 ? 15.0f : 255.0f); + glUniform1f(s_palette_multiplier_uniform[format], (unconverted->format & 0xf) == 0 ? 15.0f : 255.0f); glUniform4f(s_palette_copy_position_uniform[format], 0.0f, 0.0f, (float)unconverted->config.width, (float)unconverted->config.height); glActiveTexture(GL_TEXTURE10); diff --git a/Source/Core/VideoBackends/OGL/TextureCache.h b/Source/Core/VideoBackends/OGL/TextureCache.h index 1ed85b0539..3f6f54b876 100644 --- a/Source/Core/VideoBackends/OGL/TextureCache.h +++ b/Source/Core/VideoBackends/OGL/TextureCache.h @@ -41,7 +41,7 @@ private: void Load(unsigned int width, unsigned int height, unsigned int expanded_width, unsigned int level) override; - void FromRenderTarget(u32 dstAddr, unsigned int dstFormat, + void FromRenderTarget(u8 *dst, unsigned int dstFormat, u32 dstStride, PEControl::PixelFormat srcFormat, const EFBRectangle& srcRect, bool isIntensity, bool scaleByHalf, unsigned int cbufid, const float *colmat) override; diff --git a/Source/Core/VideoBackends/OGL/TextureConverter.cpp b/Source/Core/VideoBackends/OGL/TextureConverter.cpp index c85b617294..5020058da0 100644 --- a/Source/Core/VideoBackends/OGL/TextureConverter.cpp +++ b/Source/Core/VideoBackends/OGL/TextureConverter.cpp @@ -268,59 +268,18 @@ static void EncodeToRamUsingShader(GLuint srcTexture, } } -int EncodeToRamFromTexture(u32 address,GLuint source_texture, bool bFromZBuffer, bool bIsIntensityFmt, u32 copyfmt, int bScaleByHalf, const EFBRectangle& source, u32 writeStride) +void EncodeToRamFromTexture(u8 *dest_ptr, const TextureCache::TCacheEntryBase *texture_entry, + GLuint source_texture, bool bFromZBuffer, 
bool bIsIntensityFmt, int bScaleByHalf, const EFBRectangle& source) { - u32 format = copyfmt; - - if (bFromZBuffer) - { - format |= _GX_TF_ZTF; - if (copyfmt == 11) - format = GX_TF_Z16; - else if (format < GX_TF_Z8 || format > GX_TF_Z24X8) - format |= _GX_TF_CTF; - } - else - { - if (copyfmt > GX_TF_RGBA8 || (copyfmt < GX_TF_RGB565 && !bIsIntensityFmt)) - format |= _GX_TF_CTF; - } - - SHADER& texconv_shader = GetOrCreateEncodingShader(format); - - u8 *dest_ptr = Memory::GetPointer(address); - - int width = (source.right - source.left) >> bScaleByHalf; - int height = (source.bottom - source.top) >> bScaleByHalf; - - int size_in_bytes = TexDecoder_GetTextureSizeInBytes(width, height, format); - - u16 blkW = TexDecoder_GetBlockWidthInTexels(format) - 1; - u16 blkH = TexDecoder_GetBlockHeightInTexels(format) - 1; - - // only copy on cache line boundaries - // extra pixels are copied but not displayed in the resulting texture - s32 expandedWidth = (width + blkW) & (~blkW); - s32 expandedHeight = (height + blkH) & (~blkH); + SHADER& texconv_shader = GetOrCreateEncodingShader(texture_entry->format); texconv_shader.Bind(); - glUniform4i(s_encodingUniforms[format], - source.left, source.top, - expandedWidth, bScaleByHalf ? 2 : 1); - - unsigned int numBlocksX = expandedWidth / TexDecoder_GetBlockWidthInTexels(format); - unsigned int numBlocksY = expandedHeight / TexDecoder_GetBlockHeightInTexels(format); - unsigned int cacheLinesPerRow; - if ((format & 0x0f) == 6) - cacheLinesPerRow = numBlocksX * 2; - else - cacheLinesPerRow = numBlocksX; + glUniform4i(s_encodingUniforms[texture_entry->format], + source.left, source.top, texture_entry->native_width, bScaleByHalf ? 2 : 1); EncodeToRamUsingShader(source_texture, - dest_ptr, cacheLinesPerRow * 32, numBlocksY, - writeStride, bScaleByHalf > 0 && !bFromZBuffer); - return size_in_bytes; // TODO: D3D11 is calculating this value differently! 
- + dest_ptr, texture_entry->CacheLinesPerRow() * 32, texture_entry->NumBlocksY(), + texture_entry->memory_stride, bScaleByHalf > 0 && !bFromZBuffer); } void EncodeToRamYUYV(GLuint srcTexture, const TargetRectangle& sourceRc, u8* destAddr, u32 dstWidth, u32 dstStride, u32 dstHeight) diff --git a/Source/Core/VideoBackends/OGL/TextureConverter.h b/Source/Core/VideoBackends/OGL/TextureConverter.h index c41dfe10ac..186c9fe6d0 100644 --- a/Source/Core/VideoBackends/OGL/TextureConverter.h +++ b/Source/Core/VideoBackends/OGL/TextureConverter.h @@ -5,6 +5,7 @@ #pragma once #include "VideoBackends/OGL/GLUtil.h" +#include "VideoCommon/TextureCacheBase.h" #include "VideoCommon/VideoCommon.h" namespace OGL @@ -24,7 +25,8 @@ void EncodeToRamYUYV(GLuint srcTexture, const TargetRectangle& sourceRc, void DecodeToTexture(u32 xfbAddr, int srcWidth, int srcHeight, GLuint destTexture); // returns size of the encoded data (in bytes) -int EncodeToRamFromTexture(u32 address, GLuint source_texture, bool bFromZBuffer, bool bIsIntensityFmt, u32 copyfmt, int bScaleByHalf, const EFBRectangle& source, u32 writeStride); +void EncodeToRamFromTexture(u8 *dest_ptr, const TextureCache::TCacheEntryBase *texture_entry, + GLuint source_texture, bool bFromZBuffer, bool bIsIntensityFmt, int bScaleByHalf, const EFBRectangle& source); } diff --git a/Source/Core/VideoCommon/BPFunctions.cpp b/Source/Core/VideoCommon/BPFunctions.cpp index 4150a7cf31..e4b5f54901 100644 --- a/Source/Core/VideoCommon/BPFunctions.cpp +++ b/Source/Core/VideoCommon/BPFunctions.cpp @@ -9,7 +9,6 @@ #include "VideoCommon/BPFunctions.h" #include "VideoCommon/RenderBase.h" -#include "VideoCommon/TextureCacheBase.h" #include "VideoCommon/VertexManagerBase.h" #include "VideoCommon/VertexShaderManager.h" #include "VideoCommon/VideoConfig.h" @@ -85,15 +84,6 @@ void SetColorMask() g_renderer->SetColorMask(); } -void CopyEFB(u32 dstAddr, const EFBRectangle& srcRect, - unsigned int dstFormat, PEControl::PixelFormat srcFormat, - bool 
isIntensity, bool scaleByHalf) -{ - // bpmem.zcontrol.pixel_format to PEControl::Z24 is when the game wants to copy from ZBuffer (Zbuffer uses 24-bit Format) - TextureCache::CopyRenderTargetToTexture(dstAddr, dstFormat, srcFormat, - srcRect, isIntensity, scaleByHalf); -} - /* Explanation of the magic behind ClearScreen: There's numerous possible formats for the pixel data in the EFB. However, in the HW accelerated backends we're always using RGBA8 diff --git a/Source/Core/VideoCommon/BPFunctions.h b/Source/Core/VideoCommon/BPFunctions.h index 890f734deb..126d9f1d7b 100644 --- a/Source/Core/VideoCommon/BPFunctions.h +++ b/Source/Core/VideoCommon/BPFunctions.h @@ -23,9 +23,6 @@ void SetBlendMode(); void SetDitherMode(); void SetLogicOpMode(); void SetColorMask(); -void CopyEFB(u32 dstAddr, const EFBRectangle& srcRect, - unsigned int dstFormat, PEControl::PixelFormat srcFormat, - bool isIntensity, bool scaleByHalf); void ClearScreen(const EFBRectangle &rc); void OnPixelFormatChange(); void SetInterlacingMode(const BPCmd &bp); diff --git a/Source/Core/VideoCommon/BPStructs.cpp b/Source/Core/VideoCommon/BPStructs.cpp index 2717fe4b21..e30aa2fcaa 100644 --- a/Source/Core/VideoCommon/BPStructs.cpp +++ b/Source/Core/VideoCommon/BPStructs.cpp @@ -20,6 +20,7 @@ #include "VideoCommon/PixelShaderManager.h" #include "VideoCommon/RenderBase.h" #include "VideoCommon/Statistics.h" +#include "VideoCommon/TextureCacheBase.h" #include "VideoCommon/TextureDecoder.h" #include "VideoCommon/VertexShaderManager.h" #include "VideoCommon/VideoCommon.h" @@ -205,6 +206,7 @@ static void BPWritten(const BPCmd& bp) // The values in bpmem.copyTexSrcXY and bpmem.copyTexSrcWH are updated in case 0x49 and 0x4a in this function u32 destAddr = bpmem.copyTexDest << 5; + u32 destStride = bpmem.copyMipMapStrideChannels << 5; EFBRectangle srcRect; srcRect.left = (int)bpmem.copyTexSrcXY.x; @@ -223,8 +225,9 @@ static void BPWritten(const BPCmd& bp) if (g_ActiveConfig.bShowEFBCopyRegions) 
stats.efb_regions.push_back(srcRect); - CopyEFB(destAddr, srcRect, - PE_copy.tp_realFormat(), bpmem.zcontrol.pixel_format, + // bpmem.zcontrol.pixel_format to PEControl::Z24 is when the game wants to copy from ZBuffer (Zbuffer uses 24-bit Format) + TextureCache::CopyRenderTargetToTexture(destAddr, PE_copy.tp_realFormat(), destStride, + bpmem.zcontrol.pixel_format, srcRect, !!PE_copy.intensity_fmt, !!PE_copy.half_scale); } else @@ -251,10 +254,9 @@ static void BPWritten(const BPCmd& bp) height = MAX_XFB_HEIGHT; } - u32 stride = bpmem.copyMipMapStrideChannels << 5; DEBUG_LOG(VIDEO, "RenderToXFB: destAddr: %08x | srcRect {%d %d %d %d} | fbWidth: %u | fbStride: %u | fbHeight: %u", - destAddr, srcRect.left, srcRect.top, srcRect.right, srcRect.bottom, bpmem.copyTexSrcWH.x + 1, stride, height); - Renderer::RenderToXFB(destAddr, srcRect, stride, height, s_gammaLUT[PE_copy.gamma]); + destAddr, srcRect.left, srcRect.top, srcRect.right, srcRect.bottom, bpmem.copyTexSrcWH.x + 1, destStride, height); + Renderer::RenderToXFB(destAddr, srcRect, destStride, height, s_gammaLUT[PE_copy.gamma]); } // Clear the rectangular region after copying it. 
diff --git a/Source/Core/VideoCommon/TextureCacheBase.cpp b/Source/Core/VideoCommon/TextureCacheBase.cpp index 7d49e698da..9dcd960037 100644 --- a/Source/Core/VideoCommon/TextureCacheBase.cpp +++ b/Source/Core/VideoCommon/TextureCacheBase.cpp @@ -182,24 +182,6 @@ void TextureCache::Cleanup(int _frameCount) } } -void TextureCache::MakeRangeDynamic(u32 start_address, u32 size) -{ - TexCache::iterator - iter = textures_by_address.begin(); - - while (iter != textures_by_address.end()) - { - if (iter->second->OverlapsMemoryRange(start_address, size)) - { - iter = FreeTexture(iter); - } - else - { - ++iter; - } - } -} - bool TextureCache::TCacheEntryBase::OverlapsMemoryRange(u32 range_address, u32 range_size) const { if (addr + size_in_bytes <= range_address) @@ -243,7 +225,7 @@ TextureCache::TCacheEntryBase* TextureCache::DoPartialTextureUpdates(TexCache::i && entry_to_update->addr <= entry->addr && entry->addr + entry->size_in_bytes <= entry_to_update->addr + entry_to_update->size_in_bytes && entry->frameCount == FRAMECOUNT_INVALID - && entry->copyMipMapStrideChannels * 32 == numBlocksX * block_size) + && entry->memory_stride == numBlocksX * block_size) { u32 block_offset = (entry->addr - entry_to_update->addr) / block_size; u32 block_x = block_offset % numBlocksX; @@ -372,11 +354,11 @@ TextureCache::TCacheEntryBase* TextureCache::Load(const u32 stage) return nullptr; // TexelSizeInNibbles(format) * width * height / 16; - const unsigned int bsw = TexDecoder_GetBlockWidthInTexels(texformat) - 1; - const unsigned int bsh = TexDecoder_GetBlockHeightInTexels(texformat) - 1; + const unsigned int bsw = TexDecoder_GetBlockWidthInTexels(texformat); + const unsigned int bsh = TexDecoder_GetBlockHeightInTexels(texformat); - unsigned int expandedWidth = (width + bsw) & (~bsw); - unsigned int expandedHeight = (height + bsh) & (~bsh); + unsigned int expandedWidth = ROUND_UP(width, bsw); + unsigned int expandedHeight = ROUND_UP(height, bsh); const unsigned int nativeW = width; const 
unsigned int nativeH = height; @@ -668,8 +650,8 @@ TextureCache::TCacheEntryBase* TextureCache::Load(const u32 stage) { const u32 mip_width = CalculateLevelSize(width, level); const u32 mip_height = CalculateLevelSize(height, level); - const u32 expanded_mip_width = (mip_width + bsw) & (~bsw); - const u32 expanded_mip_height = (mip_height + bsh) & (~bsh); + const u32 expanded_mip_width = ROUND_UP(mip_width, bsw); + const u32 expanded_mip_height = ROUND_UP(mip_height, bsh); const u8*& mip_src_data = from_tmem ? ((level % 2) ? ptr_odd : ptr_even) @@ -693,7 +675,7 @@ TextureCache::TCacheEntryBase* TextureCache::Load(const u32 stage) return ReturnEntry(stage, entry); } -void TextureCache::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat, PEControl::PixelFormat srcFormat, +void TextureCache::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat, u32 dstStride, PEControl::PixelFormat srcFormat, const EFBRectangle& srcRect, bool isIntensity, bool scaleByHalf) { // Emulation methods: @@ -753,9 +735,11 @@ void TextureCache::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat case 0: // Z4 colmat[3] = colmat[7] = colmat[11] = colmat[15] = 1.0f; cbufid = 0; + dstFormat |= _GX_TF_CTF; break; + case 8: // Z8H + dstFormat |= _GX_TF_CTF; case 1: // Z8 - case 8: // Z8 colmat[0] = colmat[4] = colmat[8] = colmat[12] = 1.0f; cbufid = 1; break; @@ -768,6 +752,7 @@ void TextureCache::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat case 11: // Z16 (reverse order) colmat[0] = colmat[4] = colmat[8] = colmat[13] = 1.0f; cbufid = 3; + dstFormat |= _GX_TF_CTF; break; case 6: // Z24X8 @@ -778,11 +763,13 @@ void TextureCache::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat case 9: // Z8M colmat[1] = colmat[5] = colmat[9] = colmat[13] = 1.0f; cbufid = 5; + dstFormat |= _GX_TF_CTF; break; case 10: // Z8L colmat[2] = colmat[6] = colmat[10] = colmat[14] = 1.0f; cbufid = 6; + dstFormat |= _GX_TF_CTF; break; case 12: // Z16L - copy lower 16 
depth bits @@ -790,6 +777,7 @@ void TextureCache::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat // Used e.g. in Zelda: Skyward Sword colmat[1] = colmat[5] = colmat[9] = colmat[14] = 1.0f; cbufid = 7; + dstFormat |= _GX_TF_CTF; break; default: @@ -798,6 +786,8 @@ void TextureCache::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat cbufid = 8; break; } + + dstFormat |= _GX_TF_ZTF; } else if (isIntensity) { @@ -862,11 +852,13 @@ void TextureCache::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat ColorMask[0] = 15.0f; ColorMask[4] = 1.0f / 15.0f; cbufid = 14; + dstFormat |= _GX_TF_CTF; break; case 1: // R8 case 8: // R8 colmat[0] = colmat[4] = colmat[8] = colmat[12] = 1; cbufid = 15; + dstFormat |= _GX_TF_CTF; break; case 2: // RA4 @@ -881,6 +873,7 @@ void TextureCache::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat fConstAdd[3] = 1.0f; cbufid = 17; } + dstFormat |= _GX_TF_CTF; break; case 3: // RA8 colmat[0] = colmat[4] = colmat[8] = colmat[15] = 1.0f; @@ -892,6 +885,7 @@ void TextureCache::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat fConstAdd[3] = 1.0f; cbufid = 19; } + dstFormat |= _GX_TF_CTF; break; case 7: // A8 @@ -907,25 +901,30 @@ void TextureCache::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat fConstAdd[3] = 1.0f; cbufid = 21; } + dstFormat |= _GX_TF_CTF; break; case 9: // G8 colmat[1] = colmat[5] = colmat[9] = colmat[13] = 1.0f; cbufid = 22; + dstFormat |= _GX_TF_CTF; break; case 10: // B8 colmat[2] = colmat[6] = colmat[10] = colmat[14] = 1.0f; cbufid = 23; + dstFormat |= _GX_TF_CTF; break; case 11: // RG8 colmat[0] = colmat[4] = colmat[8] = colmat[13] = 1.0f; cbufid = 24; + dstFormat |= _GX_TF_CTF; break; case 12: // GB8 colmat[1] = colmat[5] = colmat[9] = colmat[14] = 1.0f; cbufid = 25; + dstFormat |= _GX_TF_CTF; break; case 4: // RGB565 @@ -973,6 +972,13 @@ void TextureCache::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat } } + u8* dst = 
Memory::GetPointer(dstAddr); + if (dst == nullptr) + { + ERROR_LOG(VIDEO, "Trying to copy from EFB to invalid address 0x%8x", dstAddr); + return; + } + const unsigned int tex_w = scaleByHalf ? srcRect.GetWidth() / 2 : srcRect.GetWidth(); const unsigned int tex_h = scaleByHalf ? srcRect.GetHeight() / 2 : srcRect.GetHeight(); @@ -996,17 +1002,36 @@ void TextureCache::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat TCacheEntryBase* entry = AllocateTexture(config); - // TODO: Using the wrong dstFormat, dumb... entry->SetGeneralParameters(dstAddr, 0, dstFormat); entry->SetDimensions(tex_w, tex_h, 1); entry->SetHashes(TEXHASH_INVALID); entry->frameCount = FRAMECOUNT_INVALID; - entry->is_efb_copy = true; + entry->SetEfbCopy(dstStride); entry->is_custom_tex = false; - entry->copyMipMapStrideChannels = bpmem.copyMipMapStrideChannels; - entry->FromRenderTarget(dstAddr, dstFormat, srcFormat, srcRect, isIntensity, scaleByHalf, cbufid, colmat); + entry->FromRenderTarget(dst, dstFormat, dstStride, srcFormat, srcRect, isIntensity, scaleByHalf, cbufid, colmat); + + if (!g_ActiveConfig.bSkipEFBCopyToRam) + { + entry->hash = GetHash64(dst, (int)entry->size_in_bytes, g_ActiveConfig.iSafeTextureCache_ColorSamples); + + // Invalidate all textures that overlap the range of our texture + TexCache::iterator + iter = textures_by_address.begin(); + + while (iter != textures_by_address.end()) + { + if (iter->second->OverlapsMemoryRange(dstAddr, entry->size_in_bytes)) + { + iter = FreeTexture(iter); + } + else + { + ++iter; + } + } + } if (g_ActiveConfig.bDumpEFBTarget) { @@ -1052,3 +1077,36 @@ TextureCache::TexCache::iterator TextureCache::FreeTexture(TexCache::iterator it return textures_by_address.erase(iter); } + +u32 TextureCache::TCacheEntryBase::CacheLinesPerRow() const +{ + u32 blockW = TexDecoder_GetBlockWidthInTexels(format); + // Round up source width to multiple of block size + u32 actualWidth = ROUND_UP(native_width, blockW); + + u32 numBlocksX = actualWidth / 
blockW; + + // RGBA takes two cache lines per block; all others take one + if (format == GX_TF_RGBA8) + numBlocksX = numBlocksX * 2; + return numBlocksX; +} + +u32 TextureCache::TCacheEntryBase::NumBlocksY() const +{ + u32 blockH = TexDecoder_GetBlockHeightInTexels(format); + // Round up source height to multiple of block size + u32 actualHeight = ROUND_UP(native_height, blockH); + + return actualHeight / blockH; +} + +void TextureCache::TCacheEntryBase::SetEfbCopy(u32 stride) +{ + is_efb_copy = true; + memory_stride = stride; + + _assert_msg_(VIDEO, memory_stride >= CacheLinesPerRow(), "Memory stride is too small"); + + size_in_bytes = memory_stride * NumBlocksY(); +} diff --git a/Source/Core/VideoCommon/TextureCacheBase.h b/Source/Core/VideoCommon/TextureCacheBase.h index 0a51ef64b9..c012cad0fc 100644 --- a/Source/Core/VideoCommon/TextureCacheBase.h +++ b/Source/Core/VideoCommon/TextureCacheBase.h @@ -53,7 +53,7 @@ public: u32 format; bool is_efb_copy; bool is_custom_tex; - u32 copyMipMapStrideChannels; + u32 memory_stride; unsigned int native_width, native_height; // Texture dimensions from the GameCube's point of view unsigned int native_levels; @@ -76,6 +76,7 @@ public: native_width = _native_width; native_height = _native_height; native_levels = _native_levels; + memory_stride = _native_width; } void SetHashes(u64 _hash) @@ -83,6 +84,8 @@ public: hash = _hash; } + void SetEfbCopy(u32 stride); + TCacheEntryBase(const TCacheEntryConfig& c) : config(c) {} virtual ~TCacheEntryBase(); @@ -96,7 +99,7 @@ public: virtual void Load(unsigned int width, unsigned int height, unsigned int expanded_width, unsigned int level) = 0; - virtual void FromRenderTarget(u32 dstAddr, unsigned int dstFormat, + virtual void FromRenderTarget(u8* dst, unsigned int dstFormat, u32 dstStride, PEControl::PixelFormat srcFormat, const EFBRectangle& srcRect, bool isIntensity, bool scaleByHalf, unsigned int cbufid, const float *colmat) = 0; @@ -104,6 +107,11 @@ public: bool 
OverlapsMemoryRange(u32 range_address, u32 range_size) const; bool IsEfbCopy() const { return is_efb_copy; } + + u32 NumBlocksY() const; + u32 CacheLinesPerRow() const; + + void Memset(u8* ptr, u32 tag); }; virtual ~TextureCache(); // needs virtual for DX11 dtor @@ -115,7 +123,6 @@ public: static void Cleanup(int _frameCount); static void Invalidate(); - static void MakeRangeDynamic(u32 start_address, u32 size); virtual TCacheEntryBase* CreateTexture(const TCacheEntryConfig& config) = 0; @@ -125,8 +132,8 @@ public: static TCacheEntryBase* Load(const u32 stage); static void UnbindTextures(); static void BindTextures(); - static void CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat, PEControl::PixelFormat srcFormat, - const EFBRectangle& srcRect, bool isIntensity, bool scaleByHalf); + static void CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat, u32 dstStride, + PEControl::PixelFormat srcFormat, const EFBRectangle& srcRect, bool isIntensity, bool scaleByHalf); static void RequestInvalidateTextureCache(); diff --git a/Source/Core/VideoCommon/TextureConversionShader.cpp b/Source/Core/VideoCommon/TextureConversionShader.cpp index 9375a984c8..033a56b4aa 100644 --- a/Source/Core/VideoCommon/TextureConversionShader.cpp +++ b/Source/Core/VideoCommon/TextureConversionShader.cpp @@ -650,9 +650,11 @@ const char *GenerateEncodingShader(u32 format,API_TYPE ApiType) case GX_CTF_GB8: WriteCC8Encoder(p, "gb", ApiType); break; + case GX_CTF_Z8H: case GX_TF_Z8: WriteC8Encoder(p, "r", ApiType); break; + case GX_CTF_Z16R: case GX_TF_Z16: WriteZ16Encoder(p, ApiType); break; diff --git a/Source/Core/VideoCommon/TextureDecoder.h b/Source/Core/VideoCommon/TextureDecoder.h index 1abcc5705c..be0c8f44e0 100644 --- a/Source/Core/VideoCommon/TextureDecoder.h +++ b/Source/Core/VideoCommon/TextureDecoder.h @@ -16,6 +16,7 @@ extern GC_ALIGNED16(u8 texMem[TMEM_SIZE]); enum TextureFormat { + // These are the texture formats that can be read by the texture mapper. 
GX_TF_I4 = 0x0, GX_TF_I8 = 0x1, GX_TF_IA4 = 0x2, @@ -28,14 +29,21 @@ enum TextureFormat GX_TF_C14X2 = 0xA, GX_TF_CMPR = 0xE, - _GX_TF_CTF = 0x20, // copy-texture-format only (simply means linear?) - _GX_TF_ZTF = 0x10, // Z-texture-format + _GX_TF_ZTF = 0x10, // flag for Z texture formats (used internally by dolphin) - // these formats are also valid when copying targets + // Depth texture formats (which directly map to the equivalent colour format above.) + GX_TF_Z8 = 0x1 | _GX_TF_ZTF, + GX_TF_Z16 = 0x3 | _GX_TF_ZTF, + GX_TF_Z24X8 = 0x6 | _GX_TF_ZTF, + + _GX_TF_CTF = 0x20, // flag for copy-texture-format only (used internally by dolphin) + + // These are extra formats that can be used when copying from efb, + // they use one of the texel formats from above, but pack different data into them. GX_CTF_R4 = 0x0 | _GX_TF_CTF, GX_CTF_RA4 = 0x2 | _GX_TF_CTF, GX_CTF_RA8 = 0x3 | _GX_TF_CTF, - GX_CTF_YUVA8 = 0x6 | _GX_TF_CTF, + GX_CTF_YUVA8 = 0x6 | _GX_TF_CTF, // YUV 4:4:4 - Dolphin doesn't implement this format as no commercial games use it GX_CTF_A8 = 0x7 | _GX_TF_CTF, GX_CTF_R8 = 0x8 | _GX_TF_CTF, GX_CTF_G8 = 0x9 | _GX_TF_CTF, @@ -43,13 +51,12 @@ enum TextureFormat GX_CTF_RG8 = 0xB | _GX_TF_CTF, GX_CTF_GB8 = 0xC | _GX_TF_CTF, - GX_TF_Z8 = 0x1 | _GX_TF_ZTF, - GX_TF_Z16 = 0x3 | _GX_TF_ZTF, - GX_TF_Z24X8 = 0x6 | _GX_TF_ZTF, - + // extra depth texture formats that can be used for efb copies. 
GX_CTF_Z4 = 0x0 | _GX_TF_ZTF | _GX_TF_CTF, + GX_CTF_Z8H = 0x8 | _GX_TF_ZTF | _GX_TF_CTF, // This produces an identical result to GX_TF_Z8 GX_CTF_Z8M = 0x9 | _GX_TF_ZTF | _GX_TF_CTF, GX_CTF_Z8L = 0xA | _GX_TF_ZTF | _GX_TF_CTF, + GX_CTF_Z16R = 0xB | _GX_TF_ZTF | _GX_TF_CTF, // Reversed version of GX_TF_Z16 GX_CTF_Z16L = 0xC | _GX_TF_ZTF | _GX_TF_CTF, }; diff --git a/Source/Core/VideoCommon/TextureDecoder_Common.cpp b/Source/Core/VideoCommon/TextureDecoder_Common.cpp index 092b8ec1e0..8fc1631b93 100644 --- a/Source/Core/VideoCommon/TextureDecoder_Common.cpp +++ b/Source/Core/VideoCommon/TextureDecoder_Common.cpp @@ -35,7 +35,6 @@ int TexDecoder_GetTexelSizeInNibbles(int format) case GX_CTF_R4: return 1; case GX_CTF_RA4: return 2; case GX_CTF_RA8: return 4; - case GX_CTF_YUVA8: return 8; case GX_CTF_A8: return 2; case GX_CTF_R8: return 2; case GX_CTF_G8: return 2; @@ -48,10 +47,14 @@ int TexDecoder_GetTexelSizeInNibbles(int format) case GX_TF_Z24X8: return 8; case GX_CTF_Z4: return 1; + case GX_CTF_Z8H: return 2; case GX_CTF_Z8M: return 2; case GX_CTF_Z8L: return 2; + case GX_CTF_Z16R: return 4; case GX_CTF_Z16L: return 4; - default: return 1; + default: + PanicAlert("Unsupported Texture Format (%08x)! (GetTexelSizeInNibbles)", format); + return 1; } } @@ -88,11 +91,13 @@ int TexDecoder_GetBlockWidthInTexels(u32 format) case GX_TF_Z16: return 4; case GX_TF_Z24X8: return 4; case GX_CTF_Z4: return 8; + case GX_CTF_Z8H: return 8; case GX_CTF_Z8M: return 8; case GX_CTF_Z8L: return 8; + case GX_CTF_Z16R: return 4; case GX_CTF_Z16L: return 4; default: - ERROR_LOG(VIDEO, "Unsupported Texture Format (%08x)! (GetBlockWidthInTexels)", format); + PanicAlert("Unsupported Texture Format (%08x)! 
(GetBlockWidthInTexels)", format); return 8; } } @@ -125,11 +130,13 @@ int TexDecoder_GetBlockHeightInTexels(u32 format) case GX_TF_Z16: return 4; case GX_TF_Z24X8: return 4; case GX_CTF_Z4: return 8; + case GX_CTF_Z8H: return 4; case GX_CTF_Z8M: return 4; case GX_CTF_Z8L: return 4; + case GX_CTF_Z16R: return 4; case GX_CTF_Z16L: return 4; default: - ERROR_LOG(VIDEO, "Unsupported Texture Format (%08x)! (GetBlockHeightInTexels)", format); + PanicAlert("Unsupported Texture Format (%08x)! (GetBlockHeightInTexels)", format); return 4; } }