From db5f2bfa7ef522a56101776248e7cd0daea6d266 Mon Sep 17 00:00:00 2001
From: Subv <subv2112@gmail.com>
Date: Sun, 15 Apr 2018 19:52:25 -0500
Subject: [PATCH 1/5] GPU/TIC: Added the pitch and block height fields to the
 TIC structure.

---
 src/video_core/textures/texture.h | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index 9d443ea90..58cbb2115 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -4,6 +4,7 @@
 
 #pragma once
 
+#include "common/assert.h"
 #include "common/bit_field.h"
 #include "common/common_funcs.h"
 #include "common/common_types.h"
@@ -57,6 +58,8 @@ union TextureHandle {
 static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size");
 
 struct TICEntry {
+    static constexpr u32 DefaultBlockHeight = 16;
+
     union {
         u32 raw;
         BitField<0, 7, TextureFormat> format;
@@ -70,7 +73,12 @@ struct TICEntry {
         BitField<0, 16, u32> address_high;
         BitField<21, 3, TICHeaderVersion> header_version;
     };
-    INSERT_PADDING_BYTES(4);
+    union {
+        BitField<3, 3, u32> block_height;
+
+        // Upper 16 bits of the 21-bit pitch value (the low 5 bits are always zero)
+        BitField<0, 16, u32> pitch_high;
+    };
     union {
         BitField<0, 16, u32> width_minus_1;
         BitField<23, 4, TextureType> texture_type;
@@ -82,6 +90,13 @@ struct TICEntry {
         return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low);
     }
 
+    u32 Pitch() const {
+        ASSERT(header_version == TICHeaderVersion::Pitch ||
+               header_version == TICHeaderVersion::PitchColorKey);
+        // The pitch value is 21 bits, and is 32B aligned.
+        return pitch_high << 5;
+    }
+
     u32 Width() const {
         return width_minus_1 + 1;
     }

From 6b63aaa5b4f55621117e27c6b80979908c255e75 Mon Sep 17 00:00:00 2001
From: Subv <subv2112@gmail.com>
Date: Sun, 15 Apr 2018 19:53:15 -0500
Subject: [PATCH 2/5] GPU: Allow using a configurable block height when
 unswizzling textures.

---
 .../renderer_opengl/gl_rasterizer_cache.cpp           | 11 ++++++++++-
 src/video_core/textures/decoders.cpp                  |  9 ++++-----
 src/video_core/textures/decoders.h                    |  3 ++-
 src/video_core/textures/texture.h                     |  7 +++++++
 4 files changed, 23 insertions(+), 7 deletions(-)

diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 213b20a21..9d005936d 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -1041,9 +1041,18 @@ Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextu
     params.height = config.tic.Height();
     params.is_tiled = config.tic.IsTiled();
     params.pixel_format = SurfaceParams::PixelFormatFromTextureFormat(config.tic.format);
+
+    if (config.tic.IsTiled()) {
+        params.block_height = config.tic.BlockHeight();
+    } else {
+        // Use the texture-provided stride value if the texture isn't tiled.
+        params.stride = params.PixelsInBytes(config.tic.Pitch());
+    }
+
     params.UpdateParams();
 
-    if (config.tic.Width() % 8 != 0 || config.tic.Height() % 8 != 0) {
+    if (config.tic.Width() % 8 != 0 || config.tic.Height() % 8 != 0 ||
+        params.stride != params.width) {
         Surface src_surface;
         MathUtil::Rectangle<u32> rect;
         std::tie(src_surface, rect) = GetSurfaceSubRect(params, ScaleMatch::Ignore, true);
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 2e87281eb..9c2a10d2e 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -56,23 +56,22 @@ u32 BytesPerPixel(TextureFormat format) {
     }
 }
 
-std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height) {
+std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height,
+                                 u32 block_height) {
     u8* data = Memory::GetPointer(address);
     u32 bytes_per_pixel = BytesPerPixel(format);
 
-    static constexpr u32 DefaultBlockHeight = 16;
-
     std::vector<u8> unswizzled_data(width * height * bytes_per_pixel);
 
     switch (format) {
     case TextureFormat::DXT1:
         // In the DXT1 format, each 4x4 tile is swizzled instead of just individual pixel values.
         CopySwizzledData(width / 4, height / 4, bytes_per_pixel, bytes_per_pixel, data,
-                         unswizzled_data.data(), true, DefaultBlockHeight);
+                         unswizzled_data.data(), true, block_height);
         break;
     case TextureFormat::A8R8G8B8:
         CopySwizzledData(width, height, bytes_per_pixel, bytes_per_pixel, data,
-                         unswizzled_data.data(), true, DefaultBlockHeight);
+                         unswizzled_data.data(), true, block_height);
         break;
     default:
         UNIMPLEMENTED_MSG("Format not implemented");
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h
index 0c21694ff..a700911cf 100644
--- a/src/video_core/textures/decoders.h
+++ b/src/video_core/textures/decoders.h
@@ -14,7 +14,8 @@ namespace Texture {
 /**
  * Unswizzles a swizzled texture without changing its format.
  */
-std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height);
+std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height,
+                                 u32 block_height = TICEntry::DefaultBlockHeight);
 
 /**
  * Decodes an unswizzled texture into a A8R8G8B8 texture.
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index 58cbb2115..09d2317e0 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -105,6 +105,13 @@ struct TICEntry {
         return height_minus_1 + 1;
     }
 
+    u32 BlockHeight() const {
+        ASSERT(header_version == TICHeaderVersion::BlockLinear ||
+               header_version == TICHeaderVersion::BlockLinearColorKey);
+        // The block height is stored in log2 format.
+        return 1 << block_height;
+    }
+
     bool IsTiled() const {
         return header_version == TICHeaderVersion::BlockLinear ||
                header_version == TICHeaderVersion::BlockLinearColorKey;

From ac09b5a2e945f587a8b3b712f54b76d46edb0c2f Mon Sep 17 00:00:00 2001
From: Subv <subv2112@gmail.com>
Date: Sun, 15 Apr 2018 19:54:38 -0500
Subject: [PATCH 3/5] GLCache: Added a function to convert cached PixelFormats
 back to texture formats.

TODO: The way we handle cached formats must change; framebuffer and texture formats are too different to keep them in the same place.
---
 src/video_core/renderer_opengl/gl_rasterizer_cache.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 3293905d6..0b2e3ffef 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -115,6 +115,18 @@ struct SurfaceParams {
         }
     }
 
+    static Tegra::Texture::TextureFormat TextureFormatFromPixelFormat(PixelFormat format) {
+        // TODO(Subv): Properly implement this
+        switch (format) {
+        case PixelFormat::RGBA8:
+            return Tegra::Texture::TextureFormat::A8R8G8B8;
+        case PixelFormat::DXT1:
+            return Tegra::Texture::TextureFormat::DXT1;
+        default:
+            UNREACHABLE();
+        }
+    }
+
     static bool CheckFormatsBlittable(PixelFormat pixel_format_a, PixelFormat pixel_format_b) {
         SurfaceType a_type = GetFormatType(pixel_format_a);
         SurfaceType b_type = GetFormatType(pixel_format_b);

From a3e82e8e1f5cb39246f30cac045db8e243f0daee Mon Sep 17 00:00:00 2001
From: Subv <subv2112@gmail.com>
Date: Sun, 15 Apr 2018 19:55:39 -0500
Subject: [PATCH 4/5] GLCache: Take into account the texture's block height
 when caching and unswizzling.

---
 .../renderer_opengl/gl_rasterizer.cpp         |  3 +-
 .../renderer_opengl/gl_rasterizer_cache.cpp   | 82 +++++++++----------
 .../renderer_opengl/gl_rasterizer_cache.h     |  1 +
 3 files changed, 43 insertions(+), 43 deletions(-)

diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 7b6240e65..9522a35ea 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -523,7 +523,8 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& framebu
     src_params.width = std::min(framebuffer.width, pixel_stride);
     src_params.height = framebuffer.height;
     src_params.stride = pixel_stride;
-    src_params.is_tiled = false;
+    src_params.is_tiled = true;
+    src_params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight;
     src_params.pixel_format =
         SurfaceParams::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format);
     src_params.UpdateParams();
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 9d005936d..a92773f11 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -102,39 +102,36 @@ static void MortonCopyTile(u32 stride, u8* tile_buffer, u8* gl_buffer) {
 }
 
 template <bool morton_to_gl, PixelFormat format>
-void MortonCopy(u32 stride, u32 height, u8* gl_buffer, VAddr base, VAddr start, VAddr end) {
+void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, VAddr base, VAddr start,
+                VAddr end) {
     constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / 8;
     constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format);
 
-    // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check the
-    // configuration for this and perform more generic un/swizzle
-    LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!");
-    VideoCore::MortonCopyPixels128(stride, height, bytes_per_pixel, gl_bytes_per_pixel,
-                                   Memory::GetPointer(base), gl_buffer, morton_to_gl);
+    if (morton_to_gl) {
+        auto data = Tegra::Texture::UnswizzleTexture(
+            base, SurfaceParams::TextureFormatFromPixelFormat(format), stride, height,
+            block_height);
+        std::memcpy(gl_buffer, data.data(), data.size());
+    } else {
+        // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check
+        // the configuration for this and perform more generic un/swizzle
+        LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!");
+        VideoCore::MortonCopyPixels128(stride, height, bytes_per_pixel, gl_bytes_per_pixel,
+                                       Memory::GetPointer(base), gl_buffer, morton_to_gl);
+    }
 }
 
-template <>
-void MortonCopy<true, PixelFormat::DXT1>(u32 stride, u32 height, u8* gl_buffer, VAddr base,
-                                         VAddr start, VAddr end) {
-    constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(PixelFormat::DXT1) / 8;
-    constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(PixelFormat::DXT1);
-
-    // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check the
-    // configuration for this and perform more generic un/swizzle
-    LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!");
-    auto data =
-        Tegra::Texture::UnswizzleTexture(base, Tegra::Texture::TextureFormat::DXT1, stride, height);
-    std::memcpy(gl_buffer, data.data(), data.size());
-}
-
-static constexpr std::array<void (*)(u32, u32, u8*, VAddr, VAddr, VAddr), 2> morton_to_gl_fns = {
-    MortonCopy<true, PixelFormat::RGBA8>,
-    MortonCopy<true, PixelFormat::DXT1>,
+static constexpr std::array<void (*)(u32, u32, u32, u8*, VAddr, VAddr, VAddr), 2> morton_to_gl_fns =
+    {
+        MortonCopy<true, PixelFormat::RGBA8>,
+        MortonCopy<true, PixelFormat::DXT1>,
 };
 
-static constexpr std::array<void (*)(u32, u32, u8*, VAddr, VAddr, VAddr), 2> gl_to_morton_fns = {
-    MortonCopy<false, PixelFormat::RGBA8>,
-    MortonCopy<false, PixelFormat::DXT1>,
+static constexpr std::array<void (*)(u32, u32, u32, u8*, VAddr, VAddr, VAddr), 2> gl_to_morton_fns =
+    {
+        MortonCopy<false, PixelFormat::RGBA8>,
+        // TODO(Subv): Swizzling the DXT1 format is not yet supported
+        nullptr,
 };
 
 // Allocate an uninitialized texture of appropriate size and format for the surface
@@ -311,15 +308,16 @@ MathUtil::Rectangle<u32> SurfaceParams::GetScaledSubRect(const SurfaceParams& su
 
 bool SurfaceParams::ExactMatch(const SurfaceParams& other_surface) const {
     return std::tie(other_surface.addr, other_surface.width, other_surface.height,
-                    other_surface.stride, other_surface.pixel_format, other_surface.is_tiled) ==
-               std::tie(addr, width, height, stride, pixel_format, is_tiled) &&
+                    other_surface.stride, other_surface.block_height, other_surface.pixel_format,
+                    other_surface.is_tiled) ==
+               std::tie(addr, width, height, stride, block_height, pixel_format, is_tiled) &&
            pixel_format != PixelFormat::Invalid;
 }
 
 bool SurfaceParams::CanSubRect(const SurfaceParams& sub_surface) const {
     return sub_surface.addr >= addr && sub_surface.end <= end &&
            sub_surface.pixel_format == pixel_format && pixel_format != PixelFormat::Invalid &&
-           sub_surface.is_tiled == is_tiled &&
+           sub_surface.is_tiled == is_tiled && sub_surface.block_height == block_height &&
            (sub_surface.addr - addr) % BytesInPixels(is_tiled ? 64 : 1) == 0 &&
            (sub_surface.stride == stride || sub_surface.height <= (is_tiled ? 8u : 1u)) &&
            GetSubRect(sub_surface).left + sub_surface.width <= stride;
@@ -328,7 +326,8 @@ bool SurfaceParams::CanSubRect(const SurfaceParams& sub_surface) const {
 bool SurfaceParams::CanExpand(const SurfaceParams& expanded_surface) const {
     return pixel_format != PixelFormat::Invalid && pixel_format == expanded_surface.pixel_format &&
            addr <= expanded_surface.end && expanded_surface.addr <= end &&
-           is_tiled == expanded_surface.is_tiled && stride == expanded_surface.stride &&
+           is_tiled == expanded_surface.is_tiled && block_height == expanded_surface.block_height &&
+           stride == expanded_surface.stride &&
            (std::max(expanded_surface.addr, addr) - std::min(expanded_surface.addr, addr)) %
                    BytesInPixels(stride * (is_tiled ? 8 : 1)) ==
                0;
@@ -339,6 +338,9 @@ bool SurfaceParams::CanTexCopy(const SurfaceParams& texcopy_params) const {
         end < texcopy_params.end) {
         return false;
     }
+    if (texcopy_params.block_height != block_height)
+        return false;
+
     if (texcopy_params.width != texcopy_params.stride) {
         const u32 tile_stride = static_cast<u32>(BytesInPixels(stride * (is_tiled ? 8 : 1)));
         return (texcopy_params.addr - addr) % BytesInPixels(is_tiled ? 64 : 1) == 0 &&
@@ -481,18 +483,13 @@ void CachedSurface::LoadGLBuffer(VAddr load_start, VAddr load_end) {
     const u64 start_offset = load_start - addr;
 
     if (!is_tiled) {
-        ASSERT(type == SurfaceType::Color);
         const u32 bytes_per_pixel{GetFormatBpp() >> 3};
 
-        // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check
-        // the configuration for this and perform more generic un/swizzle
-        LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!");
-        VideoCore::MortonCopyPixels128(width, height, bytes_per_pixel, 4,
-                                       texture_src_data + start_offset, &gl_buffer[start_offset],
-                                       true);
+        std::memcpy(&gl_buffer[start_offset], texture_src_data + start_offset,
+                    bytes_per_pixel * width * height);
     } else {
-        morton_to_gl_fns[static_cast<size_t>(pixel_format)](stride, height, &gl_buffer[0], addr,
-                                                            load_start, load_end);
+        morton_to_gl_fns[static_cast<size_t>(pixel_format)](
+            stride, block_height, height, &gl_buffer[0], addr, load_start, load_end);
     }
 }
 
@@ -533,11 +530,10 @@ void CachedSurface::FlushGLBuffer(VAddr flush_start, VAddr flush_end) {
         if (backup_bytes)
             std::memcpy(&dst_buffer[coarse_start_offset], &backup_data[0], backup_bytes);
     } else if (!is_tiled) {
-        ASSERT(type == SurfaceType::Color);
         std::memcpy(dst_buffer + start_offset, &gl_buffer[start_offset], flush_end - flush_start);
     } else {
-        gl_to_morton_fns[static_cast<size_t>(pixel_format)](stride, height, &gl_buffer[0], addr,
-                                                            flush_start, flush_end);
+        gl_to_morton_fns[static_cast<size_t>(pixel_format)](
+            stride, block_height, height, &gl_buffer[0], addr, flush_start, flush_end);
     }
 }
 
@@ -1103,6 +1099,8 @@ SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces(
     color_params.res_scale = resolution_scale_factor;
     color_params.width = config.width;
     color_params.height = config.height;
+    // TODO(Subv): Can framebuffers use a different block height?
+    color_params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight;
     SurfaceParams depth_params = color_params;
 
     color_params.addr = memory_manager->PhysicalToVirtualAddress(config.Address());
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 0b2e3ffef..26d6c3061 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -225,6 +225,7 @@ struct SurfaceParams {
     u32 width = 0;
     u32 height = 0;
     u32 stride = 0;
+    u32 block_height = 0;
     u16 res_scale = 1;
 
     bool is_tiled = false;

From 48d4efbd696d1dbd5330d74e69a52f8e508d279d Mon Sep 17 00:00:00 2001
From: Subv <subv2112@gmail.com>
Date: Sun, 15 Apr 2018 19:56:07 -0500
Subject: [PATCH 5/5] GPU: Pitch textures are now supported, don't assert when
 encountering them.

---
 src/video_core/engines/maxwell_3d.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index a2f162602..2a3ff234a 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -218,8 +218,9 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
     Texture::TICEntry tic_entry;
     Memory::ReadBlock(tic_address_cpu, &tic_entry, sizeof(Texture::TICEntry));
 
-    ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear,
-               "TIC versions other than BlockLinear are unimplemented");
+    ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear ||
+                   tic_entry.header_version == Texture::TICHeaderVersion::Pitch,
+               "TIC versions other than BlockLinear or Pitch are unimplemented");
 
     ASSERT_MSG((tic_entry.texture_type == Texture::TextureType::Texture2D) ||
                    (tic_entry.texture_type == Texture::TextureType::Texture2DNoMipmap),