mirror of
https://github.com/RPCS3/rpcs3.git
synced 2024-11-17 08:11:51 +00:00
rsx: Import, rebase and clean up the old detiling patches from 2021
This commit is contained in:
parent
5b46db5e6b
commit
3afc379746
186
rpcs3/Emu/RSX/Common/tiled_dma_copy.hpp
Normal file
186
rpcs3/Emu/RSX/Common/tiled_dma_copy.hpp
Normal file
@ -0,0 +1,186 @@
|
||||
#pragma once
|
||||
|
||||
#include <util/types.hpp>

#include <cstdint>
#include <cstring>
#include <utility>
|
||||
|
||||
// This is a 1:1 port of the GPU code for my own sanity when debugging misplaced bits
|
||||
// For a high-level explanation, read https://envytools.readthedocs.io/en/latest/hw/memory/vram.html
|
||||
namespace rsx
{
	// Parameters for a single tiled <-> linear texel copy.
	// Filled in by tile_texel_data() and consumed by tiled_dma_copy().
	struct detiler_config
	{
		uint32_t prime;             // Prime component of (pitch >> 8); 1 when the pitch is a power of two. Not read by tiled_dma_copy
		uint32_t factor;            // Remaining component of (pitch >> 8); selects the bank distribution scheme
		uint32_t num_tiles_per_row; // prime * factor. Must be non-zero, it is used as a divisor
		uint32_t tile_base_address; // Base address of the tiled region
		uint32_t tile_size;         // Size of the tiled region in bytes; tiled accesses beyond it are dropped
		uint32_t tile_offset;       // Byte offset of the image from tile_base_address
		uint32_t tile_pitch;        // Row pitch in bytes
		uint32_t tile_bank;         // Bank offset added to the computed bank selector
		uint32_t image_width;       // Image width in texels. Not read by tiled_dma_copy
		uint32_t image_height;      // Image height in rows. Not read by tiled_dma_copy
		uint32_t image_bpp;         // Size of one texel in BYTES (tile_texel_data passes sizeof(T))
	};

	// Kept as macros (and #undef'd at the end of the namespace) so that nothing leaks out of this header
#define RSX_TILE_WIDTH 256
#define RSX_TILE_HEIGHT 64
#define RSX_DMA_OP_ENCODE_TILE 0
#define RSX_DMA_OP_DECODE_TILE 1

	// Copies one texel between its linear location and its tiled location.
	//
	// row, col    - Texel coordinates inside the image.
	// conf        - Region/image description, see detiler_config.
	// tiled_data  - Start of the tiled image data (tiled offsets are relative to tile_base_address + tile_offset).
	// linear_data - Start of the linear image data (offsets are row * tile_pitch + col * image_bpp).
	// direction   - RSX_DMA_OP_ENCODE_TILE copies linear -> tiled, any other value copies tiled -> linear.
	//
	// The texel is silently skipped if the configuration is unusable (zero tiles per row) or if
	// the tiled location does not lie completely inside [0, tile_size).
	// The linear side is NOT bounds checked; the caller must provide a large enough buffer.
	inline void tiled_dma_copy(const uint32_t row, const uint32_t col, const detiler_config& conf, char* tiled_data, char* linear_data, int direction)
	{
		if (conf.num_tiles_per_row == 0)
		{
			// num_tiles_per_row is a divisor below. A zero here means the pitch could not be decomposed
			return;
		}

		const uint32_t row_offset = (row * conf.tile_pitch) + conf.tile_base_address + conf.tile_offset;
		const uint32_t this_address = row_offset + (col * conf.image_bpp);

		// 1. Calculate row_addr
		const uint32_t texel_offset = (this_address - conf.tile_base_address) / RSX_TILE_WIDTH;
		const uint32_t line_index = texel_offset / conf.num_tiles_per_row;
		// Calculate coordinate of the tile grid we're supposed to be in
		const uint32_t tile_x = texel_offset % conf.num_tiles_per_row;
		const uint32_t tile_y = line_index / RSX_TILE_HEIGHT;
		// Calculate the grid offset for the tile selected and add the base offset. It's supposed to affect the bank stuff in the next step
		const uint32_t tile_id = tile_y * conf.num_tiles_per_row + tile_x;
		const uint32_t tile_selector = (tile_id + (conf.tile_base_address >> 14)) & 0x3ffff;
		// Calculate row address
		const uint32_t row_address = (tile_selector >> 2) & 0xffff;

		// 2. Calculate bank selector
		// There's a lot of weird math here, but it's just a variant of (tile_selector % 4) to pick a value between [0..3]
		// The table is constant, so keep a single copy instead of rebuilding it for every texel
		static constexpr uint32_t bank_distribution_lookup[16] = { 0, 1, 2, 3, 2, 3, 0, 1, 1, 2, 3, 0, 3, 0, 1, 2 };
		uint32_t bank_selector = 0;

		if (conf.factor == 1)
		{
			bank_selector = (tile_selector & 3);
		}
		else if (conf.factor == 2)
		{
			const uint32_t idx = ((tile_selector + ((tile_y & 1) << 1)) & 3) * 4 + (tile_y & 3);
			bank_selector = bank_distribution_lookup[idx];
		}
		else if (conf.factor >= 4)
		{
			const uint32_t idx = (tile_selector & 3) * 4 + (tile_y & 3);
			bank_selector = bank_distribution_lookup[idx];
		}
		// Any other factor (0 or 3) leaves the selector at 0 before the bank offset is applied
		bank_selector = (bank_selector + conf.tile_bank) & 3;

		// 3. Calculate column selector
		uint32_t column_selector = 0;
		const uint32_t line_offset_in_tile = line_index % RSX_TILE_HEIGHT;
		// Calculate column_selector by bit-twiddling line offset and the other calculated parameter bits:
		// column_selector[9:7] = line_offset_in_tile[5:3]
		// column_selector[6:4] = this_address[7:5]
		// column_selector[3:2] = line_offset_in_tile[1:0]
		// column_selector[1:0] = 0
		column_selector |= ((line_offset_in_tile >> 3) & 0x7) << 7;
		column_selector |= ((this_address >> 5) & 0x7) << 4;
		column_selector |= ((line_offset_in_tile >> 0) & 0x3) << 2;

		// 4. Calculate partition selector (0 or 1)
		const uint32_t partition_selector = (((line_offset_in_tile >> 2) & 1) + ((this_address >> 6) & 1)) & 1;

		// 5. Build tiled address
		uint32_t tile_address = 0;
		// tile_address[31:16] = row_adr[15:0]
		// tile_address[15:14] = bank_sel[1:0]
		// tile_address[13:8]  = column_sel[9:4]
		// tile_address[7:7]   = partition_sel[0:0]
		// tile_address[6:5]   = column_sel[3:2]
		// tile_address[4:0]   = this_address[4:0]
		tile_address |= ((row_address >> 0) & 0xFFFF) << 16;
		tile_address |= ((bank_selector >> 0) & 0x3) << 14;
		tile_address |= ((column_selector >> 4) & 0x3F) << 8;
		tile_address |= ((partition_selector >> 0) & 0x1) << 7;
		tile_address |= ((column_selector >> 2) & 0x3) << 5;
		tile_address |= ((this_address >> 0) & 0x1F) << 0;
		// Twiddle bits 9 and 10
		tile_address ^= (((tile_address >> 12) ^ ((bank_selector ^ tile_selector) & 1) ^ (tile_address >> 14)) & 1) << 9;
		tile_address ^= ((tile_address >> 11) & 1) << 10;

		// Calculate relative addresses and sample
		const uint32_t linear_image_offset = (row * conf.tile_pitch) + (col * conf.image_bpp);
		// Wraps around to a huge value if tile_address lies below the image start; the check below rejects that too
		const uint32_t tile_data_offset = tile_address - (conf.tile_base_address + conf.tile_offset);

		// Do not touch anything out of bounds.
		// The whole texel has to fit, not just its first byte; written this way the comparison cannot overflow.
		if (conf.image_bpp > conf.tile_size || tile_data_offset > conf.tile_size - conf.image_bpp)
		{
			return;
		}

		if (direction == RSX_DMA_OP_ENCODE_TILE)
		{
			std::memcpy(tiled_data + tile_data_offset, linear_data + linear_image_offset, conf.image_bpp);
		}
		else
		{
			std::memcpy(linear_data + linear_image_offset, tiled_data + tile_data_offset, conf.image_bpp);
		}
	}

	// Entry point. In GPU code this is handled by dispatch + main
	//
	// Converts a whole image, one texel of type T at a time.
	//   Reverse == false: src is linear, dst receives the tiled layout (encode).
	//   Reverse == true:  src is tiled, dst receives the linear layout (decode).
	//
	// dst, src           - Destination and source image data. src is only ever read.
	// base_address       - Base address of the tiled region.
	// base_offset        - Byte offset of the image from base_address.
	// tile_size          - Size of the tiled region in bytes.
	// bank_sense         - Bank offset of the tiled region.
	// row_pitch_in_bytes - Row pitch of the region.
	// image_width/height - Image dimensions in texels / rows.
	//
	// Nothing is copied if the pitch cannot be decomposed into a non-zero tile count
	// (pitch of 0, power-of-two pitch below 256, or no factor among 3, 5, 7, 11, 13).
	template <typename T, bool Reverse = false>
	void tile_texel_data(void* dst, const void* src, uint32_t base_address, uint32_t base_offset, uint32_t tile_size, uint8_t bank_sense, uint16_t row_pitch_in_bytes, uint16_t image_width, uint16_t image_height)
	{
		// Splits (pitch >> 8) into { prime, factor }. Returns { 0, 0 } when no decomposition is found
		auto get_prime_factor = [](uint32_t pitch) -> std::pair<uint32_t, uint32_t>
		{
			const uint32_t base = (pitch >> 8);
			if ((pitch & (pitch - 1)) == 0)
			{
				// Power of two (this test also accepts 0)
				return { 1u, base };
			}

			for (const uint32_t prime : { 3u, 5u, 7u, 11u, 13u })
			{
				if ((base % prime) == 0)
				{
					return { prime, base / prime };
				}
			}

			// rsx_log.error("Unexpected pitch value 0x%x", pitch);
			return {};
		};

		const auto [prime, factor] = get_prime_factor(row_pitch_in_bytes);
		const uint32_t tiles_per_row = prime * factor;

		if (tiles_per_row == 0)
		{
			// Unsupported pitch. The address math divides by the tile count, so bail out instead of crashing
			return;
		}

		constexpr int op = Reverse ? RSX_DMA_OP_DECODE_TILE : RSX_DMA_OP_ENCODE_TILE;

		// tiled_dma_copy takes both buffers as mutable pointers, but the source side is never written to
		auto src2 = static_cast<char*>(const_cast<void*>(src));
		auto dst2 = static_cast<char*>(dst);

		const detiler_config dconf = {
			.prime = prime,
			.factor = factor,
			.num_tiles_per_row = tiles_per_row,
			.tile_base_address = base_address,
			.tile_size = tile_size,
			.tile_offset = base_offset,
			.tile_pitch = row_pitch_in_bytes,
			.tile_bank = bank_sense,
			.image_width = image_width,
			.image_height = image_height,
			.image_bpp = sizeof(T)
		};

		for (uint32_t row = 0; row < image_height; ++row)
		{
			for (uint32_t col = 0; col < image_width; ++col)
			{
				if constexpr (op == RSX_DMA_OP_DECODE_TILE)
				{
					// Decode: the source is the tiled buffer
					tiled_dma_copy(row, col, dconf, src2, dst2, op);
				}
				else
				{
					// Encode: the destination is the tiled buffer
					tiled_dma_copy(row, col, dconf, dst2, src2, op);
				}
			}
		}
	}

#undef RSX_TILE_WIDTH
#undef RSX_TILE_HEIGHT
#undef RSX_DMA_OP_ENCODE_TILE
#undef RSX_DMA_OP_DECODE_TILE
}
|
@ -14,6 +14,11 @@
|
||||
extern atomic_t<bool> g_user_asked_for_screenshot;
|
||||
extern atomic_t<recording_mode> g_recording_mode;
|
||||
|
||||
namespace vk
|
||||
{
|
||||
// Debug aid: address of an image to show on the next flip.
// While non-zero, VKGSRender::flip() uploads a 1280x720 B8G8R8A8 image (pitch 5120) from this
// address and presents it instead of the regular present source, then resets the value to 0.
u32 g_debug_vis_address = 0;
|
||||
}
|
||||
|
||||
void VKGSRender::reinitialize_swapchain()
|
||||
{
|
||||
m_swapchain_dims.width = m_frame->client_width();
|
||||
@ -477,7 +482,18 @@ void VKGSRender::flip(const rsx::display_flip_info_t& info)
|
||||
present_info.format = av_format;
|
||||
present_info.address = rsx::get_address(display_buffers[info.buffer].offset, CELL_GCM_LOCATION_LOCAL);
|
||||
|
||||
image_to_flip = get_present_source(&present_info, avconfig);
|
||||
if (vk::g_debug_vis_address)
|
||||
{
|
||||
//std::vector<u8> temp_data(5120 * 1024);
|
||||
//std::memcpy(temp_data.data(), vm::get_super_ptr(vk::g_debug_vis_address), 5120 * 1024);
|
||||
//rsx::untile_texel_data<u32>(vm::get_super_ptr(vk::g_debug_vis_address), temp_data.data(), vk::g_debug_vis_address, 0, 0, 5120, 1280, 720);
|
||||
image_to_flip = m_texture_cache.upload_image_simple(*m_current_command_buffer, VK_FORMAT_B8G8R8A8_UNORM, vk::g_debug_vis_address, 1280, 720, 5120);
|
||||
vk::g_debug_vis_address = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
image_to_flip = get_present_source(&present_info, avconfig);
|
||||
}
|
||||
|
||||
if (avconfig.stereo_mode != stereo_render_mode_options::disabled) [[unlikely]]
|
||||
{
|
||||
|
@ -1,6 +1,9 @@
|
||||
#include "VKRenderTargets.h"
|
||||
#include "VKResourceManager.h"
|
||||
#include "Emu/RSX/rsx_methods.h"
|
||||
#include "Emu/RSX/RSXThread.h"
|
||||
|
||||
#include "Emu/RSX/Common/tiled_dma_copy.hpp"
|
||||
|
||||
namespace vk
|
||||
{
|
||||
@ -678,6 +681,28 @@ namespace vk
|
||||
subres.depth = 1;
|
||||
subres.data = { vm::get_super_ptr<const std::byte>(base_addr), static_cast<std::span<const std::byte>::size_type>(rsx_pitch * surface_height * samples_y) };
|
||||
|
||||
// FIXME: Move to GPU queue
|
||||
std::vector<std::byte> ext_data;
|
||||
const auto range = get_memory_range();
|
||||
|
||||
if (auto region = rsx::get_current_renderer()->get_tiled_memory_region(range))
|
||||
{
|
||||
auto real_data = vm::get_super_ptr<u8>(range.start);
|
||||
ext_data.resize(region.tile->size);
|
||||
rsx::tile_texel_data<u32, true>(
|
||||
ext_data.data(),
|
||||
real_data,
|
||||
region.base_address,
|
||||
range.start - region.base_address,
|
||||
region.tile->size,
|
||||
region.tile->bank,
|
||||
region.tile->pitch,
|
||||
subres.width_in_block,
|
||||
subres.height_in_block
|
||||
);
|
||||
subres.data = ext_data;
|
||||
}
|
||||
|
||||
if (g_cfg.video.resolution_scale_percent == 100 && spp == 1) [[likely]]
|
||||
{
|
||||
push_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
|
||||
|
@ -1009,7 +1009,7 @@ namespace vk
|
||||
{
|
||||
caps.supports_byteswap = (image_linear_size >= 1024);
|
||||
caps.supports_hw_deswizzle = caps.supports_byteswap;
|
||||
caps.supports_zero_copy = caps.supports_byteswap;
|
||||
caps.supports_zero_copy = false;// caps.supports_byteswap;
|
||||
caps.supports_vtc_decoding = false;
|
||||
check_caps = false;
|
||||
}
|
||||
|
@ -1333,7 +1333,7 @@ namespace vk
|
||||
|
||||
void* mem = image->memory->map(0, layout.rowPitch * height);
|
||||
|
||||
auto src = vm::_ptr<const char>(address);
|
||||
auto src = vm::get_super_ptr<const char>(address);
|
||||
auto dst = static_cast<char*>(mem);
|
||||
|
||||
// TODO: SSE optimization
|
||||
|
@ -8,11 +8,18 @@
|
||||
#include "vkutils/image_helpers.h"
|
||||
|
||||
#include "../Common/texture_cache.h"
|
||||
|
||||
#include "Emu/Cell/timers.hpp"
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#define DEBUG_DMA_TILING 1
|
||||
|
||||
#if DEBUG_DMA_TILING
|
||||
#include "../Common/tiled_dma_copy.hpp"
|
||||
#endif
|
||||
|
||||
namespace vk
|
||||
{
|
||||
class cached_texture_section;
|
||||
@ -286,6 +293,30 @@ namespace vk
|
||||
const auto range = (context == rsx::texture_upload_context::framebuffer_storage) ? get_section_range() : get_confirmed_range();
|
||||
vk::flush_dma(range.start, range.length());
|
||||
|
||||
#if DEBUG_DMA_TILING
|
||||
// Are we a tiled region?
|
||||
if (const auto tiled_region = rsx::get_current_renderer()->get_tiled_memory_region(range))
|
||||
{
|
||||
auto real_data = vm::get_super_ptr<u8>(range.start);
|
||||
auto out_data = std::vector<u8>(tiled_region.tile->size);
|
||||
rsx::tile_texel_data<u32>(
|
||||
out_data.data(),
|
||||
real_data,
|
||||
tiled_region.base_address,
|
||||
range.start - tiled_region.base_address,
|
||||
tiled_region.tile->size,
|
||||
tiled_region.tile->bank,
|
||||
tiled_region.tile->pitch,
|
||||
width,
|
||||
height
|
||||
);
|
||||
const auto available_tile_size = tiled_region.tile->size - (range.start - tiled_region.base_address);
|
||||
const auto max_content_size = tiled_region.tile->pitch * utils::align(height, 64);
|
||||
const auto write_length = std::min(max_content_size, available_tile_size);
|
||||
std::memcpy(real_data, out_data.data(), write_length);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (is_swizzled())
|
||||
{
|
||||
// This format is completely worthless to CPU processing algorithms where cache lines on die are linear.
|
||||
|
@ -288,7 +288,9 @@ namespace rsx
|
||||
|
||||
// Classifies an address as RSX local memory or main memory.
// Returns CELL_GCM_LOCATION_LOCAL when addr falls inside the 0x1000'0000 byte window that starts
// at rsx::constants::local_mem_base, CELL_GCM_LOCATION_MAIN otherwise.
static inline u32 get_location(u32 addr)
{
	// We don't really care about the actual memory map, it shouldn't be possible to use the mmio bar region anyway
	constexpr address_range local_mem_range = address_range::start_length(rsx::constants::local_mem_base, 0x1000'0000);
	return local_mem_range.overlaps(addr) ?
		CELL_GCM_LOCATION_LOCAL :
		CELL_GCM_LOCATION_MAIN;
}
|
||||
|
@ -550,6 +550,7 @@
|
||||
<ClInclude Include="Emu\perf_monitor.hpp" />
|
||||
<ClInclude Include="Emu\RSX\Common\bitfield.hpp" />
|
||||
<ClInclude Include="Emu\RSX\Common\buffer_stream.hpp" />
|
||||
<ClInclude Include="Emu\RSX\Common\tiled_dma_copy.hpp" />
|
||||
<ClInclude Include="Emu\RSX\Common\expected.hpp" />
|
||||
<ClInclude Include="Emu\RSX\Common\profiling_timer.hpp" />
|
||||
<ClInclude Include="Emu\RSX\Common\ranged_map.hpp" />
|
||||
@ -905,6 +906,7 @@
|
||||
<None Include="Emu\RSX\Program\GLSLSnippets\GPUDeswizzle.glsl" />
|
||||
<None Include="Emu\RSX\Program\GLSLSnippets\OverlayRenderFS.glsl" />
|
||||
<None Include="Emu\RSX\Program\GLSLSnippets\OverlayRenderVS.glsl" />
|
||||
<None Include="Emu\RSX\Program\GLSLSnippets\RSXMemoryTiling.glsl" />
|
||||
<None Include="Emu\RSX\Program\GLSLSnippets\RSXProg\RSXDefines2.glsl" />
|
||||
<None Include="Emu\RSX\Program\GLSLSnippets\RSXProg\RSXFragmentPrologue.glsl" />
|
||||
<None Include="Emu\RSX\Program\GLSLSnippets\RSXProg\RSXFragmentTextureDepthConversion.glsl" />
|
||||
|
@ -2436,5 +2436,8 @@
|
||||
<None Include="Emu\RSX\Program\GLSLSnippets\RSXProg\RSXVertexFetch.glsl">
|
||||
<Filter>Emu\GPU\RSX\Program\Snippets\RSXProg</Filter>
|
||||
</None>
|
||||
<None Include="Emu\RSX\Program\GLSLSnippets\RSXMemoryTiling.glsl">
|
||||
<Filter>Emu\GPU\RSX\Program\Snippets</Filter>
|
||||
</None>
|
||||
</ItemGroup>
|
||||
</Project>
|
Loading…
Reference in New Issue
Block a user