mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-01-30 12:32:43 +00:00
rsx/vk: Initial hw-accelerated tile decoder
This commit is contained in:
parent
6a7386ddb8
commit
647f7ddeec
349
rpcs3/Emu/RSX/Program/GLSLSnippets/RSXMemoryTiling.glsl
Normal file
349
rpcs3/Emu/RSX/Program/GLSLSnippets/RSXMemoryTiling.glsl
Normal file
@ -0,0 +1,349 @@
|
||||
R"(
|
||||
#version 450
// RSX memory tiling kernel. Placeholders beginning with a percent sign are substituted
// by the host before compilation (work group size, binding base, descriptor set,
// push-constant qualifier and the operation selector).
layout(local_size_x = %ws, local_size_y = 1, local_size_z = 1) in;

// Binding index of SSBO number x relative to the host-provided base slot.
#define SSBO_LOCATION(x) (x + %loc)

// Direction of the copy performed by this build of the kernel.
#define MEMORY_OP_DETILE 0
#define MEMORY_OP_TILE 1
#define MEMORY_OP %op

// Whichever buffer acts as the source of the copy is declared readonly.
#if (MEMORY_OP == MEMORY_OP_TILE)
	#define TILED_DATA_MODIFIER
	#define LINEAR_DATA_MODIFIER readonly
#else
	#define TILED_DATA_MODIFIER readonly
	#define LINEAR_DATA_MODIFIER
#endif

// Data in the tiled layout, addressed as 32-bit words.
layout(%set, binding = SSBO_LOCATION(0), std430) TILED_DATA_MODIFIER restrict buffer TiledDataBlock
{
	uint tiled_data[];
};

// Data in the linear layout, addressed as 32-bit words.
layout(%set, binding = SSBO_LOCATION(1), std430) LINEAR_DATA_MODIFIER restrict buffer LinearDataBlock
{
	uint linear_data[];
};

// Job parameters. The same eleven values are exposed either as one uniform block
// (VULKAN defined) or as loose uniforms.
#ifdef VULKAN
layout(%push_block) uniform Configuration
{
	uint prime;
	uint factor;
	uint num_tiles_per_row;
	uint tile_base_address;
	uint tile_size;
	uint tile_offset;
	uint tile_pitch;
	uint tile_bank;
	uint image_width;
	uint image_height;
	uint image_bpp;
};
#else
uniform uint prime;
uniform uint factor;
uniform uint num_tiles_per_row;
uniform uint tile_base_address;
uniform uint tile_size;
uniform uint tile_offset;
uniform uint tile_pitch;
uniform uint tile_bank;
uniform uint image_width;
uniform uint image_height;
uniform uint image_bpp;
#endif

// Tile geometry constants used by the address calculation
#define RSX_TILE_WIDTH 256
#define RSX_TILE_HEIGHT 64
|
||||
|
||||
#if (MEMORY_OP == MEMORY_OP_TILE)
|
||||
|
||||
// Fetches one texel from the linear buffer.
// offset is a texel index (not a byte address); image_bpp selects the texel size in bytes.
// The texel is returned in the low components/bits of the result, unused parts are zero.
// Texel sizes other than 16, 8, 4, 2 or 1 yield an all-zero vector.
uvec4 read_linear(const in uint offset)
{
	if (image_bpp == 16)
	{
		// Four consecutive words per texel
		const uint first_word = offset * 4u;
		return uvec4(
			linear_data[first_word],
			linear_data[first_word + 1u],
			linear_data[first_word + 2u],
			linear_data[first_word + 3u]);
	}

	if (image_bpp == 8)
	{
		// Two consecutive words per texel
		const uint first_word = offset * 2u;
		return uvec4(linear_data[first_word], linear_data[first_word + 1u], 0, 0);
	}

	if (image_bpp == 4)
	{
		return uvec4(linear_data[offset], 0, 0, 0);
	}

	if (image_bpp == 2)
	{
		// Two texels share a word; pick the 16-bit half addressed by the low offset bit
		const int bit_position = int(offset & 1) << 4;
		return uvec4(bitfieldExtract(linear_data[offset >> 1], bit_position, 16), 0, 0, 0);
	}

	if (image_bpp == 1)
	{
		// Four texels share a word; pick the byte addressed by the two low offset bits
		const int bit_position = int(offset & 3) << 3;
		return uvec4(bitfieldExtract(linear_data[offset >> 2], bit_position, 8), 0, 0, 0);
	}

	return uvec4(0);
}
|
||||
|
||||
// Stores one texel into the tiled buffer.
// offset is a texel index (not a byte address); image_bpp selects the texel size in bytes.
// Only the low components/bits of value that make up one texel are written.
// Texel sizes other than 16, 8, 4, 2 or 1 are ignored.
void write_tiled(const in uint offset, const in uvec4 value)
{
	switch (image_bpp)
	{
	case 16:
	{
		tiled_data[offset * 4] = value.x;
		tiled_data[offset * 4 + 1] = value.y;
		tiled_data[offset * 4 + 2] = value.z;
		tiled_data[offset * 4 + 3] = value.w;
		break;
	}
	case 8:
	{
		tiled_data[offset * 2] = value.x;
		tiled_data[offset * 2 + 1] = value.y;
		break;
	}
	case 4:
	{
		tiled_data[offset] = value.x;
		break;
	}
	case 2:
	{
		// Two texels share one word and each is written by a different invocation.
		// A plain load/insert/store can lose the neighbour's update, so the texel's
		// bit field is cleared and filled with atomics instead; other bits are untouched.
		const uint word_offset = offset >> 1;
		const uint shift = (offset & 1u) << 4;
		atomicAnd(tiled_data[word_offset], ~(0xFFFFu << shift));
		atomicOr(tiled_data[word_offset], (value.x & 0xFFFFu) << shift);
		break;
	}
	case 1:
	{
		// Four texels share one word; same reasoning as the 2-byte case.
		const uint word_offset = offset >> 2;
		const uint shift = (offset & 3u) << 3;
		atomicAnd(tiled_data[word_offset], ~(0xFFu << shift));
		atomicOr(tiled_data[word_offset], (value.x & 0xFFu) << shift);
		break;
	}
	default:
		break;
	}
}
|
||||
|
||||
#else
|
||||
|
||||
// Fetches one texel from the tiled buffer.
// offset is a texel index (not a byte address); image_bpp selects the texel size in bytes.
// The texel is returned in the low components/bits of the result, unused parts are zero.
// Texel sizes other than 16, 8, 4, 2 or 1 yield an all-zero vector.
uvec4 read_tiled(const in uint offset)
{
	if (image_bpp == 16)
	{
		// Four consecutive words per texel
		const uint first_word = offset * 4u;
		return uvec4(
			tiled_data[first_word],
			tiled_data[first_word + 1u],
			tiled_data[first_word + 2u],
			tiled_data[first_word + 3u]);
	}

	if (image_bpp == 8)
	{
		// Two consecutive words per texel
		const uint first_word = offset * 2u;
		return uvec4(tiled_data[first_word], tiled_data[first_word + 1u], 0, 0);
	}

	if (image_bpp == 4)
	{
		return uvec4(tiled_data[offset], 0, 0, 0);
	}

	if (image_bpp == 2)
	{
		// Two texels share a word; pick the 16-bit half addressed by the low offset bit
		const int bit_position = int(offset & 1) << 4;
		return uvec4(bitfieldExtract(tiled_data[offset >> 1], bit_position, 16), 0, 0, 0);
	}

	if (image_bpp == 1)
	{
		// Four texels share a word; pick the byte addressed by the two low offset bits
		const int bit_position = int(offset & 3) << 3;
		return uvec4(bitfieldExtract(tiled_data[offset >> 2], bit_position, 8), 0, 0, 0);
	}

	return uvec4(0);
}
|
||||
|
||||
// Stores one texel into the linear buffer.
// offset is a texel index (not a byte address); image_bpp selects the texel size in bytes.
// Only the low components/bits of value that make up one texel are written.
// Texel sizes other than 16, 8, 4, 2 or 1 are ignored.
void write_linear(const in uint offset, const in uvec4 value)
{
	switch (image_bpp)
	{
	case 16:
	{
		linear_data[offset * 4] = value.x;
		linear_data[offset * 4 + 1] = value.y;
		linear_data[offset * 4 + 2] = value.z;
		linear_data[offset * 4 + 3] = value.w;
		break;
	}
	case 8:
	{
		linear_data[offset * 2] = value.x;
		linear_data[offset * 2 + 1] = value.y;
		break;
	}
	case 4:
	{
		linear_data[offset] = value.x;
		break;
	}
	case 2:
	{
		// Two texels share one word and each is written by a different invocation.
		// A plain load/insert/store can lose the neighbour's update, so the texel's
		// bit field is cleared and filled with atomics instead; other bits are untouched.
		const uint word_offset = offset >> 1;
		const uint shift = (offset & 1u) << 4;
		atomicAnd(linear_data[word_offset], ~(0xFFFFu << shift));
		atomicOr(linear_data[word_offset], (value.x & 0xFFFFu) << shift);
		break;
	}
	case 1:
	{
		// Four texels share one word; same reasoning as the 2-byte case.
		const uint word_offset = offset >> 2;
		const uint shift = (offset & 3u) << 3;
		atomicAnd(linear_data[word_offset], ~(0xFFu << shift));
		atomicOr(linear_data[word_offset], (value.x & 0xFFu) << shift);
		break;
	}
	default:
		break;
	}
}
|
||||
|
||||
#endif
|
||||
|
||||
// Copies the texel at (row, col) between the linear and the tiled buffer.
// The linear byte address of the texel is translated into a tiled address made of a
// row address, bank selector, column selector and partition selector; the texel is then
// moved in the direction chosen by MEMORY_OP. Texels whose tiled location falls outside
// of tile_size are skipped.
void do_memory_op(const in uint row, const in uint col)
{
	// Byte position of the texel inside the linear image and its absolute linear address.
	// NOTE(review): rows are spaced by tile_pitch here, so the linear image is presumably
	// expected to use the same pitch as the tile - confirm against the host code.
	const uint byte_in_image = (row * tile_pitch) + (col * image_bpp);
	const uint linear_address = byte_in_image + tile_base_address + tile_offset;

	// 1. Row address
	// Index of the RSX_TILE_WIDTH-byte block that holds this texel, counted from the tile base
	const uint block_index = (linear_address - tile_base_address) / RSX_TILE_WIDTH;
	// Position in the tile grid: blocks wrap after num_tiles_per_row, tiles are RSX_TILE_HEIGHT lines tall
	const uint grid_line = block_index / num_tiles_per_row;
	const uint tile_x = block_index % num_tiles_per_row;
	const uint tile_y = grid_line / RSX_TILE_HEIGHT;
	const uint line_in_tile = grid_line % RSX_TILE_HEIGHT;
	// Running tile number, biased by the tile base address; it also feeds the bank selection below
	const uint selector = ((tile_y * num_tiles_per_row + tile_x) + (tile_base_address >> 14)) & 0x3ffffu;
	const uint row_bits = (selector >> 2) & 0xffffu;

	// 2. Bank selector, always a value in [0..3]
	const uint bank_distribution_lookup[16] = { 0, 1, 2, 3, 2, 3, 0, 1, 1, 2, 3, 0, 3, 0, 1, 2 };
	uint bank = 0u;

	if (factor == 1)
	{
		bank = selector & 3u;
	}
	else if (factor == 2 || factor >= 4)
	{
		// Both cases index the same table; factor 2 additionally skews the selector on odd tile rows
		const uint skew = (factor == 2) ? ((tile_y & 1u) << 1) : 0u;
		bank = bank_distribution_lookup[((selector + skew) & 3u) * 4u + (tile_y & 3u)];
	}

	bank = (bank + tile_bank) & 3u;

	// 3. Column selector
	// column[9:7] = line_in_tile[5:3]
	// column[6:4] = linear_address[7:5]
	// column[3:2] = line_in_tile[1:0]
	// column[1:0] = 0
	const uint column =
		(((line_in_tile >> 3) & 0x7u) << 7) |
		(((linear_address >> 5) & 0x7u) << 4) |
		((line_in_tile & 0x3u) << 2);

	// 4. Partition selector (0 or 1)
	const uint partition = (((line_in_tile >> 2) & 1u) + ((linear_address >> 6) & 1u)) & 1u;

	// 5. Assemble the tiled address
	// tiled[31:16] = row_bits[15:0]
	// tiled[15:14] = bank[1:0]
	// tiled[13:8]  = column[9:4]
	// tiled[7:7]   = partition[0:0]
	// tiled[6:5]   = column[3:2]
	// tiled[4:0]   = linear_address[4:0]
	uint tiled_address =
		((row_bits & 0xFFFFu) << 16) |
		((bank & 0x3u) << 14) |
		(((column >> 4) & 0x3Fu) << 8) |
		((partition & 0x1u) << 7) |
		(((column >> 2) & 0x3u) << 5) |
		(linear_address & 0x1Fu);

	// Twiddle bits 9 and 10
	tiled_address ^= (((tiled_address >> 12) ^ ((bank ^ selector) & 1u) ^ (tiled_address >> 14)) & 1u) << 9;
	tiled_address ^= ((tiled_address >> 11) & 1u) << 10;

	// Make the tiled address relative to the start of the bound data
	const uint byte_in_tile = tiled_address - (tile_base_address + tile_offset);

	if (byte_in_tile >= tile_size)
	{
		// Do not touch anything out of bounds
		return;
	}

	// The accessors take texel indices, not byte offsets
	const uint linear_texel = byte_in_image / image_bpp;
	const uint tiled_texel = byte_in_tile / image_bpp;

#if (MEMORY_OP == MEMORY_OP_DETILE)
	// Tiled -> linear
	write_linear(linear_texel, read_tiled(tiled_texel));
#else
	// Linear -> tiled
	write_tiled(tiled_texel, read_linear(linear_texel));
#endif
}
|
||||
|
||||
// Entry point. Each invocation handles exactly one texel; its 2D coordinate is taken
// from gl_GlobalInvocationID (x = column, y = row).
void main()
{
	const uint row = gl_GlobalInvocationID.y;
	const uint col = gl_GlobalInvocationID.x;

	if (row >= image_height || col >= image_width)
	{
		// Out of bounds
		return;
	}

	do_memory_op(row, col);
}
|
||||
)"
|
@ -502,6 +502,159 @@ namespace vk
|
||||
void run(const vk::command_buffer& cmd, const vk::buffer* dst, const vk::buffer* src, u32 num_words);
|
||||
};
|
||||
|
||||
// Direction of a tile memcpy job. The numeric value is substituted into the shader
// source as its MEMORY_OP selector, so the values must stay in sync with the
// MEMORY_OP_DETILE (0) and MEMORY_OP_TILE (1) defines of the GLSL snippet.
enum RSX_detiler_op
{
	decode = 0, // tiled source, linear destination
	encode = 1  // linear source, tiled destination
};
|
||||
|
||||
struct RSX_detiler_config
|
||||
{
|
||||
u32 tile_base_address;
|
||||
u32 tile_base_offset;
|
||||
u32 tile_size;
|
||||
u32 tile_pitch;
|
||||
u32 bank;
|
||||
|
||||
const vk::buffer* dst;
|
||||
u32 dst_offset;
|
||||
const vk::buffer* src;
|
||||
u32 src_offset;
|
||||
|
||||
u16 image_width;
|
||||
u16 image_height;
|
||||
u32 image_pitch;
|
||||
};
|
||||
|
||||
// Compute job that copies image data between the RSX tiled layout and a linear layout.
// Op selects the direction: decode reads tiled data and writes linear data, encode reads
// linear data and writes tiled data. The shader source comes from RSXMemoryTiling.glsl.
template <RSX_detiler_op Op>
struct cs_tile_memcpy : compute_task
{
#pragma pack (push, 1)
	// Push constant payload. Field order mirrors the Configuration block of the shader.
	struct
	{
		u32 prime;
		u32 factor;
		u32 num_tiles_per_row;
		u32 tile_base_address;
		u32 tile_size;
		u32 tile_offset;
		u32 tile_pitch;
		u32 tile_bank;
		u32 image_width;
		u32 image_height;
		u32 image_bpp;
	} params{};
#pragma pack (pop)

	// Buffers and ranges bound by bind_resources(); refreshed on every run()
	const vk::buffer* src_buffer = nullptr;
	const vk::buffer* dst_buffer = nullptr;
	u32 in_offset = 0;
	u32 out_offset = 0;
	u32 in_block_length = 0;
	u32 out_block_length = 0;

	cs_tile_memcpy()
	{
		// The size handed to the pipeline must cover the whole parameter block
		static_assert(sizeof(params) == 44, "Push constant block size does not match the shader configuration block");

		ssbo_count = 2;
		use_push_constants = true;
		push_constants_size = 44;

		create();

		m_src =
		#include "../Program/GLSLSnippets/RSXMemoryTiling.glsl"
		;

		// One texel per work group; the dispatch size in run() relies on this value
		optimal_group_size = 1;
		const std::pair<std::string_view, std::string> syntax_replace[] =
		{
			{ "%loc", "0" },
			{ "%set", "set = 0" },
			{ "%push_block", "push_constant" },
			{ "%ws", std::to_string(optimal_group_size) },
			{ "%op", std::to_string(Op) }
		};

		m_src = fmt::replace_all(m_src, syntax_replace);
	}

	// Binds source and destination to the shader's two storage buffers.
	// In the shader, binding 0 is the tiled block and binding 1 the linear block, so the
	// XOR with the op value swaps the slots when encoding (source becomes the linear block).
	void bind_resources() override
	{
		const auto op = static_cast<int>(Op);
		m_program->bind_buffer({ src_buffer->value, in_offset, in_block_length }, 0 ^ op, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_descriptor_set);
		m_program->bind_buffer({ dst_buffer->value, out_offset, out_block_length }, 1 ^ op, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_descriptor_set);
	}

	// Uploads the current contents of params as push constants
	void set_parameters(const vk::command_buffer& cmd)
	{
		vkCmdPushConstants(cmd, m_pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, push_constants_size, &params);
	}

	// Records one tile/detile job described by config into cmd.
	// Nothing is recorded (an error is logged instead) when the configuration would lead
	// to a division by zero on the host or inside the shader; in that case the
	// destination range is left untouched.
	void run(const vk::command_buffer& cmd, const RSX_detiler_config& config)
	{
		if (!config.image_width || !config.image_height || config.image_pitch < config.image_width)
		{
			// Empty image, or a texel size that would evaluate to zero
			rsx_log.error("Invalid image dimensions for tile memcpy: width=%u, height=%u, pitch=%u",
				config.image_width, config.image_height, config.image_pitch);
			return;
		}

		// Splits the pitch, expressed in 256-byte units, into { prime, factor }.
		// Returns { 0, 0 } when the pitch does not decompose with any of the known primes.
		const auto get_prime_factor = [](u32 pitch) -> std::pair<u32, u32>
		{
			const u32 base = (pitch >> 8);
			if ((pitch & (pitch - 1)) == 0)
			{
				return { 1u, base };
			}

			for (const u32 prime : { 3u, 5u, 7u, 11u, 13u })
			{
				if ((base % prime) == 0)
				{
					return { prime, base / prime };
				}
			}

			return { 0u, 0u };
		};

		const auto [prime, factor] = get_prime_factor(config.tile_pitch);
		const u32 tiles_per_row = prime * factor;

		if (!tiles_per_row)
		{
			// Also covers a pitch of zero or below 256 bytes. The shader divides by this value.
			rsx_log.error("Unexpected pitch value 0x%x", config.tile_pitch);
			return;
		}

		dst_buffer = config.dst;
		src_buffer = config.src;

		this->in_offset = config.src_offset;
		this->out_offset = config.dst_offset;

		// Number of tiled rows to expose: the image height rounded up to 64 lines,
		// limited to the rows that fit between the base offset and the end of the tile
		const auto tiled_height = std::min(
			utils::align<u32>(config.image_height, 64),
			utils::aligned_div(config.tile_size - config.tile_base_offset, config.tile_pitch)
		);

		if constexpr (Op == RSX_detiler_op::decode)
		{
			this->in_block_length = tiled_height * config.tile_pitch;
			this->out_block_length = config.image_height * config.image_pitch;
		}
		else
		{
			this->in_block_length = config.image_height * config.image_pitch;
			this->out_block_length = tiled_height * config.tile_pitch;
		}

		params.prime = prime;
		params.factor = factor;
		params.num_tiles_per_row = tiles_per_row;
		params.tile_base_address = config.tile_base_address;
		params.tile_size = config.tile_size;
		params.tile_offset = config.tile_base_offset;
		params.tile_pitch = config.tile_pitch;
		params.tile_bank = config.bank;
		params.image_width = config.image_width;
		params.image_height = config.image_height;
		params.image_bpp = config.image_pitch / config.image_width;
		set_parameters(cmd);

		// One invocation per texel: x covers the columns, y the rows
		const u32 invocations_x = utils::aligned_div(config.image_width, optimal_group_size);
		compute_task::run(cmd, invocations_x, config.image_height, 1);
	}
};
|
||||
|
||||
// TODO: Replace with a proper manager
|
||||
extern std::unordered_map<u32, std::unique_ptr<vk::compute_task>> g_compute_tasks;
|
||||
|
||||
|
@ -92,16 +92,24 @@ namespace vk
|
||||
rsx_pitch = pitch;
|
||||
|
||||
const bool require_format_conversion = !!(src->aspect() & VK_IMAGE_ASPECT_STENCIL_BIT) || src->format() == VK_FORMAT_D32_SFLOAT;
|
||||
const auto tiled_region = rsx::get_current_renderer()->get_tiled_memory_region(valid_range);
|
||||
const bool require_tiling = !!tiled_region;
|
||||
const bool require_gpu_transform = require_format_conversion || pack_unpack_swap_bytes || require_tiling;
|
||||
auto dma_mapping = vk::map_dma(valid_range.start, valid_range.length());
|
||||
|
||||
if (require_format_conversion || pack_unpack_swap_bytes)
|
||||
if (require_gpu_transform)
|
||||
{
|
||||
const auto section_length = valid_range.length();
|
||||
const auto transfer_pitch = real_pitch;
|
||||
const auto task_length = transfer_pitch * src_area.height();
|
||||
const auto working_buffer_length = calculate_working_buffer_size(task_length, src->aspect());
|
||||
auto working_buffer_length = calculate_working_buffer_size(task_length, src->aspect());
|
||||
|
||||
if (require_tiling) {
|
||||
working_buffer_length += tiled_region.tile->size;
|
||||
}
|
||||
|
||||
auto working_buffer = vk::get_scratch_buffer(cmd, working_buffer_length);
|
||||
u32 result_offset = 0;
|
||||
|
||||
VkBufferImageCopy region = {};
|
||||
region.imageSubresource = { src->aspect(), 0, 0, 1 };
|
||||
@ -142,17 +150,56 @@ namespace vk
|
||||
|
||||
shuffle_kernel->run(cmd, working_buffer, task_length);
|
||||
|
||||
vk::insert_buffer_memory_barrier(cmd, working_buffer->value, 0, task_length,
|
||||
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||
VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
|
||||
if (!require_tiling)
|
||||
{
|
||||
vk::insert_buffer_memory_barrier(cmd, working_buffer->value, 0, task_length,
|
||||
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||
VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
|
||||
|
||||
require_rw_barrier = false;
|
||||
require_rw_barrier = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (require_tiling)
{
#if !DEBUG_DMA_TILING
	// Transfer/Compute -> Compute barrier.
	// The tiling kernel reads the linear data at [0, task_length) from a compute shader, so the
	// destination scope has to be the compute stage with shader read access. The source scope
	// covers both possible producers of that data: an earlier compute pass and a transfer write.
	vk::insert_buffer_memory_barrier(cmd, working_buffer->value, 0, task_length,
		VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
		VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);

	// Prepare payload. The linear input sits at the start of the scratch buffer,
	// the tiled output is written right behind it.
	const RSX_detiler_config config =
	{
		.tile_base_address = tiled_region.base_address,
		.tile_base_offset = valid_range.start - tiled_region.base_address,
		.tile_size = tiled_region.tile->size,
		.tile_pitch = tiled_region.tile->pitch,
		.bank = tiled_region.tile->bank,

		.dst = working_buffer,
		.dst_offset = task_length,
		.src = working_buffer,
		.src_offset = 0,

		.image_width = width,
		.image_height = height,
		.image_pitch = real_pitch
	};

	// Execute
	const auto job = vk::get_compute_task<vk::cs_tile_memcpy<RSX_detiler_op::encode>>();
	job->run(cmd, config);

	// Later copies must read the tiled result and wait for the compute job to finish
	result_offset = task_length;
	require_rw_barrier = true;
#endif
}
|
||||
|
||||
if (require_rw_barrier)
|
||||
{
|
||||
vk::insert_buffer_memory_barrier(cmd, working_buffer->value, 0, working_buffer_length,
|
||||
vk::insert_buffer_memory_barrier(cmd, working_buffer->value, result_offset, working_buffer_length,
|
||||
VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
||||
VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);
|
||||
}
|
||||
@ -160,6 +207,7 @@ namespace vk
|
||||
if (rsx_pitch == real_pitch) [[likely]]
|
||||
{
|
||||
VkBufferCopy copy = {};
|
||||
copy.srcOffset = result_offset;
|
||||
copy.dstOffset = dma_mapping.first;
|
||||
copy.size = section_length;
|
||||
vkCmdCopyBuffer(cmd, working_buffer->value, dma_mapping.second->value, 1, &copy);
|
||||
@ -178,7 +226,7 @@ namespace vk
|
||||
copy.reserve(transfer_height);
|
||||
|
||||
u32 dst_offset = dma_mapping.first;
|
||||
u32 src_offset = 0;
|
||||
u32 src_offset = result_offset;
|
||||
|
||||
for (unsigned row = 0; row < transfer_height; ++row)
|
||||
{
|
||||
|
@ -14,7 +14,7 @@
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#define DEBUG_DMA_TILING 1
|
||||
#define DEBUG_DMA_TILING 0
|
||||
|
||||
#if DEBUG_DMA_TILING
|
||||
#include "../Common/tiled_dma_copy.hpp"
|
||||
|
Loading…
x
Reference in New Issue
Block a user