gl: Implement fast texture readback for D24X8 and RGBA8/BGRA8

This commit is contained in:
kd-11 2022-06-01 21:56:33 +03:00 committed by kd-11
parent 2010d697c8
commit a6e6df1445
10 changed files with 318 additions and 8 deletions

View File

@ -1,4 +1,5 @@
#include "GLCompute.h"
#include "GLTexture.h"
#include "Utilities/StrUtil.h"
namespace gl
@ -272,4 +273,82 @@ namespace gl
m_program.uniforms["out_ptr"] = dst_offset - data_offset;
cs_shuffle_base::run(cmd, data, num_texels * 4, data_offset);
}
// Prepares the compute shader source used to copy a D24X8 image into a buffer.
// The GLSL snippet is a raw string literal containing host-side placeholders;
// they are resolved here and the resulting text is stored in m_src.
cs_d24x8_to_ssbo::cs_d24x8_to_ssbo()
{
	initialize();

	// Including the snippet in expression position yields its raw string literal.
	const auto shader_template =
	#include "../Program/GLSLSnippets/CopyD24x8ToBuffer.glsl"
	;

	const std::string binding_base = std::to_string(GL_COMPUTE_BUFFER_SLOT(0));
	const std::string group_size = std::to_string(optimal_group_size);
	const std::string kernel_size = std::to_string(optimal_kernel_size);

	// Placeholder -> replacement text. The descriptor-set placeholder is replaced
	// with an empty string, i.e. removed from the layout qualifiers.
	const std::pair<std::string_view, std::string> substitutions[] =
	{
		{ "%set, ", "" },
		{ "%loc", binding_base },
		{ "%ws", group_size },
		{ "%wks", kernel_size }
	};

	m_src = fmt::replace_all(shader_template, substitutions);
}
// Copies a region of a depth24/stencil8 image into `dst` using the compute kernel.
//   cmd        - command context used for binding and dispatch
//   src        - image to read; sampled through separate depth and stencil views
//   dst        - destination buffer, bound as an SSBO starting at out_offset
//   out_offset - byte offset into dst where the bound range begins
//   region     - source rectangle (x, y, width, height)
//   layout     - unused by this kernel
//   settings   - supplies the swap-bytes flag and an optional row length
void cs_d24x8_to_ssbo::run(gl::command_context& cmd, gl::viewable_image* src, const gl::buffer* dst, u32 out_offset, const coordu& region, const gl::pixel_buffer_layout& /*layout*/, const gl::pixel_pack_settings& settings)
{
	// A row length of zero means the output rows are exactly as wide as the region.
	const auto row_pitch = settings.get_row_length() ? settings.get_row_length() : region.width;

	auto& uniforms = m_program.uniforms;
	uniforms["swap_bytes"] = settings.get_swap_bytes();
	uniforms["output_pitch"] = row_pitch;
	uniforms["region_offset"] = color2i(region.x, region.y);
	uniforms["region_size"] = color2i(region.width, region.height);

	// NOTE(review): 0xAAE4 is passed alongside rsx::default_remap_vector - presumably
	// the matching default remap encoding; confirm against get_view.
	auto depth_tex = src->get_view(0xAAE4, rsx::default_remap_vector, gl::image_aspect::depth);
	auto stencil_tex = src->get_view(0xAAE4, rsx::default_remap_vector, gl::image_aspect::stencil);
	depth_tex->bind(cmd, GL_COMPUTE_BUFFER_SLOT(0));
	stencil_tex->bind(cmd, GL_COMPUTE_BUFFER_SLOT(1));

	// The kernel writes one 32-bit word per texel, hence 4 bytes per pitch element.
	const auto output_bytes = row_pitch * 4 * region.height;
	dst->bind_range(gl::buffer::target::ssbo, GL_COMPUTE_BUFFER_SLOT(2), out_offset, output_bytes);

	// Every invocation processes optimal_kernel_size texels of the region.
	const auto texel_count = region.width * region.height;
	const int num_invocations = utils::aligned_div(texel_count, optimal_kernel_size);
	compute_task::run(cmd, num_invocations);
}
// Prepares the compute shader source used to copy an RGBA8 image into a buffer.
// The GLSL snippet is a raw string literal containing host-side placeholders;
// they are resolved here and the resulting text is stored in m_src.
cs_rgba8_to_ssbo::cs_rgba8_to_ssbo()
{
	initialize();

	// Including the snippet in expression position yields its raw string literal.
	const auto shader_template =
	#include "../Program/GLSLSnippets/CopyRGBA8ToBuffer.glsl"
	;

	const std::string binding_base = std::to_string(GL_COMPUTE_BUFFER_SLOT(0));
	const std::string group_size = std::to_string(optimal_group_size);
	const std::string kernel_size = std::to_string(optimal_kernel_size);

	// Placeholder -> replacement text. The descriptor-set placeholder is replaced
	// with an empty string, i.e. removed from the layout qualifiers.
	const std::pair<std::string_view, std::string> substitutions[] =
	{
		{ "%set, ", "" },
		{ "%loc", binding_base },
		{ "%ws", group_size },
		{ "%wks", kernel_size }
	};

	m_src = fmt::replace_all(shader_template, substitutions);
}
// Copies a region of an 8-bit-per-channel color image into `dst` using the compute kernel.
//   cmd        - command context used for binding and dispatch
//   src        - image to read; sampled through its color view
//   dst        - destination buffer, bound as an SSBO starting at out_offset
//   out_offset - byte offset into dst where the bound range begins
//   region     - source rectangle (x, y, width, height)
//   layout     - its format selects the BGRA channel order, its size becomes block_width
//   settings   - supplies the swap-bytes flag and an optional row length
void cs_rgba8_to_ssbo::run(gl::command_context& cmd, gl::viewable_image* src, const gl::buffer* dst, u32 out_offset, const coordu& region, const gl::pixel_buffer_layout& layout, const gl::pixel_pack_settings& settings)
{
	// A row length of zero means the output rows are exactly as wide as the region.
	const auto row_pitch = settings.get_row_length() ? settings.get_row_length() : region.width;

	const bool wants_bgra = (layout.format == static_cast<GLenum>(gl::texture::format::bgra));
	const u32 block_width = static_cast<u32>(layout.size);

	auto& uniforms = m_program.uniforms;
	uniforms["swap_bytes"] = settings.get_swap_bytes();
	uniforms["output_pitch"] = row_pitch;
	uniforms["region_offset"] = color2i(region.x, region.y);
	uniforms["region_size"] = color2i(region.width, region.height);
	uniforms["is_bgra"] = wants_bgra;
	uniforms["block_width"] = block_width;

	// NOTE(review): 0xAAE4 is passed alongside rsx::default_remap_vector - presumably
	// the matching default remap encoding; confirm against get_view.
	auto color_tex = src->get_view(0xAAE4, rsx::default_remap_vector, gl::image_aspect::color);
	color_tex->bind(cmd, GL_COMPUTE_BUFFER_SLOT(0));

	// The kernel writes one 32-bit word per texel, hence 4 bytes per pitch element.
	const auto output_bytes = row_pitch * 4 * region.height;
	dst->bind_range(gl::buffer::target::ssbo, GL_COMPUTE_BUFFER_SLOT(1), out_offset, output_bytes);

	// Every invocation processes optimal_kernel_size texels of the region.
	const auto texel_count = region.width * region.height;
	const int num_invocations = utils::aligned_div(texel_count, optimal_kernel_size);
	compute_task::run(cmd, num_invocations);
}
}

View File

@ -340,6 +340,25 @@ namespace gl
}
};
struct pixel_buffer_layout;
// Common interface for compute tasks that copy a region of an image into a buffer.
struct cs_image_to_ssbo : compute_task
{
// Copies `region` of `src` into `dst` starting at byte offset `out_offset`.
// `layout` and `settings` describe the requested output packing; how each is
// interpreted is up to the concrete task.
virtual void run(gl::command_context& cmd, gl::viewable_image* src, const gl::buffer* dst, u32 out_offset, const coordu& region, const gl::pixel_buffer_layout& layout, const gl::pixel_pack_settings& settings) = 0;
};
// Image-to-buffer copy task for depth24/stencil8 images.
struct cs_d24x8_to_ssbo : cs_image_to_ssbo
{
// Builds the shader source from the CopyD24x8ToBuffer GLSL snippet.
cs_d24x8_to_ssbo();
// See cs_image_to_ssbo::run. The `layout` argument is not used by this task.
void run(gl::command_context& cmd, gl::viewable_image* src, const gl::buffer* dst, u32 out_offset, const coordu& region, const gl::pixel_buffer_layout& layout, const gl::pixel_pack_settings& settings) override;
};
// Image-to-buffer copy task for 8-bit-per-channel color images (RGBA or BGRA output order).
struct cs_rgba8_to_ssbo : cs_image_to_ssbo
{
// Builds the shader source from the CopyRGBA8ToBuffer GLSL snippet.
cs_rgba8_to_ssbo();
// See cs_image_to_ssbo::run. `layout.format` selects BGRA ordering and
// `layout.size` is forwarded to the shader as the block width.
void run(gl::command_context& cmd, gl::viewable_image* src, const gl::buffer* dst, u32 out_offset, const coordu& region, const gl::pixel_buffer_layout& layout, const gl::pixel_pack_settings& settings) override;
};
// TODO: Replace with a proper manager
extern std::unordered_map<u32, std::unique_ptr<gl::compute_task>> g_compute_tasks;

View File

@ -336,6 +336,15 @@ namespace gl
m_alignment = value;
return *this;
}
// Returns the stored swap-bytes flag.
bool get_swap_bytes() const
{
return m_swap_bytes;
}
// Returns the stored row length. Callers in the compute tasks treat 0 as
// "no explicit row length" and fall back to the region width.
int get_row_length() const
{
return m_row_length;
}
};
class pixel_unpack_settings
@ -2558,6 +2567,7 @@ public:
// Assignment overloads: each uploads `rhs` to this uniform's location in the
// owning program via the matching glProgramUniform* call.
void operator = (int rhs) const { glProgramUniform1i(m_program.id(), location(), rhs); }
void operator = (unsigned rhs) const { glProgramUniform1ui(m_program.id(), location(), rhs); }
void operator = (float rhs) const { glProgramUniform1f(m_program.id(), location(), rhs); }
// Booleans are uploaded as an unsigned 1 or 0, i.e. through the same GL entry point as `unsigned`.
void operator = (bool rhs) const { glProgramUniform1ui(m_program.id(), location(), rhs ? 1 : 0); }
void operator = (const color1i& rhs) const { glProgramUniform1i(m_program.id(), location(), rhs.r); }
void operator = (const color1f& rhs) const { glProgramUniform1f(m_program.id(), location(), rhs.r); }
void operator = (const color2i& rhs) const { glProgramUniform2i(m_program.id(), location(), rhs.r, rhs.g); }

View File

@ -608,10 +608,10 @@ namespace gl
const u32 src_offset, const coordu& dst_region,
const pixel_unpack_settings& settings)
{
const int row_length = settings.get_row_length();
program_handle.uniforms["src_pitch"] = row_length ? row_length : static_cast<int>(dst_region.width);
program_handle.uniforms["swap_bytes"] = settings.get_swap_bytes() ? 1 : 0;
src->bind_range(GL_COMPUTE_BUFFER_SLOT(0), src_offset, row_length * dst_region.height);
const u32 row_length = settings.get_row_length() ? settings.get_row_length() : static_cast<u32>(dst_region.width);
program_handle.uniforms["src_pitch"] = row_length;
program_handle.uniforms["swap_bytes"] = settings.get_swap_bytes();
src->bind_range(gl::buffer::target::ssbo, GL_COMPUTE_BUFFER_SLOT(0), src_offset, row_length * 4 * dst_region.height);
cmd->stencil_mask(0xFF);

View File

@ -508,6 +508,30 @@ namespace gl
dst->create(buffer::target::pixel_pack, max_mem, nullptr, buffer::memory_type::local, GL_STATIC_COPY);
}
// Fast path: when the driver reports vendor_AMD, the source is a 2D texture and
// it is a viewable_image, supported formats are read back with a compute task
// that writes straight into `dst`. Returning from a case skips the copy below.
if (auto as_vi = dynamic_cast<const gl::viewable_image*>(src);
gl::get_driver_caps().vendor_AMD &&
src->get_target() == gl::texture::target::texture2D &&
as_vi)
{
switch (src->get_internal_format())
{
case gl::texture::internal_format::depth24_stencil8:
// Output starts at offset 0 of dst; pack settings are default-constructed.
gl::get_compute_task<gl::cs_d24x8_to_ssbo>()->run(cmd,
const_cast<gl::viewable_image*>(as_vi), dst, 0,
{ {src_region.x, src_region.y}, {src_region.width, src_region.height} },
pack_info, {});
return;
case gl::texture::internal_format::rgba8:
// Output starts at offset 0 of dst; pack settings are default-constructed.
// The task derives the channel order from pack_info.format.
gl::get_compute_task<gl::cs_rgba8_to_ssbo>()->run(cmd,
const_cast<gl::viewable_image*>(as_vi), dst, 0,
{ {src_region.x, src_region.y}, {src_region.width, src_region.height} },
pack_info, {});
return;
default:
// Any other format falls through to the copy below.
break;
}
}
dst->bind(buffer::target::pixel_pack);
src->copy_to(nullptr, static_cast<texture::format>(pack_info.format), static_cast<texture::type>(pack_info.type), src_level, src_region, {});
};

View File

@ -14,11 +14,11 @@ layout(%push_block) uniform UnpackConfiguration
uint src_pitch;
};
#else
uniform int swap_bytes;
uniform int src_pitch;
uniform uint swap_bytes;
uniform uint src_pitch;
#endif
int getDataOffset()
uint getDataOffset()
{
const ivec2 coords = ivec2(gl_FragCoord.xy);
return coords.y * src_pitch + coords.x;
@ -26,7 +26,7 @@ int getDataOffset()
void main()
{
const int virtual_address = getDataOffset();
const uint virtual_address = getDataOffset();
uint real_data = data[virtual_address];
const uint stencil_byte = bitfieldExtract(real_data, 0, 8);

View File

@ -0,0 +1,74 @@
R"(
#version 450
// Workgroups are one-dimensional; the local size in X is filled in by the host
// before the shader is compiled.
layout(local_size_x = %ws, local_size_y = 1, local_size_z = 1) in;
// Binding points are expressed relative to a base slot filled in by the host.
#define IMAGE_LOCATION(x) (x + %loc)
// The output buffer uses the slot after the two image bindings.
#define SSBO_LOCATION IMAGE_LOCATION(2)
// Depth is sampled as float, stencil as unsigned integer.
layout(%set, binding=IMAGE_LOCATION(0)) uniform sampler2D depthData;
layout(%set, binding=IMAGE_LOCATION(1)) uniform usampler2D stencilData;
// Write-only output: one 32-bit word per texel.
layout(%set, binding=SSBO_LOCATION, std430) writeonly restrict buffer OutputBlock
{
uint data[];
};
// Configuration: either a uniform block or plain uniforms, with identical members.
//   swap_bytes    - non-zero to exchange the outer bytes of the 24-bit depth value
//   output_pitch  - output row stride, in 32-bit words
//   region_offset - top-left texel of the source region
//   region_size   - width and height of the source region, in texels
#if USE_UBO
layout(%push_block) uniform Configuration
{
uint swap_bytes;
uint output_pitch;
ivec2 region_offset;
ivec2 region_size;
};
#else
uniform uint swap_bytes;
uniform uint output_pitch;
uniform ivec2 region_offset;
uniform ivec2 region_size;
#endif
// Number of consecutive texels handled by one invocation; filled in by the host.
#define KERNEL_SIZE %wks
// Flattens the global invocation ID (x, y) into a single linear index,
// using the total dispatch width in X as the row stride.
uint linear_invocation_id()
{
	const uint row_stride = gl_NumWorkGroups.x * gl_WorkGroupSize.x;
	const uvec2 gid = gl_GlobalInvocationID.xy;
	return gid.x + (gid.y * row_stride);
}
// Maps a linear texel index within the region to the texel coordinate to fetch.
// The index enumerates the region row by row, so both the column and the row are
// derived from the region width. The output pitch only affects where the texel is
// written, not which texel is read; dividing by it would collapse rows whenever
// the pitch differs from the region width.
ivec2 linear_id_to_input_coord(uint index)
{
	const uint region_width = uint(region_size.x);
	return ivec2(int(index % region_width), int(index / region_width)) + region_offset;
}
// Converts a source texel coordinate into the index of its word in the output
// buffer: rows are output_pitch words apart, relative to the region origin.
uint input_coord_to_output_id(ivec2 coord)
{
	const ivec2 local = coord - region_offset;
	return (uint(local.y) * output_pitch) + uint(local.x);
}
// Entry point. Each invocation converts up to KERNEL_SIZE consecutive texels of
// the region into packed 32-bit words: depth in the upper 24 bits, stencil in
// the low 8 bits.
void main()
{
	// Total number of texels in the region; it does not change inside the loop.
	const uint texel_count = uint(region_size.x * region_size.y);
	uint index = linear_invocation_id() * KERNEL_SIZE;

	for (int loop = 0; loop < KERNEL_SIZE; ++loop, ++index)
	{
		// Valid indices are 0 .. texel_count - 1. The dispatch is rounded up, so
		// surplus invocations must stop here; index == texel_count is already one
		// texel past the region and would write outside the bound buffer range.
		if (index >= texel_count)
		{
			return;
		}

		ivec2 coord = linear_id_to_input_coord(index);
		float depth = texelFetch(depthData, coord, 0).x;
		uint stencil = texelFetch(stencilData, coord, 0).x;
		uint depth_bytes = uint(depth * 0xffffff);

		if (swap_bytes != 0)
		{
			// Exchange the lowest and highest bytes of the 24-bit value; the middle byte stays.
			depth_bytes = (bitfieldExtract(depth_bytes, 0, 8) << 16u) | (bitfieldExtract(depth_bytes, 16, 8) << 0u) | (depth_bytes & 0xFF00u);
		}

		data[input_coord_to_output_id(coord)] = (depth_bytes << 8) | stencil;
	}
}
)"

View File

@ -0,0 +1,96 @@
R"(
#version 450
// Workgroups are one-dimensional; the local size in X is filled in by the host
// before the shader is compiled.
layout(local_size_x = %ws, local_size_y = 1, local_size_z = 1) in;
// Binding points are expressed relative to a base slot filled in by the host.
#define IMAGE_LOCATION(x) (x + %loc)
// The output buffer uses the slot after the single image binding.
#define SSBO_LOCATION IMAGE_LOCATION(1)
// Color is sampled as normalized float.
layout(%set, binding=IMAGE_LOCATION(0)) uniform sampler2D colorData;
// Write-only output: one 32-bit word per texel.
layout(%set, binding=SSBO_LOCATION, std430) writeonly restrict buffer OutputBlock
{
uint data[];
};
// Configuration: either a uniform block or plain uniforms, with identical members.
//   swap_bytes    - non-zero to reorder channels as a byte swap (only when block_width > 1)
//   output_pitch  - output row stride, in 32-bit words
//   block_width   - element width used to pick the swap pattern and packing order
//   is_bgra       - non-zero to emit channels in BGRA order
//   region_offset - top-left texel of the source region
//   region_size   - width and height of the source region, in texels
#if USE_UBO
layout(%push_block) uniform Configuration
{
uint swap_bytes;
uint output_pitch;
uint block_width;
uint is_bgra;
ivec2 region_offset;
ivec2 region_size;
};
#else
uniform uint swap_bytes;
uniform uint output_pitch;
uniform uint block_width;
uniform uint is_bgra;
uniform ivec2 region_offset;
uniform ivec2 region_size;
#endif
// Number of consecutive texels handled by one invocation; filled in by the host.
#define KERNEL_SIZE %wks
// Flattens the global invocation ID (x, y) into a single linear index,
// using the total dispatch width in X as the row stride.
uint linear_invocation_id()
{
	const uint row_stride = gl_NumWorkGroups.x * gl_WorkGroupSize.x;
	const uvec2 gid = gl_GlobalInvocationID.xy;
	return gid.x + (gid.y * row_stride);
}
// Maps a linear texel index within the region to the texel coordinate to fetch.
// The index enumerates the region row by row, so both the column and the row are
// derived from the region width. The output pitch only affects where the texel is
// written, not which texel is read; dividing by it would collapse rows whenever
// the pitch differs from the region width.
ivec2 linear_id_to_input_coord(uint index)
{
	const uint region_width = uint(region_size.x);
	return ivec2(int(index % region_width), int(index / region_width)) + region_offset;
}
// Converts a source texel coordinate into the index of its word in the output
// buffer: rows are output_pitch words apart, relative to the region origin.
uint input_coord_to_output_id(ivec2 coord)
{
	const ivec2 local = coord - region_offset;
	return (uint(local.y) * output_pitch) + uint(local.x);
}
// Entry point. Each invocation converts up to KERNEL_SIZE consecutive texels of
// the region into one packed 32-bit word per texel.
void main()
{
	// Total number of texels in the region; it does not change inside the loop.
	const uint texel_count = uint(region_size.x * region_size.y);
	uint index = linear_invocation_id() * KERNEL_SIZE;

	for (int loop = 0; loop < KERNEL_SIZE; ++loop, ++index)
	{
		// Valid indices are 0 .. texel_count - 1. The dispatch is rounded up, so
		// surplus invocations must stop here; index == texel_count is already one
		// texel past the region and would write outside the bound buffer range.
		if (index >= texel_count)
		{
			return;
		}

		ivec2 coord = linear_id_to_input_coord(index);
		vec4 color = texelFetch(colorData, coord, 0);

		if (is_bgra != 0)
		{
			color = color.bgra;
		}

		// Specific to 8-bit color in ARGB8 format. Need to generalize later
		if (swap_bytes != 0 && block_width > 1)
		{
			// Reverse all four channels for 4-wide blocks, otherwise swap within each pair.
			color = (block_width == 4) ?
				color.wzyx :
				color.yxwz;
		}

		uvec4 bytes = uvec4(color * 255);
		uint result;

		if (block_width > 1)
		{
			// Simulate BE packing as in UINT_8_8_8_8
			result = bytes.w | (bytes.z << 8u) | (bytes.y << 16u) | (bytes.x << 24u);
		}
		else
		{
			// Byte-wise layout: first channel in the lowest byte.
			result = bytes.x | (bytes.y << 8u) | (bytes.z << 16u) | (bytes.w << 24u);
		}

		uint output_id = input_coord_to_output_id(coord);
		data[output_id] = result;
	}
}
)"

View File

@ -817,6 +817,8 @@
<None Include="Emu\RSX\Program\GLSLInterpreter\FragmentInterpreter.glsl" />
<None Include="Emu\RSX\Program\GLSLInterpreter\VertexInterpreter.glsl" />
<None Include="Emu\RSX\Program\GLSLSnippets\CopyBufferToD24x8.glsl" />
<None Include="Emu\RSX\Program\GLSLSnippets\CopyD24x8ToBuffer.glsl" />
<None Include="Emu\RSX\Program\GLSLSnippets\CopyRGBA8ToBuffer.glsl" />
<None Include="Emu\RSX\Program\GLSLSnippets\GenericVSPassthrough.glsl" />
<None Include="Emu\RSX\Program\GLSLSnippets\GPUDeswizzle.glsl" />
<None Include="Emu\RSX\Program\GLSLSnippets\ShuffleBytes.glsl" />

View File

@ -2151,5 +2151,11 @@
<None Include="Emu\RSX\Program\GLSLSnippets\GenericVSPassthrough.glsl">
<Filter>Emu\GPU\RSX\Program\Snippets</Filter>
</None>
<None Include="Emu\RSX\Program\GLSLSnippets\CopyD24x8ToBuffer.glsl">
<Filter>Emu\GPU\RSX\Program\Snippets</Filter>
</None>
<None Include="Emu\RSX\Program\GLSLSnippets\CopyRGBA8ToBuffer.glsl">
<Filter>Emu\GPU\RSX\Program\Snippets</Filter>
</None>
</ItemGroup>
</Project>