mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-02-06 00:40:11 +00:00
gl: Implement fast texture readback for D24X8 and RGBA8/BGRA8
This commit is contained in:
parent
2010d697c8
commit
a6e6df1445
@ -1,4 +1,5 @@
|
||||
#include "GLCompute.h"
|
||||
#include "GLTexture.h"
|
||||
#include "Utilities/StrUtil.h"
|
||||
|
||||
namespace gl
|
||||
@ -272,4 +273,82 @@ namespace gl
|
||||
m_program.uniforms["out_ptr"] = dst_offset - data_offset;
|
||||
cs_shuffle_base::run(cmd, data, num_texels * 4, data_offset);
|
||||
}
|
||||
|
||||
// Prepares the GLSL source of the depth24/stencil8 -> SSBO copy kernel.
// The shader template is pulled in as a raw string literal and its placeholders
// are substituted before the result is stored in m_src.
cs_d24x8_to_ssbo::cs_d24x8_to_ssbo()
{
	// Kept as the first step, exactly as before; the work-size members read below
	// are presumably populated here - TODO confirm against compute_task::initialize().
	initialize();

	const char* const glsl_template =
	#include "../Program/GLSLSnippets/CopyD24x8ToBuffer.glsl"
	;

	const std::string binding_base = std::to_string(GL_COMPUTE_BUFFER_SLOT(0));
	const std::string group_size = std::to_string(optimal_group_size);
	const std::string kernel_size = std::to_string(optimal_kernel_size);

	// Placeholder -> replacement text.
	const std::pair<std::string_view, std::string> substitutions[] =
	{
		{ "%set, ", "" },           // the "%set, " token is simply removed for this backend
		{ "%loc", binding_base },   // base binding index used by IMAGE_LOCATION()
		{ "%ws", group_size },      // local_size_x of the work group
		{ "%wks", kernel_size }     // KERNEL_SIZE: texels handled per invocation
	};

	m_src = fmt::replace_all(glsl_template, substitutions);
}
|
||||
|
||||
// Copies a region of a depth24/stencil8 image into `dst` using the compute kernel.
//
// cmd        - command context forwarded to the view binds and the dispatch
// src        - image providing separate depth and stencil views
// dst        - destination buffer, bound as an SSBO range starting at out_offset
// out_offset - offset of the bound range inside dst
// region     - source rectangle; x/y become region_offset, width/height become region_size
// (layout)   - unused by this kernel
// settings   - supplies the optional row length and the byte-swap flag
void cs_d24x8_to_ssbo::run(gl::command_context& cmd, gl::viewable_image* src, const gl::buffer* dst, u32 out_offset, const coordu& region, const gl::pixel_buffer_layout& /*layout*/, const gl::pixel_pack_settings& settings)
{
	// A row length of zero means "tightly packed": fall back to the region width.
	const auto configured_row_length = settings.get_row_length();
	const auto row_pitch = configured_row_length ? configured_row_length : region.width;

	// Kernel parameters.
	m_program.uniforms["swap_bytes"] = settings.get_swap_bytes();
	m_program.uniforms["output_pitch"] = row_pitch;
	m_program.uniforms["region_offset"] = color2i(region.x, region.y);
	m_program.uniforms["region_size"] = color2i(region.width, region.height);

	// Both aspect views are requested before anything is bound, as in the original order.
	// NOTE(review): 0xAAE4 is presumably the identity remap encoding - confirm.
	auto depth_aspect = src->get_view(0xAAE4, rsx::default_remap_vector, gl::image_aspect::depth);
	auto stencil_aspect = src->get_view(0xAAE4, rsx::default_remap_vector, gl::image_aspect::stencil);

	depth_aspect->bind(cmd, GL_COMPUTE_BUFFER_SLOT(0));
	stencil_aspect->bind(cmd, GL_COMPUTE_BUFFER_SLOT(1));

	// The kernel writes one 32-bit word per texel, hence 4 bytes per pitch element.
	const auto output_bytes = row_pitch * 4 * region.height;
	dst->bind_range(gl::buffer::target::ssbo, GL_COMPUTE_BUFFER_SLOT(2), out_offset, output_bytes);

	// NOTE(review): the count is derived from the per-invocation kernel size only. If
	// compute_task::run() treats its argument as a work-group count, this dispatches more
	// groups than needed (the shader's bounds check discards the surplus) - confirm.
	const auto texel_count = region.width * region.height;
	const int num_invocations = utils::aligned_div(texel_count, optimal_kernel_size);
	compute_task::run(cmd, num_invocations);
}
|
||||
|
||||
// Prepares the GLSL source of the RGBA8/BGRA8 -> SSBO copy kernel.
// The shader template is pulled in as a raw string literal and its placeholders
// are substituted before the result is stored in m_src.
cs_rgba8_to_ssbo::cs_rgba8_to_ssbo()
{
	// Kept as the first step, exactly as before; the work-size members read below
	// are presumably populated here - TODO confirm against compute_task::initialize().
	initialize();

	const char* const glsl_template =
	#include "../Program/GLSLSnippets/CopyRGBA8ToBuffer.glsl"
	;

	const std::string binding_base = std::to_string(GL_COMPUTE_BUFFER_SLOT(0));
	const std::string group_size = std::to_string(optimal_group_size);
	const std::string kernel_size = std::to_string(optimal_kernel_size);

	// Placeholder -> replacement text.
	const std::pair<std::string_view, std::string> substitutions[] =
	{
		{ "%set, ", "" },           // the "%set, " token is simply removed for this backend
		{ "%loc", binding_base },   // base binding index used by IMAGE_LOCATION()
		{ "%ws", group_size },      // local_size_x of the work group
		{ "%wks", kernel_size }     // KERNEL_SIZE: texels handled per invocation
	};

	m_src = fmt::replace_all(glsl_template, substitutions);
}
|
||||
|
||||
// Copies a region of an 8-bit-per-channel color image into `dst` using the compute kernel.
//
// cmd        - command context forwarded to the view bind and the dispatch
// src        - image providing the color view
// dst        - destination buffer, bound as an SSBO range starting at out_offset
// out_offset - offset of the bound range inside dst
// region     - source rectangle; x/y become region_offset, width/height become region_size
// layout     - its format selects the BGRA path, its size is forwarded as block_width
// settings   - supplies the optional row length and the byte-swap flag
void cs_rgba8_to_ssbo::run(gl::command_context& cmd, gl::viewable_image* src, const gl::buffer* dst, u32 out_offset, const coordu& region, const gl::pixel_buffer_layout& layout, const gl::pixel_pack_settings& settings)
{
	// A row length of zero means "tightly packed": fall back to the region width.
	const auto configured_row_length = settings.get_row_length();
	const auto row_pitch = configured_row_length ? configured_row_length : region.width;

	// Kernel parameters.
	m_program.uniforms["swap_bytes"] = settings.get_swap_bytes();
	m_program.uniforms["output_pitch"] = row_pitch;
	m_program.uniforms["region_offset"] = color2i(region.x, region.y);
	m_program.uniforms["region_size"] = color2i(region.width, region.height);
	m_program.uniforms["is_bgra"] = (layout.format == static_cast<GLenum>(gl::texture::format::bgra));
	m_program.uniforms["block_width"] = static_cast<u32>(layout.size);

	// NOTE(review): 0xAAE4 is presumably the identity remap encoding - confirm.
	auto color_aspect = src->get_view(0xAAE4, rsx::default_remap_vector, gl::image_aspect::color);
	color_aspect->bind(cmd, GL_COMPUTE_BUFFER_SLOT(0));

	// The kernel writes one 32-bit word per texel, hence 4 bytes per pitch element.
	const auto output_bytes = row_pitch * 4 * region.height;
	dst->bind_range(gl::buffer::target::ssbo, GL_COMPUTE_BUFFER_SLOT(1), out_offset, output_bytes);

	// NOTE(review): the count is derived from the per-invocation kernel size only. If
	// compute_task::run() treats its argument as a work-group count, this dispatches more
	// groups than needed (the shader's bounds check discards the surplus) - confirm.
	const auto texel_count = region.width * region.height;
	const int num_invocations = utils::aligned_div(texel_count, optimal_kernel_size);
	compute_task::run(cmd, num_invocations);
}
|
||||
}
|
||||
|
@ -340,6 +340,25 @@ namespace gl
|
||||
}
|
||||
};
|
||||
|
||||
struct pixel_buffer_layout;
|
||||
|
||||
struct cs_image_to_ssbo : compute_task
|
||||
{
|
||||
virtual void run(gl::command_context& cmd, gl::viewable_image* src, const gl::buffer* dst, u32 out_offset, const coordu& region, const gl::pixel_buffer_layout& layout, const gl::pixel_pack_settings& settings) = 0;
|
||||
};
|
||||
|
||||
struct cs_d24x8_to_ssbo : cs_image_to_ssbo
|
||||
{
|
||||
cs_d24x8_to_ssbo();
|
||||
void run(gl::command_context& cmd, gl::viewable_image* src, const gl::buffer* dst, u32 out_offset, const coordu& region, const gl::pixel_buffer_layout& layout, const gl::pixel_pack_settings& settings) override;
|
||||
};
|
||||
|
||||
struct cs_rgba8_to_ssbo : cs_image_to_ssbo
|
||||
{
|
||||
cs_rgba8_to_ssbo();
|
||||
void run(gl::command_context& cmd, gl::viewable_image* src, const gl::buffer* dst, u32 out_offset, const coordu& region, const gl::pixel_buffer_layout& layout, const gl::pixel_pack_settings& settings) override;
|
||||
};
|
||||
|
||||
// TODO: Replace with a proper manager
|
||||
extern std::unordered_map<u32, std::unique_ptr<gl::compute_task>> g_compute_tasks;
|
||||
|
||||
|
@ -336,6 +336,15 @@ namespace gl
|
||||
m_alignment = value;
|
||||
return *this;
|
||||
}
|
||||
|
||||
bool get_swap_bytes() const
|
||||
{
|
||||
return m_swap_bytes;
|
||||
}
|
||||
int get_row_length() const
|
||||
{
|
||||
return m_row_length;
|
||||
}
|
||||
};
|
||||
|
||||
class pixel_unpack_settings
|
||||
@ -2558,6 +2567,7 @@ public:
|
||||
void operator = (int rhs) const { glProgramUniform1i(m_program.id(), location(), rhs); }
|
||||
void operator = (unsigned rhs) const { glProgramUniform1ui(m_program.id(), location(), rhs); }
|
||||
void operator = (float rhs) const { glProgramUniform1f(m_program.id(), location(), rhs); }
|
||||
void operator = (bool rhs) const { glProgramUniform1ui(m_program.id(), location(), rhs ? 1 : 0); }
|
||||
void operator = (const color1i& rhs) const { glProgramUniform1i(m_program.id(), location(), rhs.r); }
|
||||
void operator = (const color1f& rhs) const { glProgramUniform1f(m_program.id(), location(), rhs.r); }
|
||||
void operator = (const color2i& rhs) const { glProgramUniform2i(m_program.id(), location(), rhs.r, rhs.g); }
|
||||
|
@ -608,10 +608,10 @@ namespace gl
|
||||
const u32 src_offset, const coordu& dst_region,
|
||||
const pixel_unpack_settings& settings)
|
||||
{
|
||||
const int row_length = settings.get_row_length();
|
||||
program_handle.uniforms["src_pitch"] = row_length ? row_length : static_cast<int>(dst_region.width);
|
||||
program_handle.uniforms["swap_bytes"] = settings.get_swap_bytes() ? 1 : 0;
|
||||
src->bind_range(GL_COMPUTE_BUFFER_SLOT(0), src_offset, row_length * dst_region.height);
|
||||
const u32 row_length = settings.get_row_length() ? settings.get_row_length() : static_cast<u32>(dst_region.width);
|
||||
program_handle.uniforms["src_pitch"] = row_length;
|
||||
program_handle.uniforms["swap_bytes"] = settings.get_swap_bytes();
|
||||
src->bind_range(gl::buffer::target::ssbo, GL_COMPUTE_BUFFER_SLOT(0), src_offset, row_length * 4 * dst_region.height);
|
||||
|
||||
cmd->stencil_mask(0xFF);
|
||||
|
||||
|
@ -508,6 +508,30 @@ namespace gl
|
||||
dst->create(buffer::target::pixel_pack, max_mem, nullptr, buffer::memory_type::local, GL_STATIC_COPY);
|
||||
}
|
||||
|
||||
if (auto as_vi = dynamic_cast<const gl::viewable_image*>(src);
|
||||
gl::get_driver_caps().vendor_AMD &&
|
||||
src->get_target() == gl::texture::target::texture2D &&
|
||||
as_vi)
|
||||
{
|
||||
switch (src->get_internal_format())
|
||||
{
|
||||
case gl::texture::internal_format::depth24_stencil8:
|
||||
gl::get_compute_task<gl::cs_d24x8_to_ssbo>()->run(cmd,
|
||||
const_cast<gl::viewable_image*>(as_vi), dst, 0,
|
||||
{ {src_region.x, src_region.y}, {src_region.width, src_region.height} },
|
||||
pack_info, {});
|
||||
return;
|
||||
case gl::texture::internal_format::rgba8:
|
||||
gl::get_compute_task<gl::cs_rgba8_to_ssbo>()->run(cmd,
|
||||
const_cast<gl::viewable_image*>(as_vi), dst, 0,
|
||||
{ {src_region.x, src_region.y}, {src_region.width, src_region.height} },
|
||||
pack_info, {});
|
||||
return;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
dst->bind(buffer::target::pixel_pack);
|
||||
src->copy_to(nullptr, static_cast<texture::format>(pack_info.format), static_cast<texture::type>(pack_info.type), src_level, src_region, {});
|
||||
};
|
||||
|
@ -14,11 +14,11 @@ layout(%push_block) uniform UnpackConfiguration
|
||||
uint src_pitch;
|
||||
};
|
||||
#else
|
||||
uniform int swap_bytes;
|
||||
uniform int src_pitch;
|
||||
uniform uint swap_bytes;
|
||||
uniform uint src_pitch;
|
||||
#endif
|
||||
|
||||
int getDataOffset()
|
||||
uint getDataOffset()
|
||||
{
|
||||
const ivec2 coords = ivec2(gl_FragCoord.xy);
|
||||
return coords.y * src_pitch + coords.x;
|
||||
@ -26,7 +26,7 @@ int getDataOffset()
|
||||
|
||||
void main()
|
||||
{
|
||||
const int virtual_address = getDataOffset();
|
||||
const uint virtual_address = getDataOffset();
|
||||
uint real_data = data[virtual_address];
|
||||
|
||||
const uint stencil_byte = bitfieldExtract(real_data, 0, 8);
|
||||
|
74
rpcs3/Emu/RSX/Program/GLSLSnippets/CopyD24x8ToBuffer.glsl
Normal file
74
rpcs3/Emu/RSX/Program/GLSLSnippets/CopyD24x8ToBuffer.glsl
Normal file
@ -0,0 +1,74 @@
|
||||
R"(
|
||||
#version 450
|
||||
// One-dimensional work group; the x size is a placeholder substituted by the host.
layout(local_size_x = %ws, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
// Binding indices are expressed relative to a base slot substituted by the host.
#define IMAGE_LOCATION(x) (x + %loc)
|
||||
// The output buffer uses the slot after the two image bindings.
#define SSBO_LOCATION IMAGE_LOCATION(2)
|
||||
|
||||
// Depth aspect of the source image, fetched as a float.
layout(%set, binding=IMAGE_LOCATION(0)) uniform sampler2D depthData;
|
||||
// Stencil aspect of the source image, fetched as an unsigned integer.
layout(%set, binding=IMAGE_LOCATION(1)) uniform usampler2D stencilData;
|
||||
|
||||
// Write-only destination; main() stores one uint per texel.
layout(%set, binding=SSBO_LOCATION, std430) writeonly restrict buffer OutputBlock
|
||||
{
|
||||
uint data[];
|
||||
};
|
||||
|
||||
// Kernel parameters, declared either as a uniform block or as plain uniforms.
// swap_bytes    - non-zero requests the depth byte swap performed in main()
// output_pitch  - row stride of the output, in uints
// region_offset - texel coordinate of the first source texel
// region_size   - width and height of the copied region, in texels
#if USE_UBO
|
||||
layout(%push_block) uniform Configuration
|
||||
{
|
||||
uint swap_bytes;
|
||||
uint output_pitch;
|
||||
ivec2 region_offset;
|
||||
ivec2 region_size;
|
||||
};
|
||||
#else
|
||||
uniform uint swap_bytes;
|
||||
uniform uint output_pitch;
|
||||
uniform ivec2 region_offset;
|
||||
uniform ivec2 region_size;
|
||||
#endif
|
||||
|
||||
// Number of consecutive texels handled by one invocation (loop count in main()).
#define KERNEL_SIZE %wks
|
||||
|
||||
// Flattens the global invocation id into a single index, treating the
// dispatch grid as rows of (work groups in x) * (work group size in x) invocations.
uint linear_invocation_id()
{
	const uint invocations_per_row = (gl_NumWorkGroups.x * gl_WorkGroupSize.x);
	const uint row_base = (gl_GlobalInvocationID.y * invocations_per_row);
	return row_base + gl_GlobalInvocationID.x;
}
|
||||
|
||||
// Maps a linear texel index inside the copy region to a texel coordinate in the source image.
//
// The index enumerates the region row by row with a stride of region_size.x, so both the
// column and the row have to be derived from the region width. The output pitch describes
// only the destination layout and is applied in input_coord_to_output_id(). The previous
// code divided by output_pitch here, which selected the wrong source rows whenever the
// pitch differed from the region width.
//
// index   - linear texel index, expected in [0, region_size.x * region_size.y)
// returns - source texel coordinate, including region_offset
ivec2 linear_id_to_input_coord(uint index)
{
	const uint region_width = uint(region_size.x);
	return ivec2(int(index % region_width), int(index / region_width)) + region_offset;
}
|
||||
|
||||
// Converts a source texel coordinate into an index into the output array:
// the coordinate is made relative to the region origin and then laid out
// with a row stride of output_pitch elements.
uint input_coord_to_output_id(ivec2 coord)
{
	const ivec2 local = coord - region_offset;
	return (uint(local.y) * output_pitch) + uint(local.x);
}
|
||||
|
||||
// Kernel entry point. Every invocation converts KERNEL_SIZE consecutive texels of the
// region: depth is scaled to a 24-bit integer, optionally byte-swapped, and packed
// together with the stencil value as (depth << 8) | stencil into the output buffer.
void main()
{
	const uint texel_count = uint(region_size.x * region_size.y);
	uint index = linear_invocation_id() * KERNEL_SIZE;

	for (int loop = 0; loop < KERNEL_SIZE; ++loop, ++index)
	{
		// Valid indices are [0, texel_count). The previous check used a strict
		// greater-than, which let index == texel_count through and caused one fetch
		// and one store past the end of the region.
		if (index >= texel_count)
		{
			return;
		}

		ivec2 coord = linear_id_to_input_coord(index);
		float depth = texelFetch(depthData, coord, 0).x;
		uint stencil = texelFetch(stencilData, coord, 0).x;
		uint depth_bytes = uint(depth * 0xffffff);

		if (swap_bytes != 0)
		{
			// Exchange the lowest and highest byte of the 24-bit depth value; the middle byte stays.
			depth_bytes = (bitfieldExtract(depth_bytes, 0, 8) << 16u) | (bitfieldExtract(depth_bytes, 16, 8) << 0u) | (depth_bytes & 0xFF00u);
		}

		data[input_coord_to_output_id(coord)] = (depth_bytes << 8) | stencil;
	}
}
|
||||
)"
|
96
rpcs3/Emu/RSX/Program/GLSLSnippets/CopyRGBA8ToBuffer.glsl
Normal file
96
rpcs3/Emu/RSX/Program/GLSLSnippets/CopyRGBA8ToBuffer.glsl
Normal file
@ -0,0 +1,96 @@
|
||||
R"(
|
||||
#version 450
|
||||
// One-dimensional work group; the x size is a placeholder substituted by the host.
layout(local_size_x = %ws, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
// Binding indices are expressed relative to a base slot substituted by the host.
#define IMAGE_LOCATION(x) (x + %loc)
|
||||
// The output buffer uses the slot after the single image binding.
#define SSBO_LOCATION IMAGE_LOCATION(1)
|
||||
|
||||
// Source color image, fetched as normalized floats.
layout(%set, binding=IMAGE_LOCATION(0)) uniform sampler2D colorData;
|
||||
// Write-only destination; main() stores one uint per texel.
layout(%set, binding=SSBO_LOCATION, std430) writeonly restrict buffer OutputBlock
|
||||
{
|
||||
uint data[];
|
||||
};
|
||||
|
||||
// Kernel parameters, declared either as a uniform block or as plain uniforms.
// swap_bytes    - non-zero requests the channel reordering performed in main()
// output_pitch  - row stride of the output, in uints
// block_width   - selects the swap pattern and the packing order in main()
// is_bgra       - non-zero swizzles the fetched color to BGRA order
// region_offset - texel coordinate of the first source texel
// region_size   - width and height of the copied region, in texels
#if USE_UBO
|
||||
layout(%push_block) uniform Configuration
|
||||
{
|
||||
uint swap_bytes;
|
||||
uint output_pitch;
|
||||
uint block_width;
|
||||
uint is_bgra;
|
||||
ivec2 region_offset;
|
||||
ivec2 region_size;
|
||||
};
|
||||
#else
|
||||
uniform uint swap_bytes;
|
||||
uniform uint output_pitch;
|
||||
uniform uint block_width;
|
||||
uniform uint is_bgra;
|
||||
uniform ivec2 region_offset;
|
||||
uniform ivec2 region_size;
|
||||
#endif
|
||||
|
||||
// Number of consecutive texels handled by one invocation (loop count in main()).
#define KERNEL_SIZE %wks
|
||||
|
||||
// Flattens the global invocation id into a single index, treating the
// dispatch grid as rows of (work groups in x) * (work group size in x) invocations.
uint linear_invocation_id()
{
	const uint invocations_per_row = (gl_NumWorkGroups.x * gl_WorkGroupSize.x);
	const uint row_base = (gl_GlobalInvocationID.y * invocations_per_row);
	return row_base + gl_GlobalInvocationID.x;
}
|
||||
|
||||
// Maps a linear texel index inside the copy region to a texel coordinate in the source image.
//
// The index enumerates the region row by row with a stride of region_size.x, so both the
// column and the row have to be derived from the region width. The output pitch describes
// only the destination layout and is applied in input_coord_to_output_id(). The previous
// code divided by output_pitch here, which selected the wrong source rows whenever the
// pitch differed from the region width.
//
// index   - linear texel index, expected in [0, region_size.x * region_size.y)
// returns - source texel coordinate, including region_offset
ivec2 linear_id_to_input_coord(uint index)
{
	const uint region_width = uint(region_size.x);
	return ivec2(int(index % region_width), int(index / region_width)) + region_offset;
}
|
||||
|
||||
// Converts a source texel coordinate into an index into the output array:
// the coordinate is made relative to the region origin and then laid out
// with a row stride of output_pitch elements.
uint input_coord_to_output_id(ivec2 coord)
{
	const ivec2 local = coord - region_offset;
	return (uint(local.y) * output_pitch) + uint(local.x);
}
|
||||
|
||||
// Kernel entry point. Every invocation converts KERNEL_SIZE consecutive texels of the
// region: the color is fetched, optionally swizzled to BGRA, optionally reordered for
// byte swapping, scaled to 8 bits per channel and packed into one uint of the output.
void main()
{
	const uint texel_count = uint(region_size.x * region_size.y);
	uint index = linear_invocation_id() * KERNEL_SIZE;

	for (int loop = 0; loop < KERNEL_SIZE; ++loop, ++index)
	{
		// Valid indices are [0, texel_count). The previous check used a strict
		// greater-than, which let index == texel_count through and caused one fetch
		// and one store past the end of the region.
		if (index >= texel_count)
		{
			return;
		}

		ivec2 coord = linear_id_to_input_coord(index);
		vec4 color = texelFetch(colorData, coord, 0);

		if (is_bgra != 0)
		{
			color = color.bgra;
		}

		// Specific to 8-bit color in ARGB8 format. Need to generalize later
		if (swap_bytes != 0 && block_width > 1)
		{
			// A block width of 4 reverses all four channels; otherwise channels are swapped in pairs.
			color = (block_width == 4) ?
				color.wzyx :
				color.yxwz;
		}

		uvec4 bytes = uvec4(color * 255);
		uint result;

		if (block_width > 1)
		{
			// Simulate BE packing as in UINT_8_8_8_8
			result = bytes.w | (bytes.z << 8u) | (bytes.y << 16u) | (bytes.x << 24u);
		}
		else
		{
			// Byte-wise layout: first channel in the lowest byte.
			result = bytes.x | (bytes.y << 8u) | (bytes.z << 16u) | (bytes.w << 24u);
		}

		uint output_id = input_coord_to_output_id(coord);
		data[output_id] = result;
	}
}
|
||||
)"
|
@ -817,6 +817,8 @@
|
||||
<None Include="Emu\RSX\Program\GLSLInterpreter\FragmentInterpreter.glsl" />
|
||||
<None Include="Emu\RSX\Program\GLSLInterpreter\VertexInterpreter.glsl" />
|
||||
<None Include="Emu\RSX\Program\GLSLSnippets\CopyBufferToD24x8.glsl" />
|
||||
<None Include="Emu\RSX\Program\GLSLSnippets\CopyD24x8ToBuffer.glsl" />
|
||||
<None Include="Emu\RSX\Program\GLSLSnippets\CopyRGBA8ToBuffer.glsl" />
|
||||
<None Include="Emu\RSX\Program\GLSLSnippets\GenericVSPassthrough.glsl" />
|
||||
<None Include="Emu\RSX\Program\GLSLSnippets\GPUDeswizzle.glsl" />
|
||||
<None Include="Emu\RSX\Program\GLSLSnippets\ShuffleBytes.glsl" />
|
||||
|
@ -2151,5 +2151,11 @@
|
||||
<None Include="Emu\RSX\Program\GLSLSnippets\GenericVSPassthrough.glsl">
|
||||
<Filter>Emu\GPU\RSX\Program\Snippets</Filter>
|
||||
</None>
|
||||
<None Include="Emu\RSX\Program\GLSLSnippets\CopyD24x8ToBuffer.glsl">
|
||||
<Filter>Emu\GPU\RSX\Program\Snippets</Filter>
|
||||
</None>
|
||||
<None Include="Emu\RSX\Program\GLSLSnippets\CopyRGBA8ToBuffer.glsl">
|
||||
<Filter>Emu\GPU\RSX\Program\Snippets</Filter>
|
||||
</None>
|
||||
</ItemGroup>
|
||||
</Project>
|
Loading…
x
Reference in New Issue
Block a user