mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-02-06 09:39:55 +00:00
gl: Implement on-chip buffer-to-d24x8 conversion
This commit is contained in:
parent
dd6cb054a7
commit
d167582f6b
@ -21,6 +21,7 @@ namespace gl
|
||||
bool ARB_shader_draw_parameters_supported = false;
|
||||
bool ARB_depth_buffer_float_supported = false;
|
||||
bool ARB_texture_barrier_supported = false;
|
||||
bool ARB_shader_stencil_export_supported = false;
|
||||
bool NV_texture_barrier_supported = false;
|
||||
bool NV_gpu_shader5_supported = false;
|
||||
bool AMD_gpu_shader_half_float_supported = false;
|
||||
@ -45,7 +46,7 @@ namespace gl
|
||||
|
||||
void initialize()
|
||||
{
|
||||
int find_count = 14;
|
||||
int find_count = 15;
|
||||
int ext_count = 0;
|
||||
glGetIntegerv(GL_NUM_EXTENSIONS, &ext_count);
|
||||
|
||||
@ -162,6 +163,13 @@ namespace gl
|
||||
find_count--;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (check(ext_name, "GL_ARB_shader_stencil_export"))
|
||||
{
|
||||
ARB_shader_stencil_export_supported = true;
|
||||
find_count--;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// Check GL_VERSION and GL_RENDERER for the presence of Mesa
|
||||
|
@ -364,6 +364,7 @@ void GLGSRender::on_exit()
|
||||
// Globals
|
||||
// TODO: Move these
|
||||
gl::destroy_compute_tasks();
|
||||
gl::destroy_overlay_passes();
|
||||
|
||||
gl::destroy_global_texture_resources();
|
||||
|
||||
|
@ -402,6 +402,16 @@ namespace gl
|
||||
m_alignment = value;
|
||||
return *this;
|
||||
}
|
||||
|
||||
bool get_swap_bytes() const
|
||||
{
|
||||
return m_swap_bytes;
|
||||
}
|
||||
|
||||
int get_row_length() const
|
||||
{
|
||||
return m_row_length;
|
||||
}
|
||||
};
|
||||
|
||||
class vao;
|
||||
|
@ -2,6 +2,19 @@
|
||||
|
||||
namespace gl
|
||||
{
|
||||
// Lame
|
||||
std::unordered_map<u32, std::unique_ptr<gl::overlay_pass>> g_overlay_passes;
|
||||
|
||||
void destroy_overlay_passes()
|
||||
{
|
||||
for (auto& [key, prog] : g_overlay_passes)
|
||||
{
|
||||
prog->destroy();
|
||||
}
|
||||
|
||||
g_overlay_passes.clear();
|
||||
}
|
||||
|
||||
void overlay_pass::create()
|
||||
{
|
||||
if (!compiled)
|
||||
@ -505,17 +518,8 @@ namespace gl
|
||||
video_out_calibration_pass::video_out_calibration_pass()
|
||||
{
|
||||
vs_src =
|
||||
"#version 420\n\n"
|
||||
"layout(location=0) out vec2 tc0;\n"
|
||||
"\n"
|
||||
"void main()\n"
|
||||
"{\n"
|
||||
" vec2 positions[] = {vec2(-1., -1.), vec2(1., -1.), vec2(-1., 1.), vec2(1., 1.)};\n"
|
||||
" vec2 coords[] = {vec2(0., 1.), vec2(1., 1.), vec2(0., 0.), vec2(1., 0.)};\n"
|
||||
" tc0 = coords[gl_VertexID % 4];\n"
|
||||
" vec2 pos = positions[gl_VertexID % 4];\n"
|
||||
" gl_Position = vec4(pos, 0., 1.);\n"
|
||||
"}\n";
|
||||
#include "../Program/GLSLSnippets/GenericVSPassthrough.glsl"
|
||||
;
|
||||
|
||||
fs_src =
|
||||
"#version 420\n\n"
|
||||
@ -578,4 +582,39 @@ namespace gl
|
||||
|
||||
overlay_pass::run(cmd, viewport, GL_NONE, false, false);
|
||||
}
|
||||
|
||||
// Prepares the shader sources for the buffer-to-D24X8 pass.
// The vertex stage is the shared passthrough snippet; the fragment stage is a template
// whose placeholders are resolved here for the GL backend.
rp_ssbo_to_d24x8_texture::rp_ssbo_to_d24x8_texture()
{
	vs_src =
	#include "../Program/GLSLSnippets/GenericVSPassthrough.glsl"
	;

	fs_src =
	#include "../Program/GLSLSnippets/CopyBufferToD24x8.glsl"
	;

	// Placeholder -> replacement text:
	//  "%set, "      is stripped entirely
	//  "%loc"        becomes the binding index of compute buffer slot 0
	//  "%push_block" becomes a std140 layout bound to compute buffer slot 1
	std::pair<std::string_view, std::string> substitutions[] =
	{
		{ "%set, ", "" },
		{ "%loc", std::to_string(GL_COMPUTE_BUFFER_SLOT(0)) },
		{ "%push_block", fmt::format("binding=%d, std140", GL_COMPUTE_BUFFER_SLOT(1)) }
	};

	fs_src = fmt::replace_all(fs_src, substitutions);
}
|
||||
|
||||
// Unpacks packed depth/stencil texels from a buffer into a depth-stencil texture using the
// fragment shader of this pass.
//
// cmd        - command context used to set the stencil write mask and run the pass
// src        - buffer holding the source data; bound as the shader's RawDataBlock storage buffer
// dst        - destination texture; its id is handed to overlay_pass::run
// src_offset - byte offset of the first texel inside src
// dst_region - destination rectangle; its width doubles as the row pitch when none is configured
// settings   - unpack settings supplying the row length (0 = not set) and the byte-swap flag
void rp_ssbo_to_d24x8_texture::run(gl::command_context& cmd,
	const buffer* src, const texture* dst,
	const u32 src_offset, const coordu& dst_region,
	const pixel_unpack_settings& settings)
{
	// Each source element is one 32-bit word (the shader declares the buffer as uint data[]).
	constexpr u32 bytes_per_texel = 4;

	// A row length of 0 means "tightly packed": fall back to the region width, measured in texels.
	const int row_length = settings.get_row_length();
	const u32 src_pitch = row_length > 0 ? static_cast<u32>(row_length) : dst_region.width;

	program_handle.uniforms["src_pitch"] = static_cast<int>(src_pitch);
	program_handle.uniforms["swap_bytes"] = settings.get_swap_bytes() ? 1 : 0;

	// The bound range is expressed in bytes and must cover every row the shader can address.
	// Using the raw row_length here produced a zero-sized (invalid) range whenever the row
	// length was left unset, and undercounted the range by 4x otherwise.
	src->bind_range(GL_COMPUTE_BUFFER_SLOT(0), src_offset, src_pitch * bytes_per_texel * dst_region.height);

	// Allow the pass to write all 8 stencil bits.
	cmd->stencil_mask(0xFF);

	overlay_pass::run(cmd, dst_region, dst->id(), true);
}
|
||||
}
|
||||
|
@ -108,4 +108,30 @@ namespace gl
|
||||
|
||||
void run(gl::command_context& cmd, const areau& viewport, const rsx::simple_array<GLuint>& source, f32 gamma, bool limited_rgb, bool _3d);
|
||||
};
|
||||
|
||||
struct rp_ssbo_to_d24x8_texture : public overlay_pass
|
||||
{
|
||||
rp_ssbo_to_d24x8_texture();
|
||||
void run(gl::command_context& cmd, const buffer* src, const texture* dst, const u32 src_offset, const coordu& dst_region, const pixel_unpack_settings& settings);
|
||||
};
|
||||
|
||||
// TODO: Replace with a proper manager
|
||||
extern std::unordered_map<u32, std::unique_ptr<gl::overlay_pass>> g_overlay_passes;
|
||||
|
||||
template<class T>
|
||||
T* get_overlay_pass()
|
||||
{
|
||||
u32 index = id_manager::typeinfo::get_index<T>();
|
||||
auto &e = g_overlay_passes[index];
|
||||
|
||||
if (!e)
|
||||
{
|
||||
e = std::make_unique<T>();
|
||||
e->create();
|
||||
}
|
||||
|
||||
return static_cast<T*>(e.get());
|
||||
}
|
||||
|
||||
void destroy_overlay_passes();
|
||||
}
|
||||
|
@ -2,6 +2,7 @@
|
||||
#include "GLTexture.h"
|
||||
#include "GLCompute.h"
|
||||
#include "GLRenderTargets.h"
|
||||
#include "GLOverlays.h"
|
||||
#include "../GCM.h"
|
||||
#include "../RSXThread.h"
|
||||
#include "../RSXTexture.h"
|
||||
@ -622,16 +623,36 @@ namespace gl
|
||||
fmt::throw_exception("Invalid depth/stencil type 0x%x", unpack_info.type);
|
||||
}
|
||||
|
||||
if (!skip_barrier)
|
||||
const auto caps = gl::get_driver_caps();
|
||||
if (dst->get_internal_format() == gl::texture::internal_format::depth24_stencil8 &&
|
||||
dst->get_target() == gl::texture::target::texture2D && // Only 2D output supported for the moment.
|
||||
!caps.vendor_NVIDIA && // NVIDIA has native support for D24X8 data as they introduced this extension.
|
||||
caps.ARB_shader_stencil_export_supported) // The driver needs to support stencil export at the very least
|
||||
{
|
||||
glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT);
|
||||
// This optimized path handles the data load on the GPU without context switching to compute.
|
||||
// The upside is that it is very fast if you have headroom.
|
||||
// The downside is that it is linear. Not that it matters that much as most drivers seem to be downloading the entire data source and doing really slow things with it.
|
||||
if (!skip_barrier)
|
||||
{
|
||||
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
|
||||
}
|
||||
|
||||
auto pass = gl::get_overlay_pass<gl::rp_ssbo_to_d24x8_texture>();
|
||||
pass->run(cmd, transfer_buf, dst, out_offset, {{dst_region.x, dst_region.y}, {dst_region.width, dst_region.height}}, {});
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!skip_barrier)
|
||||
{
|
||||
glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT);
|
||||
}
|
||||
|
||||
glBindBuffer(GL_SHADER_STORAGE_BUFFER, GL_NONE);
|
||||
transfer_buf->bind(buffer::target::pixel_unpack);
|
||||
glBindBuffer(GL_SHADER_STORAGE_BUFFER, GL_NONE);
|
||||
transfer_buf->bind(buffer::target::pixel_unpack);
|
||||
|
||||
dst->copy_from(reinterpret_cast<void*>(u64(out_offset)), static_cast<texture::format>(unpack_info.format),
|
||||
static_cast<texture::type>(unpack_info.type), dst_level, dst_region, {});
|
||||
dst->copy_from(reinterpret_cast<void*>(u64(out_offset)), static_cast<texture::format>(unpack_info.format),
|
||||
static_cast<texture::type>(unpack_info.type), dst_level, dst_region, {});
|
||||
}
|
||||
|
||||
if (scratch_mem) scratch_mem.remove();
|
||||
}
|
||||
|
48
rpcs3/Emu/RSX/Program/GLSLSnippets/CopyBufferToD24x8.glsl
Normal file
48
rpcs3/Emu/RSX/Program/GLSLSnippets/CopyBufferToD24x8.glsl
Normal file
@ -0,0 +1,48 @@
|
||||
R"(
|
||||
#version 430
|
||||
#extension GL_ARB_shader_stencil_export : enable
|
||||
|
||||
layout(%set, binding=%loc) readonly restrict buffer RawDataBlock
|
||||
{
|
||||
uint data[];
|
||||
};
|
||||
|
||||
#if USE_UBO
|
||||
layout(%push_block) uniform UnpackConfiguration
|
||||
{
|
||||
uint swap_bytes;
|
||||
uint src_pitch;
|
||||
};
|
||||
#else
|
||||
uniform int swap_bytes;
|
||||
uniform int src_pitch;
|
||||
#endif
|
||||
|
||||
// Maps the current fragment to its element index inside the source buffer.
// The fragment position is truncated to integer coordinates and rows are
// src_pitch elements apart.
int getDataOffset()
{
	const ivec2 texel = ivec2(gl_FragCoord.xy);
	const int row_start = texel.y * src_pitch;
	return row_start + texel.x;
}
|
||||
|
||||
// Reads one packed 32-bit depth/stencil word for this fragment and exports it as
// fragment depth (upper 24 bits, normalized) and stencil reference (lowest 8 bits).
void main()
{
	const int virtual_address = getDataOffset();
	uint real_data = data[virtual_address];

	// Stencil always lives in the lowest byte, regardless of the swap mode.
	const uint stencil_byte = bitfieldExtract(real_data, 0, 8);
	uint depth_bytes;

	if (swap_bytes > 0)
	{
		// CCBBAA00 -> 00AABBCC -> AABBCC. Stencil byte does not actually move
		// The three depth bytes are reversed in place: the result must stay within 24 bits,
		// so the byte taken from bits 8..15 lands at bits 16..23 (not 24..31).
		depth_bytes = bitfieldExtract(real_data, 24, 8) | (bitfieldExtract(real_data, 16, 8) << 8) | (bitfieldExtract(real_data, 8, 8) << 16);
	}
	else
	{
		depth_bytes = bitfieldExtract(real_data, 8, 24);
	}

	// Normalize the 24-bit integer depth to [0, 1].
	gl_FragDepth = float(depth_bytes) / 0xffffff;
	gl_FragStencilRefARB = int(stencil_byte);
}
|
||||
)"
|
13
rpcs3/Emu/RSX/Program/GLSLSnippets/GenericVSPassthrough.glsl
Normal file
13
rpcs3/Emu/RSX/Program/GLSLSnippets/GenericVSPassthrough.glsl
Normal file
@ -0,0 +1,13 @@
|
||||
R"(
|
||||
#version 420
|
||||
layout(location=0) out vec2 tc0;
|
||||
|
||||
// Emits one corner of a full-viewport quad, selected by gl_VertexID modulo 4,
// together with the matching texture coordinate (V is flipped relative to clip-space Y).
void main()
{
	vec2 positions[] = {vec2(-1., -1.), vec2(1., -1.), vec2(-1., 1.), vec2(1., 1.)};
	vec2 coords[] = {vec2(0., 1.), vec2(1., 1.), vec2(0., 0.), vec2(1., 0.)};

	int corner = gl_VertexID % 4;
	tc0 = coords[corner];
	gl_Position = vec4(positions[corner], 0., 1.);
}
|
||||
)"
|
Loading…
x
Reference in New Issue
Block a user