mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-02-06 00:40:11 +00:00
gl: Deswizzle textures on the GPU
This commit is contained in:
parent
532563e861
commit
60a2a39e88
@ -102,46 +102,19 @@ namespace gl
|
||||
kernel_size = _kernel_size? _kernel_size : optimal_kernel_size;
|
||||
|
||||
m_src =
|
||||
"#version 430\n"
|
||||
"layout(local_size_x=%ws, local_size_y=1, local_size_z=1) in;\n"
|
||||
"layout(binding=%loc, std430) buffer ssbo{ uint data[]; };\n"
|
||||
"%ub"
|
||||
"\n"
|
||||
"#define KERNEL_SIZE %ks\n"
|
||||
"\n"
|
||||
"// Generic swap routines\n"
|
||||
"#define bswap_u16(bits) (bits & 0xFF) << 8 | (bits & 0xFF00) >> 8 | (bits & 0xFF0000) << 8 | (bits & 0xFF000000) >> 8\n"
|
||||
"#define bswap_u32(bits) (bits & 0xFF) << 24 | (bits & 0xFF00) << 8 | (bits & 0xFF0000) >> 8 | (bits & 0xFF000000) >> 24\n"
|
||||
"#define bswap_u16_u32(bits) (bits & 0xFFFF) << 16 | (bits & 0xFFFF0000) >> 16\n"
|
||||
"\n"
|
||||
"// Depth format conversions\n"
|
||||
"#define d24f_to_f32(bits) (bits << 7)\n"
|
||||
"#define f32_to_d24f(bits) (bits >> 7)\n"
|
||||
"\n"
|
||||
"uint linear_invocation_id()\n"
|
||||
"{\n"
|
||||
" uint size_in_x = (gl_NumWorkGroups.x * gl_WorkGroupSize.x);\n"
|
||||
" return (gl_GlobalInvocationID.y * size_in_x) + gl_GlobalInvocationID.x;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"%md"
|
||||
"void main()\n"
|
||||
"{\n"
|
||||
" uint invocation_id = linear_invocation_id();\n"
|
||||
" uint index = invocation_id * KERNEL_SIZE;\n"
|
||||
" uint value;\n"
|
||||
" %vars"
|
||||
"\n";
|
||||
#include "../Program/GLSLSnippets/ShuffleBytes.glsl"
|
||||
;
|
||||
|
||||
const std::pair<std::string_view, std::string> syntax_replace[] =
|
||||
{
|
||||
{ "%set, ", ""},
|
||||
{ "%loc", std::to_string(GL_COMPUTE_BUFFER_SLOT(0)) },
|
||||
{ "%ws", std::to_string(optimal_group_size) },
|
||||
{ "%ks", std::to_string(kernel_size) },
|
||||
{ "%vars", variables },
|
||||
{ "%f", function_name },
|
||||
{ "%ub", uniforms },
|
||||
{ "%md", method_declarations }
|
||||
{ "%md", method_declarations },
|
||||
};
|
||||
|
||||
m_src = fmt::replace_all(m_src, syntax_replace);
|
||||
|
@ -2,6 +2,7 @@
|
||||
|
||||
#include "Emu/IdManager.h"
|
||||
#include "GLHelpers.h"
|
||||
#include "../rsx_utils.h"
|
||||
|
||||
#include <unordered_map>
|
||||
|
||||
@ -226,6 +227,116 @@ namespace gl
|
||||
}
|
||||
};
|
||||
|
||||
// Reverse morton-order block arrangement.
//
// Compute task that runs the GPUDeswizzle.glsl kernel: each invocation handles one
// texel, looks up its morton (Z-order) position in the source buffer and writes it
// at its linear position in the destination buffer.
//
// _BlockType - storage type of one texel; its size in 32-bit words becomes the
//              per-texel word count used by the shader.
// _BaseType  - element type used to pick the byte swap routine (2 or 4 bytes).
// _SwapBytes - when true every copied word is byte-swapped with the routine
//              selected by _BaseType; when false words are copied unchanged.
template <typename _BlockType, typename _BaseType, bool _SwapBytes>
struct cs_deswizzle_3d : compute_task
{
	// Shader parameter block. The member order mirrors the 'parameters' uniform
	// block declared in GPUDeswizzle.glsl; 'data' aliases it for upload.
	union params_t
	{
		u32 data[7];

		struct
		{
			u32 width;
			u32 height;
			u32 depth;
			u32 logw;
			u32 logh;
			u32 logd;
			u32 mipmaps;
		};
	}
	params;

	// Uniform buffer backing 'params' on the GPU
	gl::buffer param_buffer;

	// Buffers and ranges used by the next dispatch (set by run())
	const gl::buffer* src_buffer = nullptr;
	const gl::buffer* dst_buffer = nullptr;
	u32 in_offset = 0;
	u32 out_offset = 0;
	u32 block_length = 0;

	cs_deswizzle_3d()
	{
		// The shader addresses both buffers as arrays of 32-bit words, so a block must
		// be a whole number of words. The size is known at compile time, so reject
		// unsupported block types at compile time.
		static_assert((sizeof(_BlockType) & 3) == 0, "Unsupported block type");

		m_src =
		#include "../Program/GLSLSnippets/GPUDeswizzle.glsl"
		;

		// Name of the per-word transform macro; left empty when no swap is requested
		std::string transform;
		if constexpr (_SwapBytes)
		{
			if constexpr (sizeof(_BaseType) == 4)
			{
				transform = "bswap_u32";
			}
			else if constexpr (sizeof(_BaseType) == 2)
			{
				transform = "bswap_u16";
			}
			else
			{
				fmt::throw_exception("Unreachable");
			}
		}

		const std::pair<std::string_view, std::string> syntax_replace[] =
		{
			{ "%set, ", ""},
			{ "%loc", std::to_string(GL_COMPUTE_BUFFER_SLOT(0))},
			{ "%push_block", fmt::format("binding=%d, std140", GL_COMPUTE_BUFFER_SLOT(2)) },
			{ "%ws", std::to_string(optimal_group_size) },
			{ "%_wordcount", std::to_string(sizeof(_BlockType) / 4) },
			{ "%f", transform }
		};

		m_src = fmt::replace_all(m_src, syntax_replace);

		// 32 bytes is enough for the 7 u32 parameters (28 bytes); the buffer is
		// rewritten through sub_data() before every dispatch.
		param_buffer.create(gl::buffer::target::uniform, 32, nullptr, gl::buffer::memory_type::local, GL_DYNAMIC_COPY);
	}

	~cs_deswizzle_3d()
	{
		param_buffer.remove();
	}

	// Binds source, destination and parameter buffers to compute slots 0, 1 and 2
	void bind_resources() override
	{
		src_buffer->bind_range(gl::buffer::target::ssbo, GL_COMPUTE_BUFFER_SLOT(0), in_offset, block_length);

		// The destination range starts at out_offset. Binding it at in_offset would
		// write to the wrong location whenever the two offsets differ.
		dst_buffer->bind_range(gl::buffer::target::ssbo, GL_COMPUTE_BUFFER_SLOT(1), out_offset, block_length);

		param_buffer.bind_range(gl::buffer::target::uniform, GL_COMPUTE_BUFFER_SLOT(2), 0, sizeof(params));
	}

	// Uploads the current contents of 'params' to the uniform buffer
	void set_parameters(gl::command_context& /*cmd*/)
	{
		param_buffer.sub_data(0, sizeof(params), params.data);
	}

	// Deswizzles data_length bytes from src (starting at in_offset) into dst (starting at out_offset).
	// width, height and depth are the image dimensions in texels and mipmaps is the number of mip
	// levels stored back to back in the source data.
	void run(gl::command_context& cmd, const gl::buffer* dst, u32 out_offset, const gl::buffer* src, u32 in_offset, u32 data_length, u32 width, u32 height, u32 depth, u32 mipmaps)
	{
		dst_buffer = dst;
		src_buffer = src;

		this->in_offset = in_offset;
		this->out_offset = out_offset;
		this->block_length = data_length;

		params.width = width;
		params.height = height;
		params.depth = depth;
		params.mipmaps = mipmaps;
		params.logw = rsx::ceil_log2(width);
		params.logh = rsx::ceil_log2(height);
		params.logd = rsx::ceil_log2(depth);
		set_parameters(cmd);

		// One invocation handles one block, so this is the number of bytes covered by a
		// single workgroup. Round up so that a trailing partial group is still dispatched.
		const u32 num_bytes_per_invocation = (sizeof(_BlockType) * optimal_group_size);
		const u32 linear_invocations = utils::aligned_div(data_length, num_bytes_per_invocation);
		compute_task::run(cmd, linear_invocations);
	}
};
|
||||
|
||||
// TODO: Replace with a proper manager
|
||||
extern std::unordered_map<u32, std::unique_ptr<gl::compute_task>> g_compute_tasks;
|
||||
|
||||
|
@ -495,6 +495,8 @@ void GLGSRender::emit_geometry(u32 sub_index)
|
||||
|
||||
m_frame_stats.vertex_upload_time += m_profiler.duration();
|
||||
|
||||
gl_state.use_program(m_program->id());
|
||||
|
||||
if (!upload_info.index_info)
|
||||
{
|
||||
if (draw_call.is_single_draw())
|
||||
|
@ -762,8 +762,6 @@ void GLGSRender::load_program_env()
|
||||
const bool update_instruction_buffers = (!!m_interpreter_state && m_shader_interpreter.is_interpreter(m_program));
|
||||
const bool update_raster_env = (rsx::method_registers.polygon_stipple_enabled() && !!(m_graphics_state & rsx::pipeline_state::polygon_stipple_pattern_dirty));
|
||||
|
||||
gl_state.use_program(m_program->id());
|
||||
|
||||
if (manually_flush_ring_buffers)
|
||||
{
|
||||
if (update_fragment_env) m_fragment_env_buffer->reserve_storage_on_heap(128);
|
||||
@ -801,13 +799,13 @@ void GLGSRender::load_program_env()
|
||||
const usz transform_constants_size = (!m_vertex_prog || m_vertex_prog->has_indexed_constants) ? 8192 : m_vertex_prog->constant_ids.size() * 16;
|
||||
if (transform_constants_size)
|
||||
{
|
||||
auto mapping = m_transform_constants_buffer->alloc_from_heap(transform_constants_size, m_uniform_buffer_offset_align);
|
||||
auto mapping = m_transform_constants_buffer->alloc_from_heap(static_cast<u32>(transform_constants_size), m_uniform_buffer_offset_align);
|
||||
auto buf = static_cast<u8*>(mapping.first);
|
||||
|
||||
const std::vector<u16>& constant_ids = (transform_constants_size == 8192) ? std::vector<u16>{} : m_vertex_prog->constant_ids;
|
||||
fill_vertex_program_constants_data(buf, constant_ids);
|
||||
|
||||
m_transform_constants_buffer->bind_range(GL_VERTEX_CONSTANT_BUFFERS_BIND_SLOT, mapping.second, transform_constants_size);
|
||||
m_transform_constants_buffer->bind_range(GL_VERTEX_CONSTANT_BUFFERS_BIND_SLOT, mapping.second, static_cast<u32>(transform_constants_size));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -252,7 +252,7 @@ namespace gl
|
||||
void fbo::draw_buffers(const std::initializer_list<attachment>& indexes) const
|
||||
{
|
||||
rsx::simple_array<GLenum> ids;
|
||||
ids.reserve(indexes.size());
|
||||
ids.reserve(::size32(indexes));
|
||||
|
||||
for (auto &index : indexes)
|
||||
ids.push_back(index.id());
|
||||
|
@ -613,6 +613,14 @@ namespace gl
|
||||
fmt::throw_exception("Unsupported buffer usage 0x%x", usage);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Local memory hints
|
||||
if (usage == GL_DYNAMIC_COPY)
|
||||
{
|
||||
flags |= GL_DYNAMIC_STORAGE_BIT;
|
||||
}
|
||||
}
|
||||
|
||||
if ((flags & GL_MAP_READ_BIT) && !caps.vendor_AMD)
|
||||
{
|
||||
@ -624,7 +632,6 @@ namespace gl
|
||||
flags |= GL_CLIENT_STORAGE_BIT;
|
||||
}
|
||||
|
||||
save_binding_state save(current_target(), *this);
|
||||
DSA_CALL2(NamedBufferStorage, m_id, size, data_, flags);
|
||||
m_size = size;
|
||||
}
|
||||
@ -674,6 +681,7 @@ namespace gl
|
||||
void create()
|
||||
{
|
||||
glGenBuffers(1, &m_id);
|
||||
save_binding_state save(current_target(), *this);
|
||||
}
|
||||
|
||||
void create(GLsizeiptr size, const void* data_ = nullptr, memory_type type = memory_type::local, GLenum usage = GL_STREAM_DRAW)
|
||||
@ -684,8 +692,9 @@ namespace gl
|
||||
|
||||
void create(target target_, GLsizeiptr size, const void* data_ = nullptr, memory_type type = memory_type::local, GLenum usage = GL_STREAM_DRAW)
|
||||
{
|
||||
create();
|
||||
m_target = target_;
|
||||
|
||||
create();
|
||||
allocate(size, data_, type, usage);
|
||||
}
|
||||
|
||||
@ -748,7 +757,7 @@ namespace gl
|
||||
|
||||
void sub_data(GLsizeiptr offset, GLsizeiptr length, GLvoid* data)
|
||||
{
|
||||
ensure(m_memory_type != memory_type::local);
|
||||
ensure(m_memory_type == memory_type::local);
|
||||
DSA_CALL2(NamedBufferSubData, m_id, offset, length, data);
|
||||
}
|
||||
|
||||
|
@ -19,7 +19,6 @@ namespace gl
|
||||
|
||||
gl::vao m_vao;
|
||||
gl::buffer m_text_buffer;
|
||||
gl::buffer m_scale_offsets_buffer;
|
||||
std::unordered_map<u8, std::pair<u32, u32>> m_offsets;
|
||||
|
||||
bool initialized = false;
|
||||
@ -87,19 +86,14 @@ namespace gl
|
||||
|
||||
void init()
|
||||
{
|
||||
m_text_buffer.create();
|
||||
m_scale_offsets_buffer.create();
|
||||
|
||||
GlyphManager glyph_source;
|
||||
auto points = glyph_source.generate_point_map();
|
||||
|
||||
const usz buffer_size = points.size() * sizeof(GlyphManager::glyph_point);
|
||||
|
||||
m_text_buffer.data(buffer_size, points.data());
|
||||
m_text_buffer.create(gl::buffer::target::array, buffer_size, points.data(), gl::buffer::memory_type::host_visible);
|
||||
m_offsets = glyph_source.get_glyph_offsets();
|
||||
|
||||
m_scale_offsets_buffer.data(512 * 4 * sizeof(float));
|
||||
|
||||
//Init VAO
|
||||
int old_vao;
|
||||
glGetIntegerv(GL_VERTEX_ARRAY_BINDING, &old_vao);
|
||||
@ -198,7 +192,6 @@ namespace gl
|
||||
{
|
||||
if (initialized)
|
||||
{
|
||||
m_scale_offsets_buffer.remove();
|
||||
m_text_buffer.remove();
|
||||
m_vao.remove();
|
||||
|
||||
|
@ -21,13 +21,13 @@ namespace gl
|
||||
|
||||
std::pair<buffer*, buffer*> prepare_compute_resources(usz staging_data_length)
|
||||
{
|
||||
if (g_upload_transfer_buffer.size() < staging_data_length)
|
||||
if (g_upload_transfer_buffer.size() < static_cast<GLsizeiptr>(staging_data_length))
|
||||
{
|
||||
g_upload_transfer_buffer.remove();
|
||||
g_upload_transfer_buffer.create(staging_data_length, nullptr, buffer::memory_type::host_visible, GL_STREAM_DRAW);
|
||||
}
|
||||
|
||||
if (g_compute_decode_buffer.size() < staging_data_length * 3)
|
||||
if (g_compute_decode_buffer.size() < static_cast<GLsizeiptr>(staging_data_length) * 3)
|
||||
{
|
||||
g_compute_decode_buffer.remove();
|
||||
g_compute_decode_buffer.create(std::max<GLsizeiptr>(512, staging_data_length * 3), nullptr, buffer::memory_type::local, GL_STATIC_COPY);
|
||||
@ -43,6 +43,31 @@ namespace gl
|
||||
g_compute_decode_buffer.remove();
|
||||
}
|
||||
|
||||
template <typename WordType, bool SwapBytes>
|
||||
void do_deswizzle_transformation(gl::command_context& cmd, u32 block_size, buffer* dst, buffer* src, u32 data_length, u16 width, u16 height, u16 depth)
|
||||
{
|
||||
switch (block_size)
|
||||
{
|
||||
case 4:
|
||||
gl::get_compute_task<gl::cs_deswizzle_3d<u32, WordType, SwapBytes>>()->run(
|
||||
cmd, dst, 0, src, 0,
|
||||
data_length, width, height, depth, 1);
|
||||
break;
|
||||
case 8:
|
||||
gl::get_compute_task<gl::cs_deswizzle_3d<u64, WordType, SwapBytes>>()->run(
|
||||
cmd, dst, 0, src, 0,
|
||||
data_length, width, height, depth, 1);
|
||||
break;
|
||||
case 16:
|
||||
gl::get_compute_task<gl::cs_deswizzle_3d<u128, WordType, SwapBytes>>()->run(
|
||||
cmd, dst, 0, src, 0,
|
||||
data_length, width, height, depth, 1);
|
||||
break;
|
||||
default:
|
||||
fmt::throw_exception("Unreachable");
|
||||
}
|
||||
}
|
||||
|
||||
GLenum get_target(rsx::texture_dimension_extended type)
|
||||
{
|
||||
switch (type)
|
||||
@ -623,11 +648,12 @@ namespace gl
|
||||
const std::vector<rsx::subresource_layout> &input_layouts,
|
||||
bool is_swizzled, GLenum gl_format, GLenum gl_type, std::vector<std::byte>& staging_buffer)
|
||||
{
|
||||
const auto driver_caps = gl::get_driver_caps();
|
||||
rsx::texture_uploader_capabilities caps
|
||||
{
|
||||
.supports_byteswap = true,
|
||||
.supports_vtc_decoding = false,
|
||||
.supports_hw_deswizzle = false,
|
||||
.supports_hw_deswizzle = driver_caps.ARB_compute_shader_supported,
|
||||
.supports_zero_copy = false,
|
||||
.alignment = 4
|
||||
};
|
||||
@ -635,9 +661,12 @@ namespace gl
|
||||
pixel_unpack_settings unpack_settings;
|
||||
unpack_settings.row_length(0).alignment(4);
|
||||
|
||||
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, GL_NONE);
|
||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, GL_NONE);
|
||||
|
||||
if (rsx::is_compressed_host_format(format)) [[likely]]
|
||||
{
|
||||
caps.supports_vtc_decoding = gl::get_driver_caps().vendor_NVIDIA;
|
||||
caps.supports_vtc_decoding = driver_caps.vendor_NVIDIA;
|
||||
unpack_settings.apply();
|
||||
|
||||
glBindTexture(static_cast<GLenum>(dst->get_target()), dst->id());
|
||||
@ -688,7 +717,7 @@ namespace gl
|
||||
else
|
||||
{
|
||||
bool apply_settings = true;
|
||||
bool use_compute_transform = false;
|
||||
bool use_compute_transform = is_swizzled;
|
||||
buffer *upload_scratch_mem = nullptr, *compute_scratch_mem = nullptr;
|
||||
image_memory_requirements mem_info;
|
||||
pixel_buffer_layout mem_layout;
|
||||
@ -698,6 +727,8 @@ namespace gl
|
||||
u8 block_size_in_bytes = rsx::get_format_block_size_in_bytes(format);
|
||||
u64 image_linear_size;
|
||||
|
||||
gl::buffer deswizzle_buf;
|
||||
|
||||
switch (gl_type)
|
||||
{
|
||||
case GL_BYTE:
|
||||
@ -710,8 +741,6 @@ namespace gl
|
||||
case GL_FLOAT:
|
||||
case GL_UNSIGNED_INT_24_8:
|
||||
case GL_FLOAT_32_UNSIGNED_INT_24_8_REV:
|
||||
mem_layout.format = gl_format;
|
||||
mem_layout.type = gl_type;
|
||||
mem_layout.swap_bytes = true;
|
||||
mem_layout.size = 4;
|
||||
use_compute_transform = true;
|
||||
@ -747,14 +776,61 @@ namespace gl
|
||||
|
||||
if (use_compute_transform)
|
||||
{
|
||||
// 0. Preconf
|
||||
mem_layout.swap_bytes = op.require_swap;
|
||||
mem_layout.format = gl_format;
|
||||
mem_layout.type = gl_type;
|
||||
|
||||
// 1. Unmap buffer
|
||||
upload_scratch_mem->unmap();
|
||||
|
||||
// 2. Upload memory to GPU
|
||||
upload_scratch_mem->copy_to(compute_scratch_mem, 0, 0, image_linear_size);
|
||||
if (!op.require_deswizzle)
|
||||
{
|
||||
upload_scratch_mem->copy_to(compute_scratch_mem, 0, 0, image_linear_size);
|
||||
}
|
||||
else
|
||||
{
|
||||
// 2.1 Copy data to deswizzle buf
|
||||
if (deswizzle_buf.size() < image_linear_size)
|
||||
{
|
||||
deswizzle_buf.remove();
|
||||
deswizzle_buf.create(gl::buffer::target::ssbo, image_linear_size, nullptr, gl::buffer::memory_type::local);
|
||||
}
|
||||
|
||||
upload_scratch_mem->copy_to(&deswizzle_buf, 0, 0, image_linear_size);
|
||||
|
||||
// 2.2 Apply compute transform to deswizzle input and dump it in compute_scratch_mem
|
||||
ensure(op.element_size == 2 || op.element_size == 4);
|
||||
const auto block_size = op.element_size * op.block_length;
|
||||
|
||||
if (op.require_swap)
|
||||
{
|
||||
mem_layout.swap_bytes = false;
|
||||
|
||||
if (op.element_size == 4) [[ likely ]]
|
||||
{
|
||||
do_deswizzle_transformation<u32, true>(cmd, block_size, compute_scratch_mem, &deswizzle_buf, image_linear_size, layout.width_in_texel, layout.height_in_texel, layout.depth);
|
||||
}
|
||||
else
|
||||
{
|
||||
do_deswizzle_transformation<u16, true>(cmd, block_size, compute_scratch_mem, &deswizzle_buf, image_linear_size, layout.width_in_texel, layout.height_in_texel, layout.depth);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (op.element_size == 4) [[ likely ]]
|
||||
{
|
||||
do_deswizzle_transformation<u32, false>(cmd, block_size, compute_scratch_mem, &deswizzle_buf, image_linear_size, layout.width_in_texel, layout.height_in_texel, layout.depth);
|
||||
}
|
||||
else
|
||||
{
|
||||
do_deswizzle_transformation<u16, false>(cmd, block_size, compute_scratch_mem, &deswizzle_buf, image_linear_size, layout.width_in_texel, layout.height_in_texel, layout.depth);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 3. Update configuration
|
||||
mem_layout.swap_bytes = op.require_swap;
|
||||
mem_info.image_size_in_texels = image_linear_size / block_size_in_bytes;
|
||||
mem_info.image_size_in_bytes = image_linear_size;
|
||||
mem_info.memory_required = 0;
|
||||
@ -773,6 +849,8 @@ namespace gl
|
||||
dst->copy_from(out_pointer, static_cast<texture::format>(gl_format), static_cast<texture::type>(gl_type), layout.level, region, unpack_settings);
|
||||
}
|
||||
}
|
||||
|
||||
deswizzle_buf.remove();
|
||||
}
|
||||
}
|
||||
|
||||
|
130
rpcs3/Emu/RSX/Program/GLSLSnippets/GPUDeswizzle.glsl
Normal file
130
rpcs3/Emu/RSX/Program/GLSLSnippets/GPUDeswizzle.glsl
Normal file
@ -0,0 +1,130 @@
|
||||
R"(
|
||||
#version 450
|
||||
|
||||
#define SSBO_BASE_LOCATION %loc
|
||||
#define SSBO(x) (SSBO_BASE_LOCATION + x)
|
||||
|
||||
layout(local_size_x = %ws, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
layout(%set, binding=SSBO(0), std430) buffer ssbo0{ uint data_in[]; };
|
||||
layout(%set, binding=SSBO(1), std430) buffer ssbo1{ uint data_out[]; };
|
||||
layout(%push_block) uniform parameters
|
||||
{
|
||||
uint image_width;
|
||||
uint image_height;
|
||||
uint image_depth;
|
||||
uint image_logw;
|
||||
uint image_logh;
|
||||
uint image_logd;
|
||||
uint lod_count;
|
||||
};
|
||||
|
||||
struct invocation_properties
|
||||
{
|
||||
uint data_offset;
|
||||
uvec3 size;
|
||||
uvec3 size_log2;
|
||||
};
|
||||
|
||||
#define bswap_u16(bits) (bits & 0xFF) << 8 | (bits & 0xFF00) >> 8 | (bits & 0xFF0000) << 8 | (bits & 0xFF000000) >> 8
|
||||
#define bswap_u32(bits) (bits & 0xFF) << 24 | (bits & 0xFF00) << 8 | (bits & 0xFF0000) >> 8 | (bits & 0xFF000000) >> 24
|
||||
|
||||
invocation_properties invocation;
|
||||
|
||||
// Fills the global 'invocation' record with the properties of the mip level that
// contains texel 'offset' (counted in texels from the start of the image data).
// Returns false when 'offset' lies past the end of the last level considered.
bool init_invocation_properties(const in uint offset)
{
	// Start out describing the base level
	invocation.data_offset = 0;
	invocation.size = uvec3(image_width, image_height, image_depth);
	invocation.size_log2 = uvec3(image_logw, image_logh, image_logd);

	// Index one past the last texel of the level currently described
	uint level_end = image_width * image_height * image_depth;

	// Step down the mip chain until the level holding 'offset' is found
	for (uint level = 1; offset >= level_end && level < lod_count; level++)
	{
		invocation.data_offset = level_end;

		// Halve width and height, never dropping below one texel
		invocation.size.xy = max(invocation.size.xy / 2, uvec2(1));

		// Matching log2 update, clamped so it never goes below zero
		invocation.size_log2.xy = max(invocation.size_log2.xy, uvec2(1)) - uvec2(1);

		level_end += (invocation.size.x * invocation.size.y * image_depth);
	}

	return (offset < level_end);
}
|
||||
|
||||
// Interleaves the bits of the given texel coordinate into a morton (Z-order) index
// for the level described by 'invocation'. Bits are taken in x, y, z order and an
// axis stops contributing once its log2 size budget is used up.
uint get_z_index(const in uint x_, const in uint y_, const in uint z_)
{
	uvec3 coord = uvec3(x_, y_, z_);          // Coordinate bits still to be consumed
	uvec3 bits_left = invocation.size_log2;   // Remaining bit budget of each axis
	uint result = 0;
	uint bit_pos = 0;

	do
	{
		// One round: take the lowest remaining bit from each axis that still has budget
		for (int axis = 0; axis < 3; ++axis)
		{
			if (bits_left[axis] > 0)
			{
				result |= (coord[axis] & 1) << bit_pos;
				bit_pos++;
				coord[axis] >>= 1;
				bits_left[axis]--;
			}
		}
	}
	while (any(greaterThan(coord, uvec3(0))));

	return result;
}
|
||||
|
||||
// One invocation per output texel: find the level the texel belongs to, split its
// linear index into x, y, z, look up the matching morton-ordered source texel and
// copy all of its words through the configured transform.
void main()
{
	uint word_count = %_wordcount;

	// Flatten the dispatch grid into a linear texel index
	uint grid_width = (gl_NumWorkGroups.x * gl_WorkGroupSize.x);
	uint texel_id = (gl_GlobalInvocationID.y * grid_width) + gl_GlobalInvocationID.x;

	// Invocations past the end of the image have nothing to do
	if (!init_invocation_properties(texel_id))
	{
		return;
	}

	// Calculations done in texels, not bytes
	uint texels_per_row = invocation.size.x;
	uint texels_per_slice = (invocation.size.y * texels_per_row);
	uint texel_in_level = (texel_id - invocation.data_offset);
	uint texel_in_slice = (texel_in_level % texels_per_slice);

	uint z = (texel_in_level / texels_per_slice);
	uint y = (texel_in_slice / texels_per_row);
	uint x = (texel_in_slice % texels_per_row);

	// First word of the destination (linear) and source (morton order) texel
	uint dst_word = (texel_id * word_count);
	uint src_word = (get_z_index(x, y, z) + invocation.data_offset) * word_count;

	for (uint i = 0; i < word_count; ++i)
	{
		uint value = data_in[src_word + i];
		data_out[dst_word + i] = %f(value);
	}
}
|
||||
)"
|
32
rpcs3/Emu/RSX/Program/GLSLSnippets/ShuffleBytes.glsl
Normal file
32
rpcs3/Emu/RSX/Program/GLSLSnippets/ShuffleBytes.glsl
Normal file
@ -0,0 +1,32 @@
|
||||
R"(
|
||||
#version 430
|
||||
layout(local_size_x=%ws, local_size_y=1, local_size_z=1) in;
|
||||
layout(%set, binding=%loc, std430) buffer ssbo{ uint data[]; };
|
||||
%ub
|
||||
|
||||
#define KERNEL_SIZE %ks
|
||||
|
||||
// Generic swap routines
|
||||
#define bswap_u16(bits) (bits & 0xFF) << 8 | (bits & 0xFF00) >> 8 | (bits & 0xFF0000) << 8 | (bits & 0xFF000000) >> 8
|
||||
#define bswap_u32(bits) (bits & 0xFF) << 24 | (bits & 0xFF00) << 8 | (bits & 0xFF0000) >> 8 | (bits & 0xFF000000) >> 24
|
||||
#define bswap_u16_u32(bits) (bits & 0xFFFF) << 16 | (bits & 0xFFFF0000) >> 16
|
||||
|
||||
// Depth format conversions
|
||||
#define d24f_to_f32(bits) (bits << 7)
|
||||
#define f32_to_d24f(bits) (bits >> 7)
|
||||
|
||||
// Flattens the dispatch grid into a single index: rows of invocations along x,
// stacked along y.
uint linear_invocation_id()
{
	return (gl_GlobalInvocationID.y * (gl_NumWorkGroups.x * gl_WorkGroupSize.x)) + gl_GlobalInvocationID.x;
}
|
||||
|
||||
%md
|
||||
void main()
|
||||
{
|
||||
uint invocation_id = linear_invocation_id();
|
||||
uint index = invocation_id * KERNEL_SIZE;
|
||||
uint value;
|
||||
%vars
|
||||
|
||||
)"
|
@ -24,7 +24,7 @@ namespace program_common
|
||||
static std::string get_vertex_interpreter()
|
||||
{
|
||||
const char* s =
|
||||
#include "../Common/Interpreter/VertexInterpreter.glsl"
|
||||
#include "../Program/GLSLInterpreter/VertexInterpreter.glsl"
|
||||
;
|
||||
return s;
|
||||
}
|
||||
@ -32,7 +32,7 @@ namespace program_common
|
||||
static std::string get_fragment_interpreter()
|
||||
{
|
||||
const char* s =
|
||||
#include "../Common/Interpreter/FragmentInterpreter.glsl"
|
||||
#include "../Program/GLSLInterpreter/FragmentInterpreter.glsl"
|
||||
;
|
||||
return s;
|
||||
}
|
||||
|
@ -224,40 +224,14 @@ namespace vk
|
||||
kernel_size = _kernel_size? _kernel_size : optimal_kernel_size;
|
||||
|
||||
m_src =
|
||||
"#version 430\n"
|
||||
"layout(local_size_x=%ws, local_size_y=1, local_size_z=1) in;\n"
|
||||
"layout(std430, set=0, binding=0) buffer ssbo{ uint data[]; };\n"
|
||||
"%ub"
|
||||
"\n"
|
||||
"#define KERNEL_SIZE %ks\n"
|
||||
"\n"
|
||||
"// Generic swap routines\n"
|
||||
"#define bswap_u16(bits) (bits & 0xFF) << 8 | (bits & 0xFF00) >> 8 | (bits & 0xFF0000) << 8 | (bits & 0xFF000000) >> 8\n"
|
||||
"#define bswap_u32(bits) (bits & 0xFF) << 24 | (bits & 0xFF00) << 8 | (bits & 0xFF0000) >> 8 | (bits & 0xFF000000) >> 24\n"
|
||||
"#define bswap_u16_u32(bits) (bits & 0xFFFF) << 16 | (bits & 0xFFFF0000) >> 16\n"
|
||||
"\n"
|
||||
"// Depth format conversions\n"
|
||||
"#define d24_to_f32(bits) floatBitsToUint(float(bits) / 16777215.f)\n"
|
||||
"#define f32_to_d24(bits) uint(uintBitsToFloat(bits) * 16777215.f)\n"
|
||||
"#define d24f_to_f32(bits) (bits << 7)\n"
|
||||
"#define f32_to_d24f(bits) (bits >> 7)\n"
|
||||
"#define d24x8_to_f32(bits) d24_to_f32(bits >> 8)\n"
|
||||
"#define d24x8_to_d24x8_swapped(bits) (bits & 0xFF00) | (bits & 0xFF0000) >> 16 | (bits & 0xFF) << 16\n"
|
||||
"#define f32_to_d24x8_swapped(bits) d24x8_to_d24x8_swapped(f32_to_d24(bits))\n"
|
||||
"\n"
|
||||
"%md"
|
||||
"void main()\n"
|
||||
"{\n"
|
||||
" uint invocations_x = (gl_NumWorkGroups.x * gl_WorkGroupSize.x);"
|
||||
" uint invocation_id = (gl_GlobalInvocationID.y * invocations_x) + gl_GlobalInvocationID.x;\n"
|
||||
" uint index = invocation_id * KERNEL_SIZE;\n"
|
||||
" uint value;\n"
|
||||
"%vars"
|
||||
"\n";
|
||||
#include "../Program/GLSLSnippets/ShuffleBytes.glsl"
|
||||
;
|
||||
|
||||
const auto parameters_size = utils::align(push_constants_size, 16) / 16;
|
||||
const std::pair<std::string_view, std::string> syntax_replace[] =
|
||||
{
|
||||
{ "%loc", "0" },
|
||||
{ "%set", "set = 0"},
|
||||
{ "%ws", std::to_string(optimal_group_size) },
|
||||
{ "%ks", std::to_string(kernel_size) },
|
||||
{ "%vars", variables },
|
||||
|
@ -422,131 +422,8 @@ namespace vk
|
||||
create();
|
||||
|
||||
m_src =
|
||||
"#version 450\n"
|
||||
"layout(local_size_x = %ws, local_size_y = 1, local_size_z = 1) in;\n\n"
|
||||
|
||||
"layout(set=0, binding=0, std430) buffer ssbo0{ uint data_in[]; };\n"
|
||||
"layout(set=0, binding=1, std430) buffer ssbo1{ uint data_out[]; };\n"
|
||||
"layout(push_constant) uniform parameters\n"
|
||||
"{\n"
|
||||
" uint image_width;\n"
|
||||
" uint image_height;\n"
|
||||
" uint image_depth;\n"
|
||||
" uint image_logw;\n"
|
||||
" uint image_logh;\n"
|
||||
" uint image_logd;\n"
|
||||
" uint lod_count;\n"
|
||||
"};\n\n"
|
||||
|
||||
"struct invocation_properties\n"
|
||||
"{\n"
|
||||
" uint data_offset;\n"
|
||||
" uvec3 size;\n"
|
||||
" uvec3 size_log2;\n"
|
||||
"};\n\n"
|
||||
|
||||
"#define bswap_u16(bits) (bits & 0xFF) << 8 | (bits & 0xFF00) >> 8 | (bits & 0xFF0000) << 8 | (bits & 0xFF000000) >> 8\n"
|
||||
"#define bswap_u32(bits) (bits & 0xFF) << 24 | (bits & 0xFF00) << 8 | (bits & 0xFF0000) >> 8 | (bits & 0xFF000000) >> 24\n\n"
|
||||
|
||||
"invocation_properties invocation;\n\n"
|
||||
|
||||
"bool init_invocation_properties(const in uint offset)\n"
|
||||
"{\n"
|
||||
" invocation.data_offset = 0;\n"
|
||||
" invocation.size.x = image_width;\n"
|
||||
" invocation.size.y = image_height;\n"
|
||||
" invocation.size.z = image_depth;\n"
|
||||
" invocation.size_log2.x = image_logw;\n"
|
||||
" invocation.size_log2.y = image_logh;\n"
|
||||
" invocation.size_log2.z = image_logd;\n"
|
||||
" uint level_end = image_width * image_height * image_depth;\n"
|
||||
" uint level = 1;\n\n"
|
||||
|
||||
" while (offset >= level_end && level < lod_count)\n"
|
||||
" {\n"
|
||||
" invocation.data_offset = level_end;\n"
|
||||
" invocation.size.xy /= 2;\n"
|
||||
" invocation.size.xy = max(invocation.size.xy, uvec2(1));\n"
|
||||
" invocation.size_log2.xy = max(invocation.size_log2.xy, uvec2(1));\n"
|
||||
" invocation.size_log2.xy --;\n"
|
||||
" level_end += (invocation.size.x * invocation.size.y * image_depth);\n"
|
||||
" level++;"
|
||||
" }\n\n"
|
||||
|
||||
" return (offset < level_end);\n"
|
||||
"}\n\n"
|
||||
|
||||
"uint get_z_index(const in uint x_, const in uint y_, const in uint z_)\n"
|
||||
"{\n"
|
||||
" uint offset = 0;\n"
|
||||
" uint shift = 0;\n"
|
||||
" uint x = x_;\n"
|
||||
" uint y = y_;\n"
|
||||
" uint z = z_;\n"
|
||||
" uint log2w = invocation.size_log2.x;\n"
|
||||
" uint log2h = invocation.size_log2.y;\n"
|
||||
" uint log2d = invocation.size_log2.z;\n"
|
||||
"\n"
|
||||
" do\n"
|
||||
" {\n"
|
||||
" if (log2w > 0)\n"
|
||||
" {\n"
|
||||
" offset |= (x & 1) << shift;\n"
|
||||
" shift++;\n"
|
||||
" x >>= 1;\n"
|
||||
" log2w--;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" if (log2h > 0)\n"
|
||||
" {\n"
|
||||
" offset |= (y & 1) << shift;\n"
|
||||
" shift++;\n"
|
||||
" y >>= 1;\n"
|
||||
" log2h--;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" if (log2d > 0)\n"
|
||||
" {\n"
|
||||
" offset |= (z & 1) << shift;\n"
|
||||
" shift++;\n"
|
||||
" z >>= 1;\n"
|
||||
" log2d--;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
" while(x > 0 || y > 0 || z > 0);\n"
|
||||
"\n"
|
||||
" return offset;\n"
|
||||
"}\n\n"
|
||||
|
||||
"void main()\n"
|
||||
"{\n"
|
||||
" uint invocations_x = (gl_NumWorkGroups.x * gl_WorkGroupSize.x);"
|
||||
" uint texel_id = (gl_GlobalInvocationID.y * invocations_x) + gl_GlobalInvocationID.x;\n"
|
||||
" uint word_count = %_wordcount;\n\n"
|
||||
|
||||
" if (!init_invocation_properties(texel_id))\n"
|
||||
" return;\n\n"
|
||||
|
||||
" // Calculations done in texels, not bytes\n"
|
||||
" uint row_length = invocation.size.x;\n"
|
||||
" uint slice_length = (invocation.size.y * row_length);\n"
|
||||
" uint level_offset = (texel_id - invocation.data_offset);\n"
|
||||
" uint slice_offset = (level_offset % slice_length);\n"
|
||||
" uint z = (level_offset / slice_length);\n"
|
||||
" uint y = (slice_offset / row_length);\n"
|
||||
" uint x = (slice_offset % row_length);\n\n"
|
||||
|
||||
" uint src_texel_id = get_z_index(x, y, z);\n"
|
||||
" uint dst_id = (texel_id * word_count);\n"
|
||||
" uint src_id = (src_texel_id + invocation.data_offset) * word_count;\n\n"
|
||||
|
||||
" for (uint i = 0; i < word_count; ++i)\n"
|
||||
" {\n"
|
||||
" uint value = data_in[src_id++];\n"
|
||||
" data_out[dst_id++] = %f(value);\n"
|
||||
" }\n\n"
|
||||
|
||||
"}\n";
|
||||
#include "../Program/GLSLSnippets/GPUDeswizzle.glsl"
|
||||
;
|
||||
|
||||
std::string transform;
|
||||
if constexpr (_SwapBytes)
|
||||
@ -567,6 +444,9 @@ namespace vk
|
||||
|
||||
const std::pair<std::string_view, std::string> syntax_replace[] =
|
||||
{
|
||||
{ "%loc", "0" },
|
||||
{ "%set", "set = 0" },
|
||||
{ "%push_block", "push_constant" },
|
||||
{ "%ws", std::to_string(optimal_group_size) },
|
||||
{ "%_wordcount", std::to_string(sizeof(_BlockType) / 4) },
|
||||
{ "%f", transform }
|
||||
|
@ -814,8 +814,10 @@
|
||||
</ProjectReference>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="Emu\RSX\Common\Interpreter\FragmentInterpreter.glsl" />
|
||||
<None Include="Emu\RSX\Common\Interpreter\VertexInterpreter.glsl" />
|
||||
<None Include="Emu\RSX\Program\GLSLInterpreter\FragmentInterpreter.glsl" />
|
||||
<None Include="Emu\RSX\Program\GLSLInterpreter\VertexInterpreter.glsl" />
|
||||
<None Include="Emu\RSX\Program\GLSLSnippets\GPUDeswizzle.glsl" />
|
||||
<None Include="Emu\RSX\Program\GLSLSnippets\ShuffleBytes.glsl" />
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
|
@ -64,15 +64,18 @@
|
||||
<Filter Include="Emu\NP">
|
||||
<UniqueIdentifier>{652ce43e-72db-42cd-831a-0e194f67e731}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="Emu\GPU\RSX\Common\Interpreter">
|
||||
<UniqueIdentifier>{bc97b324-1eea-445a-8fa9-6fc49e3df47c}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="Emu\Audio\FAudio">
|
||||
<UniqueIdentifier>{7555ff6f-67a9-4d02-b744-0bf896751edb}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="Emu\GPU\RSX\Program">
|
||||
<UniqueIdentifier>{d055ca32-157a-4d8c-895e-29509858fcb0}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="Emu\GPU\RSX\Program\Snippets">
|
||||
<UniqueIdentifier>{21667779-4136-4de4-8695-9ea13e5c9bce}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="Emu\GPU\RSX\Program\Interpreter">
|
||||
<UniqueIdentifier>{bc97b324-1eea-445a-8fa9-6fc49e3df47c}</UniqueIdentifier>
|
||||
</Filter>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="Crypto\aes.cpp">
|
||||
@ -2130,11 +2133,17 @@
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="Emu\RSX\Common\Interpreter\FragmentInterpreter.glsl">
|
||||
<Filter>Emu\GPU\RSX\Common\Interpreter</Filter>
|
||||
<None Include="Emu\RSX\Program\GLSLSnippets\GPUDeswizzle.glsl">
|
||||
<Filter>Emu\GPU\RSX\Program\Snippets</Filter>
|
||||
</None>
|
||||
<None Include="Emu\RSX\Common\Interpreter\VertexInterpreter.glsl">
|
||||
<Filter>Emu\GPU\RSX\Common\Interpreter</Filter>
|
||||
<None Include="Emu\RSX\Program\GLSLInterpreter\FragmentInterpreter.glsl">
|
||||
<Filter>Emu\GPU\RSX\Program\Interpreter</Filter>
|
||||
</None>
|
||||
<None Include="Emu\RSX\Program\GLSLInterpreter\VertexInterpreter.glsl">
|
||||
<Filter>Emu\GPU\RSX\Program\Interpreter</Filter>
|
||||
</None>
|
||||
<None Include="Emu\RSX\Program\GLSLSnippets\ShuffleBytes.glsl">
|
||||
<Filter>Emu\GPU\RSX\Program\Snippets</Filter>
|
||||
</None>
|
||||
</ItemGroup>
|
||||
</Project>
|
Loading…
x
Reference in New Issue
Block a user