mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-02-06 18:40:36 +00:00
gl: Finalize host labels implementation
This commit is contained in:
parent
0db06964dc
commit
681debd8f6
@ -300,7 +300,7 @@ namespace gl
|
||||
|
||||
m_src = fmt::replace_all(m_src, syntax_replace);
|
||||
|
||||
param_buffer.create(gl::buffer::target::uniform, 32, nullptr, gl::buffer::memory_type::local, GL_DYNAMIC_COPY);
|
||||
param_buffer.create(gl::buffer::target::uniform, 32, nullptr, gl::buffer::memory_type::local, gl::buffer::usage::dynamic_update);
|
||||
}
|
||||
|
||||
~cs_deswizzle_3d()
|
||||
|
@ -19,8 +19,11 @@ namespace gl
|
||||
void* userptr = vm::get_super_ptr(base_address);
|
||||
|
||||
m_data = std::make_unique<gl::buffer>();
|
||||
m_data->create(buffer::target::userptr, block_size, userptr);
|
||||
m_data->create(buffer::target::array, block_size, userptr, buffer::memory_type::userptr, 0);
|
||||
m_base_address = base_address;
|
||||
|
||||
// Some drivers may reject userptr input for whatever reason. Check that the state is still valid.
|
||||
gl::check_state();
|
||||
}
|
||||
|
||||
void* dma_block::map(const utils::address_range& range) const
|
||||
@ -69,8 +72,8 @@ namespace gl
|
||||
|
||||
utils::address_range to_dma_block_range(u32 start, u32 length)
|
||||
{
|
||||
const auto start_block_address = start & ~s_dma_block_size;
|
||||
const auto end_block_address = (start + length - 1) & ~s_dma_block_size;
|
||||
const auto start_block_address = start & -s_dma_block_size;
|
||||
const auto end_block_address = (start + length + s_dma_block_size - 1) & -s_dma_block_size;
|
||||
return utils::address_range::start_end(start_block_address, end_block_address);
|
||||
}
|
||||
|
||||
@ -81,7 +84,7 @@ namespace gl
|
||||
if (!block)
|
||||
{
|
||||
block = std::make_unique<dma_block>();
|
||||
block->allocate(block_range.start, length);
|
||||
block->allocate(block_range.start, block_range.length());
|
||||
return *block;
|
||||
}
|
||||
|
||||
@ -96,6 +99,7 @@ namespace gl
|
||||
const auto search_end = (block_range.end + 1);
|
||||
|
||||
// 1. Resize to new length
|
||||
ensure((new_length & -s_dma_block_size) == new_length);
|
||||
auto new_owner = std::make_unique<dma_block>();
|
||||
new_owner->allocate(owner->base_addr(), new_length);
|
||||
|
||||
|
@ -24,7 +24,7 @@ namespace gl
|
||||
void* map(const utils::address_range& range) const;
|
||||
|
||||
void set_parent(const dma_block* other);
|
||||
const dma_block* head() const { return m_parent; }
|
||||
const dma_block* head() const { return m_parent ? m_parent : this; }
|
||||
bool can_map(const utils::address_range& range) const;
|
||||
|
||||
u32 base_addr() const { return m_base_address; }
|
||||
|
@ -181,18 +181,18 @@ void GLGSRender::on_init_thread()
|
||||
backend_config.supports_normalized_barycentrics = false;
|
||||
}
|
||||
|
||||
if (gl_caps.AMD_pinned_memory)
|
||||
if (gl_caps.AMD_pinned_memory && g_cfg.video.host_label_synchronization)
|
||||
{
|
||||
backend_config.supports_host_gpu_labels = true;
|
||||
|
||||
if (g_cfg.video.host_label_synchronization)
|
||||
{
|
||||
m_host_gpu_context_data = std::make_unique<gl::buffer>();
|
||||
m_host_gpu_context_data->create(gl::buffer::target::array, 4096);
|
||||
m_host_gpu_context_data = std::make_unique<gl::buffer>();
|
||||
m_host_gpu_context_data->create(gl::buffer::target::array, 4096, nullptr, gl::buffer::memory_type::host_visible,
|
||||
gl::buffer::usage::host_read | gl::buffer::usage::host_write | gl::buffer::usage::persistent_map);
|
||||
|
||||
auto host_context_ptr = reinterpret_cast<rsx::host_gpu_context_t*>(m_host_gpu_context_data->map(0, 4096, gl::buffer::access::read));
|
||||
m_host_dma_ctrl = std::make_unique<rsx::RSXDMAWriter>(host_context_ptr);
|
||||
}
|
||||
auto host_context_ptr = reinterpret_cast<rsx::host_gpu_context_t*>(m_host_gpu_context_data->map(0, 4096, gl::buffer::access::persistent_rw));
|
||||
m_host_dma_ctrl = std::make_unique<rsx::RSXDMAWriter>(host_context_ptr);
|
||||
m_enqueued_host_write_buffer = std::make_unique<gl::scratch_ring_buffer>();
|
||||
m_enqueued_host_write_buffer->create(gl::buffer::target::array, 64 * 0x100000, gl::buffer::usage::dynamic_update);
|
||||
}
|
||||
|
||||
// Use industry standard resource alignment values as defaults
|
||||
@ -425,6 +425,7 @@ void GLGSRender::on_exit()
|
||||
|
||||
m_host_dma_ctrl.reset();
|
||||
m_host_gpu_context_data.reset();
|
||||
m_enqueued_host_write_buffer.reset();
|
||||
|
||||
for (auto &fbo : m_framebuffer_cache)
|
||||
{
|
||||
@ -1222,6 +1223,66 @@ void GLGSRender::notify_tile_unbound(u32 tile)
|
||||
}
|
||||
}
|
||||
|
||||
// Releases a GCM label by enqueueing GPU-side buffer copies instead of writing from the CPU.
//
// address: guest address of the label; 4 bytes starting here are mapped through gl::map_dma.
// args:    32-bit value to store at the label. It is converted through be_t<u32> before being
//          staged (presumably so the guest sees it in big-endian order - confirm against be_t).
//
// Returns false when host GPU labels are not supported by the backend configuration, or when
// host_ctx->texture_loads_completed() reports true (in which case the pending label queue is
// drained first). Returns true once the copies for the label and the commands-complete event
// have been enqueued.
// NOTE(review): the caller presumably performs its own fallback write when false is returned -
// confirm against the base-class contract.
bool GLGSRender::release_GCM_label(u32 address, u32 args)
|
||||
{
|
||||
// Feature gate: nothing to do unless the backend advertises host GPU label support.
if (!backend_config.supports_host_gpu_labels)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
// ensure() asserts the host context pointer is non-null before it is dereferenced below.
auto host_ctx = ensure(m_host_dma_ctrl->host_ctx());
|
||||
|
||||
if (host_ctx->texture_loads_completed())
|
||||
{
|
||||
// We're about to poll waiting for GPU state, ensure the context is still valid.
|
||||
gl::check_state();
|
||||
|
||||
// All texture loads already seen by the host GPU
|
||||
// Wait for all previously submitted labels to be flushed
|
||||
m_host_dma_ctrl->drain_label_queue();
|
||||
return false;
|
||||
}
|
||||
|
||||
// mapping.first is used below as the destination offset and mapping.second as the destination buffer.
const auto mapping = gl::map_dma(address, 4);
|
||||
const auto write_data = std::bit_cast<u32, be_t<u32>>(args);
|
||||
const auto release_event_id = host_ctx->on_label_acquire();
|
||||
|
||||
// We don't have async texture loads yet, so release both the label and the commands-complete event together
|
||||
// Staging layout: bytes [0, 8) hold the label value widened to u64, bytes [8, 16) hold the event id.
// NOTE(review): copying only the first 4 bytes below yields write_data on a little-endian host - confirm.
u64 write_buf[2] = { write_data, release_event_id };
|
||||
const auto host_read_offset = m_enqueued_host_write_buffer->alloc(16, 16);
|
||||
m_enqueued_host_write_buffer->get().sub_data(host_read_offset, 16, write_buf);
|
||||
|
||||
// Now write to DMA and then to host context
|
||||
// 4 bytes of label data go to the DMA mapping; the 8-byte event id goes to commands_complete_event.
m_enqueued_host_write_buffer->get().copy_to(mapping.second, host_read_offset, mapping.first, 4);
|
||||
m_enqueued_host_write_buffer->get().copy_to(m_host_gpu_context_data.get(), host_read_offset + 8, ::offset32(&rsx::host_gpu_context_t::commands_complete_event), 8);
|
||||
// Barrier over the 16 staged bytes; presumably guards the region against reuse until the copies execute - TODO confirm.
m_enqueued_host_write_buffer->push_barrier(host_read_offset, 16);
|
||||
|
||||
host_ctx->on_label_release();
|
||||
return true;
|
||||
}
|
||||
|
||||
// Enqueues a small write into the host GPU context buffer via a staged buffer-to-buffer copy.
//
// offset: destination byte offset inside m_host_gpu_context_data.
// size:   number of bytes to write; asserted to be at most 8.
// data:   source bytes; uploaded into the staging ring buffer with sub_data during this call.
//
// NOTE(review): no null check is made on m_enqueued_host_write_buffer or m_host_gpu_context_data
// here; callers are presumably gated on backend_config.supports_host_gpu_labels - confirm.
void GLGSRender::enqueue_host_context_write(u32 offset, u32 size, const void* data)
|
||||
{
|
||||
ensure(size <= 8);
|
||||
// Reserve 8 bytes of staging space at 16-byte alignment.
const u32 host_read_offset = m_enqueued_host_write_buffer->alloc(8, 16);
|
||||
m_enqueued_host_write_buffer->get().sub_data(host_read_offset, size, data);
|
||||
m_enqueued_host_write_buffer->get().copy_to(m_host_gpu_context_data.get(), host_read_offset, offset, size);
|
||||
// NOTE(review): the barrier spans 16 bytes although only 8 were allocated above; this looks
// tied to the 16-byte allocation alignment - confirm it is intentional.
m_enqueued_host_write_buffer->push_barrier(host_read_offset, 16);
|
||||
}
|
||||
|
||||
// Notification hook invoked after a guest texture upload (see the call following fill_texture
// in the texture upload path). Does nothing unless host GPU labels are supported.
//
// Takes a new event id from the host context counter, records it CPU-side as
// texture_load_request_event, then enqueues a write of the same id to
// texture_load_complete_event in the host context buffer.
void GLGSRender::on_guest_texture_read()
|
||||
{
|
||||
if (!backend_config.supports_host_gpu_labels)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
// Tag the read as being in progress
|
||||
u64 event_id = m_host_dma_ctrl->host_ctx()->inc_counter();
|
||||
m_host_dma_ctrl->host_ctx()->texture_load_request_event = event_id;
|
||||
// The request event is set directly; the matching complete event is written through the enqueued copy.
enqueue_host_context_write(::offset32(&rsx::host_gpu_context_t::texture_load_complete_event), 8, &event_id);
|
||||
}
|
||||
|
||||
void GLGSRender::begin_occlusion_query(rsx::reports::occlusion_query_info* query)
|
||||
{
|
||||
query->result = 0;
|
||||
|
@ -152,6 +152,7 @@ class GLGSRender : public GSRender, public ::rsx::reports::ZCULL_control
|
||||
|
||||
// Host context for GPU-driven work
|
||||
std::unique_ptr<gl::buffer> m_host_gpu_context_data;
|
||||
std::unique_ptr<gl::scratch_ring_buffer> m_enqueued_host_write_buffer;
|
||||
|
||||
public:
|
||||
u64 get_cycles() final;
|
||||
@ -196,6 +197,11 @@ public:
|
||||
void get_occlusion_query_result(rsx::reports::occlusion_query_info* query) override;
|
||||
void discard_occlusion_query(rsx::reports::occlusion_query_info* query) override;
|
||||
|
||||
// DMA
|
||||
bool release_GCM_label(u32 address, u32 data) override;
|
||||
void enqueue_host_context_write(u32 offset, u32 size, const void* data);
|
||||
void on_guest_texture_read();
|
||||
|
||||
// GRAPH backend
|
||||
void patch_transform_constants(rsx::context* ctx, u32 index, u32 count) override;
|
||||
|
||||
|
@ -3,6 +3,7 @@
|
||||
#include "GLCompute.h"
|
||||
#include "GLRenderTargets.h"
|
||||
#include "GLOverlays.h"
|
||||
#include "GLGSRender.h"
|
||||
|
||||
#include "glutils/blitter.h"
|
||||
#include "glutils/ring_buffer.h"
|
||||
@ -285,7 +286,7 @@ namespace gl
|
||||
if (!(*dst) || max_mem > static_cast<u64>(dst->size()))
|
||||
{
|
||||
if (*dst) dst->remove();
|
||||
dst->create(buffer::target::ssbo, max_mem, nullptr, buffer::memory_type::local, GL_STATIC_COPY);
|
||||
dst->create(buffer::target::ssbo, max_mem, nullptr, buffer::memory_type::local, 0);
|
||||
}
|
||||
|
||||
if (auto as_vi = dynamic_cast<const gl::viewable_image*>(src);
|
||||
@ -400,7 +401,7 @@ namespace gl
|
||||
return;
|
||||
}
|
||||
|
||||
scratch_mem.create(buffer::target::pixel_pack, max_mem, nullptr, buffer::memory_type::local, GL_STATIC_COPY);
|
||||
scratch_mem.create(buffer::target::pixel_pack, max_mem, nullptr, buffer::memory_type::local, 0);
|
||||
|
||||
glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
|
||||
src->copy_to(&scratch_mem, in_offset, 0, mem_info->image_size_in_bytes);
|
||||
@ -835,6 +836,10 @@ namespace gl
|
||||
const GLenum gl_format = std::get<0>(format_type);
|
||||
const GLenum gl_type = std::get<1>(format_type);
|
||||
fill_texture(cmd, dst, gcm_format, subresources_layout, is_swizzled, gl_format, gl_type, data_upload_buf);
|
||||
|
||||
// Notify the renderer of the upload
|
||||
auto renderer = static_cast<GLGSRender*>(rsx::get_current_renderer());
|
||||
renderer->on_guest_texture_read();
|
||||
}
|
||||
|
||||
u32 get_format_texel_width(GLenum format)
|
||||
|
@ -59,7 +59,7 @@ namespace gl
|
||||
pbo.remove();
|
||||
}
|
||||
|
||||
pbo.create(buffer::target::pixel_pack, buffer_size, nullptr, buffer::memory_type::host_visible, GL_STREAM_READ);
|
||||
pbo.create(buffer::target::pixel_pack, buffer_size, nullptr, buffer::memory_type::host_visible, buffer::usage::host_read);
|
||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, GL_NONE);
|
||||
}
|
||||
|
||||
|
@ -3,38 +3,35 @@
|
||||
|
||||
namespace gl
|
||||
{
|
||||
void buffer::allocate(GLsizeiptr size, const void* data_, memory_type type, GLenum usage)
|
||||
void buffer::allocate(GLsizeiptr size, const void* data_, memory_type type, GLuint usage_flags)
|
||||
{
|
||||
m_memory_type = type;
|
||||
|
||||
if (const auto& caps = get_driver_caps();
|
||||
m_target != target::userptr && caps.ARB_buffer_storage_supported)
|
||||
type != memory_type::userptr && caps.ARB_buffer_storage_supported)
|
||||
{
|
||||
GLenum flags = 0;
|
||||
if (type == memory_type::host_visible)
|
||||
if (usage_flags & usage::host_write)
|
||||
{
|
||||
switch (usage)
|
||||
{
|
||||
case GL_STREAM_DRAW:
|
||||
case GL_STATIC_DRAW:
|
||||
case GL_DYNAMIC_DRAW:
|
||||
flags |= GL_MAP_WRITE_BIT;
|
||||
break;
|
||||
case GL_STREAM_READ:
|
||||
case GL_STATIC_READ:
|
||||
case GL_DYNAMIC_READ:
|
||||
flags |= GL_MAP_READ_BIT;
|
||||
break;
|
||||
default:
|
||||
fmt::throw_exception("Unsupported buffer usage 0x%x", usage);
|
||||
}
|
||||
flags |= GL_MAP_WRITE_BIT;
|
||||
}
|
||||
else
|
||||
if (usage_flags & usage::host_read)
|
||||
{
|
||||
// Local memory hints
|
||||
if (usage == GL_DYNAMIC_COPY)
|
||||
{
|
||||
flags |= GL_DYNAMIC_STORAGE_BIT;
|
||||
}
|
||||
flags |= GL_MAP_READ_BIT;
|
||||
}
|
||||
if (usage_flags & usage::persistent_map)
|
||||
{
|
||||
flags |= GL_MAP_PERSISTENT_BIT;
|
||||
}
|
||||
if (usage_flags & usage::dynamic_update)
|
||||
{
|
||||
flags |= GL_DYNAMIC_STORAGE_BIT;
|
||||
}
|
||||
|
||||
ensure((flags & (GL_MAP_PERSISTENT_BIT | GL_DYNAMIC_STORAGE_BIT)) != (GL_MAP_PERSISTENT_BIT | GL_DYNAMIC_STORAGE_BIT),
|
||||
"Mutually exclusive usage flags set!");
|
||||
|
||||
ensure(type == memory_type::local || flags != 0, "Host-visible memory must have usage flags set!");
|
||||
|
||||
if ((flags & GL_MAP_READ_BIT) && !caps.vendor_AMD)
|
||||
{
|
||||
@ -51,10 +48,8 @@ namespace gl
|
||||
}
|
||||
else
|
||||
{
|
||||
data(size, data_, usage);
|
||||
data(size, data_, GL_STREAM_COPY);
|
||||
}
|
||||
|
||||
m_memory_type = type;
|
||||
}
|
||||
|
||||
buffer::~buffer()
|
||||
@ -89,18 +84,18 @@ namespace gl
|
||||
save_binding_state save(current_target(), *this);
|
||||
}
|
||||
|
||||
void buffer::create(GLsizeiptr size, const void* data_, memory_type type, GLenum usage)
|
||||
void buffer::create(GLsizeiptr size, const void* data_, memory_type type, GLuint usage_bits)
|
||||
{
|
||||
create();
|
||||
allocate(size, data_, type, usage);
|
||||
allocate(size, data_, type, usage_bits);
|
||||
}
|
||||
|
||||
void buffer::create(target target_, GLsizeiptr size, const void* data_, memory_type type, GLenum usage)
|
||||
void buffer::create(target target_, GLsizeiptr size, const void* data_, memory_type type, GLuint usage_bits)
|
||||
{
|
||||
m_target = target_;
|
||||
|
||||
create();
|
||||
allocate(size, data_, type, usage);
|
||||
allocate(size, data_, type, usage_bits);
|
||||
}
|
||||
|
||||
void buffer::remove()
|
||||
@ -117,11 +112,19 @@ namespace gl
|
||||
{
|
||||
ensure(m_memory_type != memory_type::local);
|
||||
|
||||
DSA_CALL2(NamedBufferData, m_id, size, data_, usage);
|
||||
m_size = size;
|
||||
|
||||
if (m_memory_type == memory_type::userptr)
|
||||
{
|
||||
glBindBuffer(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, m_id);
|
||||
glBufferData(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, size, data_, usage);
|
||||
return;
|
||||
}
|
||||
|
||||
DSA_CALL2(NamedBufferData, m_id, size, data_, usage);
|
||||
}
|
||||
|
||||
void buffer::sub_data(GLsizeiptr offset, GLsizeiptr length, GLvoid* data)
|
||||
void buffer::sub_data(GLsizeiptr offset, GLsizeiptr length, const GLvoid* data)
|
||||
{
|
||||
ensure(m_memory_type == memory_type::local);
|
||||
DSA_CALL2(NamedBufferSubData, m_id, offset, length, data);
|
||||
|
@ -15,28 +15,37 @@ namespace gl
|
||||
element_array = GL_ELEMENT_ARRAY_BUFFER,
|
||||
uniform = GL_UNIFORM_BUFFER,
|
||||
texture = GL_TEXTURE_BUFFER,
|
||||
ssbo = GL_SHADER_STORAGE_BUFFER,
|
||||
userptr = GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD
|
||||
ssbo = GL_SHADER_STORAGE_BUFFER
|
||||
};
|
||||
|
||||
enum class access
|
||||
{
|
||||
read = GL_MAP_READ_BIT,
|
||||
write = GL_MAP_WRITE_BIT,
|
||||
read_write = GL_MAP_READ_BIT | GL_MAP_WRITE_BIT
|
||||
rw = GL_MAP_READ_BIT | GL_MAP_WRITE_BIT,
|
||||
persistent_rw = GL_MAP_READ_BIT | GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT
|
||||
};
|
||||
|
||||
enum class memory_type
|
||||
{
|
||||
undefined = 0,
|
||||
local = 1,
|
||||
host_visible = 2
|
||||
host_visible = 2,
|
||||
userptr = 4
|
||||
};
|
||||
|
||||
// Bit flags describing how a buffer will be used; combined with bitwise OR and passed as the
// usage_bits argument of buffer::create / buffer::allocate.
// On the buffer-storage path, buffer::allocate translates each flag into a GL storage flag and
// asserts that persistent_map and dynamic_update are not set together.
// Left as an unscoped enum: allocate tests the flags with plain bitwise AND against a GLuint.
enum usage
|
||||
{
|
||||
// Translated to GL_MAP_WRITE_BIT.
host_write = (1 << 0),
|
||||
// Translated to GL_MAP_READ_BIT.
host_read = (1 << 1),
|
||||
// Translated to GL_MAP_PERSISTENT_BIT.
persistent_map = (1 << 2),
|
||||
// Translated to GL_DYNAMIC_STORAGE_BIT.
dynamic_update = (1 << 3),
|
||||
};
|
||||
|
||||
class save_binding_state
|
||||
{
|
||||
GLint m_last_binding;
|
||||
GLenum m_target;
|
||||
GLint m_last_binding = GL_ZERO;
|
||||
GLenum m_target = GL_NONE;
|
||||
|
||||
public:
|
||||
save_binding_state(target target_, const buffer& new_state) : save_binding_state(target_)
|
||||
@ -65,6 +74,11 @@ namespace gl
|
||||
|
||||
~save_binding_state()
|
||||
{
|
||||
if (!m_target)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
glBindBuffer(m_target, m_last_binding);
|
||||
}
|
||||
};
|
||||
@ -78,7 +92,7 @@ namespace gl
|
||||
// Metadata
|
||||
mutable std::pair<u32, u32> m_bound_range{};
|
||||
|
||||
void allocate(GLsizeiptr size, const void* data_, memory_type type, GLenum usage);
|
||||
void allocate(GLsizeiptr size, const void* data_, memory_type type, GLuint usage_bits);
|
||||
|
||||
public:
|
||||
buffer() = default;
|
||||
@ -89,8 +103,8 @@ namespace gl
|
||||
void recreate(GLsizeiptr size, const void* data = nullptr);
|
||||
|
||||
void create();
|
||||
void create(GLsizeiptr size, const void* data_ = nullptr, memory_type type = memory_type::local, GLenum usage = GL_STREAM_DRAW);
|
||||
void create(target target_, GLsizeiptr size, const void* data_ = nullptr, memory_type type = memory_type::local, GLenum usage = GL_STREAM_DRAW);
|
||||
void create(GLsizeiptr size, const void* data_ = nullptr, memory_type type = memory_type::local, GLuint usage_bits = 0);
|
||||
void create(target target_, GLsizeiptr size, const void* data_ = nullptr, memory_type type = memory_type::local, GLuint usage_bits = 0);
|
||||
|
||||
void remove();
|
||||
|
||||
@ -98,7 +112,7 @@ namespace gl
|
||||
void bind() const { bind(current_target()); }
|
||||
|
||||
void data(GLsizeiptr size, const void* data_ = nullptr, GLenum usage = GL_STREAM_DRAW);
|
||||
void sub_data(GLsizeiptr offset, GLsizeiptr length, GLvoid* data);
|
||||
void sub_data(GLsizeiptr offset, GLsizeiptr length, const GLvoid* data);
|
||||
|
||||
GLubyte* map(GLsizeiptr offset, GLsizeiptr length, access access_);
|
||||
void unmap();
|
||||
|
@ -79,4 +79,12 @@ namespace gl
|
||||
{
|
||||
glInsertEventMarkerEXT(static_cast<GLsizei>(strlen(label)), label);
|
||||
}
|
||||
|
||||
// Checks if GL state is still valid
// Asserts (via ensure) that the error returned by a single glGetError() call is not GL_OUT_OF_MEMORY.
// Only one glGetError() result is examined per call; any other error value it returns is ignored here.
|
||||
void check_state()
|
||||
{
|
||||
// GL_OUT_OF_MEMORY invalidates the OpenGL context and is actually the GL version of DEVICE_LOST.
|
||||
// This spec workaround allows it to be abused by ISVs to indicate a broken GL context.
|
||||
ensure(glGetError() != GL_OUT_OF_MEMORY);
|
||||
}
|
||||
}
|
||||
|
@ -242,14 +242,14 @@ namespace gl
|
||||
}
|
||||
}
|
||||
|
||||
void scratch_ring_buffer::create(buffer::target target_, u64 size)
|
||||
void scratch_ring_buffer::create(buffer::target target_, u64 size, u32 usage_flags)
|
||||
{
|
||||
if (m_storage)
|
||||
{
|
||||
remove();
|
||||
}
|
||||
|
||||
m_storage.create(target_, size, nullptr, gl::buffer::memory_type::local, GL_STATIC_COPY);
|
||||
m_storage.create(target_, size, nullptr, gl::buffer::memory_type::local, usage_flags);
|
||||
}
|
||||
|
||||
void scratch_ring_buffer::remove()
|
||||
|
@ -103,7 +103,7 @@ namespace gl
|
||||
scratch_ring_buffer(const scratch_ring_buffer&) = delete;
|
||||
~scratch_ring_buffer();
|
||||
|
||||
void create(buffer::target _target, u64 size);
|
||||
void create(buffer::target _target, u64 size, u32 usage_flags = 0);
|
||||
void remove();
|
||||
|
||||
u32 alloc(u32 size, u32 alignment);
|
||||
|
@ -80,7 +80,7 @@ namespace gl
|
||||
if (!m_ubo)
|
||||
{
|
||||
ensure(compiled);
|
||||
m_ubo.create(gl::buffer::target::uniform, push_buffer_size, nullptr, gl::buffer::memory_type::local, GL_DYNAMIC_COPY);
|
||||
m_ubo.create(gl::buffer::target::uniform, push_buffer_size, nullptr, gl::buffer::memory_type::local, gl::buffer::usage::dynamic_update);
|
||||
|
||||
// Statically bind the image sources
|
||||
m_program.uniforms["InputTexture"] = GL_TEMP_IMAGE_SLOT(0);
|
||||
|
@ -27,7 +27,7 @@ namespace rsx
|
||||
|
||||
inline bool in_flight_commands_completed() const volatile
|
||||
{
|
||||
return last_label_release2_event == commands_complete_event;
|
||||
return last_label_release2_event <= commands_complete_event;
|
||||
}
|
||||
|
||||
inline bool texture_loads_completed() const volatile
|
||||
|
Loading…
x
Reference in New Issue
Block a user