From 2c803dbe66cf0c94f835e918a5ea1c9ec56c39cc Mon Sep 17 00:00:00 2001 From: kd-11 Date: Tue, 18 Oct 2016 10:57:28 +0300 Subject: [PATCH] gl/vk: Bug fixes and improvements (#2206) * gl: Only bind attrib textures on thread startup * gl: Persistent mapped buffers * gl: Fix emulated primitives in an inlined array * gl: Do not re-update program information every draw call * gl/vk: s1 type is signed normalized not unsigned normalized * gl/rsx: Allow disabling of persistent buffers for debugging gl: Large heap size is more practical gl: Fix a bug with legacy opengl buffers * gl/rsx: Allow emulation of unsupported attribute formats * gl: Fix typos and remove dprints gl: cleanup debug prints * ui: Move the GL legacy buffer toggle to the left pane * vk/gl: Fix cmp type, its range is [-1,1] not [0,1] SNORM_INT --- rpcs3/Emu/RSX/GL/GLGSRender.cpp | 131 ++++++++++++-- rpcs3/Emu/RSX/GL/GLGSRender.h | 12 +- rpcs3/Emu/RSX/GL/GLHelpers.h | 248 ++++++++++++++++++++------- rpcs3/Emu/RSX/GL/GLProcTable.h | 8 + rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp | 70 +++----- rpcs3/Emu/RSX/GL/GLVertexProgram.cpp | 23 ++- rpcs3/Emu/RSX/GL/GLVertexProgram.h | 9 + rpcs3/Emu/RSX/RSXThread.cpp | 5 +- rpcs3/Emu/RSX/RSXVertexProgram.h | 4 +- rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp | 8 +- rpcs3/Gui/SettingsDialog.cpp | 3 + 11 files changed, 378 insertions(+), 143 deletions(-) diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 0f7dde10b6..e3f181e8a4 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -2,12 +2,14 @@ #include "Utilities/Config.h" #include "Emu/Memory/Memory.h" #include "GLGSRender.h" +#include "GLVertexProgram.h" #include "../rsx_methods.h" #include "../Common/BufferUtils.h" #include "../rsx_utils.h" extern cfg::bool_entry g_cfg_rsx_debug_output; extern cfg::bool_entry g_cfg_rsx_overlay; +extern cfg::bool_entry g_cfg_rsx_gl_legacy_buffers; #define DUMP_VERTEX_DATA 0 @@ -380,8 +382,18 @@ void GLGSRender::end() return; } + if (manually_flush_ring_buffers) + { + //Use approximations to reseve space. This path is mostly for debug purposes anyway + u32 approx_vertex_count = rsx::method_registers.current_draw_clause.get_elements_count(); + u32 approx_working_buffer_size = approx_vertex_count * 256; + + //Allocate 256K heap if we have no approximation at this time (inlined array) + m_attrib_ring_buffer->reserve_storage_on_heap(std::max(approx_working_buffer_size, 256 * 1024U)); + m_index_ring_buffer->reserve_storage_on_heap(16 * 1024); + } + draw_fbo.bind(); - m_program->use(); //Check if depth buffer is bound and valid //If ds is not initialized clear it; it seems new depth textures should have depth cleared @@ -452,6 +464,12 @@ void GLGSRender::end() m_program->validate(); } + if (manually_flush_ring_buffers) + { + m_attrib_ring_buffer->unmap(); + m_index_ring_buffer->unmap(); + } + if (indexed_draw_info) { if (__glcheck enable(rsx::method_registers.restart_index_enabled(), GL_PRIMITIVE_RESTART)) @@ -507,17 +525,39 @@ void GLGSRender::on_init_thread() glGetIntegerv(GL_TEXTURE_BUFFER_OFFSET_ALIGNMENT, &m_min_texbuffer_alignment); m_vao.create(); - for (gl::texture &tex : m_gl_attrib_buffers) + const u32 texture_index_offset = + rsx::limits::fragment_textures_count + rsx::limits::vertex_textures_count; + for (int index = 0; index < rsx::limits::vertex_count; ++index) { + auto &tex = m_gl_attrib_buffers[index]; tex.create(); tex.set_target(gl::texture::target::textureBuffer); + + glActiveTexture(GL_TEXTURE0 + texture_index_offset + index); + tex.bind(); } - m_attrib_ring_buffer.create(gl::buffer::target::texture, 16 * 0x100000); - m_uniform_ring_buffer.create(gl::buffer::target::uniform, 16 * 0x100000); - m_index_ring_buffer.create(gl::buffer::target::element_array, 0x100000); + if (g_cfg_rsx_gl_legacy_buffers) + { + LOG_WARNING(RSX, "Using legacy openGL buffers."); + manually_flush_ring_buffers = true; - m_vao.element_array_buffer = m_index_ring_buffer; + m_attrib_ring_buffer.reset(new gl::legacy_ring_buffer()); + m_uniform_ring_buffer.reset(new gl::legacy_ring_buffer()); + m_index_ring_buffer.reset(new gl::legacy_ring_buffer()); + } + else + { + m_attrib_ring_buffer.reset(new gl::ring_buffer()); + m_uniform_ring_buffer.reset(new gl::ring_buffer()); + m_index_ring_buffer.reset(new gl::ring_buffer()); + } + + m_attrib_ring_buffer->create(gl::buffer::target::texture, 256 * 0x100000); + m_uniform_ring_buffer->create(gl::buffer::target::uniform, 64 * 0x100000); + m_index_ring_buffer->create(gl::buffer::target::element_array, 16 * 0x100000); + + m_vao.element_array_buffer = *m_index_ring_buffer; m_gl_texture_cache.initialize_rtt_cache(); m_text_printer.init(); } @@ -553,9 +593,9 @@ void GLGSRender::on_exit() tex.remove(); } - m_attrib_ring_buffer.remove(); - m_uniform_ring_buffer.remove(); - m_index_ring_buffer.remove(); + m_attrib_ring_buffer->remove(); + m_uniform_ring_buffer->remove(); + m_index_ring_buffer->remove(); m_text_printer.close(); @@ -656,6 +696,18 @@ bool GLGSRender::load_program() RSXVertexProgram vertex_program = get_current_vertex_program(); RSXFragmentProgram fragment_program = get_current_fragment_program(); + for (auto &vtx : vertex_program.rsx_vertex_inputs) + { + auto &array_info = rsx::method_registers.vertex_arrays_info[vtx.location]; + if (array_info.type() == rsx::vertex_base_type::s1 || + array_info.type() == rsx::vertex_base_type::cmp) + { + //Some vendors do not support GL_x_SNORM buffer textures + verify(HERE), vtx.flags == 0; + vtx.flags |= GL_VP_FORCE_ATTRIB_SCALING | GL_VP_ATTRIB_S16_INT; + } + } + for (int i = 0; i < 16; ++i) { auto &tex = rsx::method_registers.fragment_textures[i]; @@ -677,13 +729,55 @@ bool GLGSRender::load_program() } } + auto old_program = m_program; m_program = &m_prog_buffer.getGraphicPipelineState(vertex_program, fragment_program, nullptr); m_program->use(); + if (old_program == m_program && !m_transform_constants_dirty) + { + //This path is taken alot so the savings are tangible + struct scale_offset_layout + { + u16 clip_w, clip_h; + float scale_x, offset_x, scale_y, offset_y, scale_z, offset_z; + float fog0, fog1; + u32 alpha_tested; + float alpha_ref; + } + tmp = {}; + + tmp.clip_w = rsx::method_registers.surface_clip_width(); + tmp.clip_h = rsx::method_registers.surface_clip_height(); + tmp.scale_x = rsx::method_registers.viewport_scale_x(); + tmp.offset_x = rsx::method_registers.viewport_offset_x(); + tmp.scale_y = rsx::method_registers.viewport_scale_y(); + tmp.offset_y = rsx::method_registers.viewport_offset_y(); + tmp.scale_z = rsx::method_registers.viewport_scale_z(); + tmp.offset_z = rsx::method_registers.viewport_offset_z(); + tmp.fog0 = rsx::method_registers.fog_params_0(); + tmp.fog1 = rsx::method_registers.fog_params_1(); + tmp.alpha_tested = rsx::method_registers.alpha_test_enabled(); + tmp.alpha_ref = rsx::method_registers.alpha_ref(); + + size_t old_hash = m_transform_buffer_hash; + m_transform_buffer_hash = 0; + + u8 *data = reinterpret_cast(&tmp); + for (int i = 0; i < sizeof(tmp); ++i) + m_transform_buffer_hash ^= std::hash()(data[i]); + + if (old_hash == m_transform_buffer_hash) + return true; + } + + m_transform_constants_dirty = false; + u32 fragment_constants_size = m_prog_buffer.get_fragment_constants_buffer_size(fragment_program); fragment_constants_size = std::max(32U, fragment_constants_size); u32 max_buffer_sz = 512 + 8192 + align(fragment_constants_size, m_uniform_buffer_offset_align); - m_uniform_ring_buffer.reserve_and_map(max_buffer_sz); + + if (manually_flush_ring_buffers) + m_uniform_ring_buffer->reserve_storage_on_heap(align(max_buffer_sz, 512)); u8 *buf; u32 scale_offset_offset; @@ -691,7 +785,7 @@ bool GLGSRender::load_program() u32 fragment_constants_offset; // Scale offset - auto mapping = m_uniform_ring_buffer.alloc_from_reserve(512); + auto mapping = m_uniform_ring_buffer->alloc_from_heap(512, m_uniform_buffer_offset_align); buf = static_cast(mapping.first); scale_offset_offset = mapping.second; fill_scale_offset_data(buf, false); @@ -707,7 +801,7 @@ bool GLGSRender::load_program() memcpy(buf + 19 * sizeof(float), &alpha_ref, sizeof(float)); // Vertex constants - mapping = m_uniform_ring_buffer.alloc_from_reserve(8192); + mapping = m_uniform_ring_buffer->alloc_from_heap(8192, m_uniform_buffer_offset_align); buf = static_cast(mapping.first); vertex_constants_offset = mapping.second; fill_vertex_program_constants_data(buf); @@ -715,21 +809,22 @@ bool GLGSRender::load_program() // Fragment constants if (fragment_constants_size) { - mapping = m_uniform_ring_buffer.alloc_from_reserve(fragment_constants_size); + mapping = m_uniform_ring_buffer->alloc_from_heap(fragment_constants_size, m_uniform_buffer_offset_align); buf = static_cast(mapping.first); fragment_constants_offset = mapping.second; m_prog_buffer.fill_fragment_constants_buffer({ reinterpret_cast(buf), gsl::narrow(fragment_constants_size) }, fragment_program); } - m_uniform_ring_buffer.unmap(); - - m_uniform_ring_buffer.bind_range(0, scale_offset_offset, 512); - m_uniform_ring_buffer.bind_range(1, vertex_constants_offset, 8192); + m_uniform_ring_buffer->bind_range(0, scale_offset_offset, 512); + m_uniform_ring_buffer->bind_range(1, vertex_constants_offset, 8192); if (fragment_constants_size) { - m_uniform_ring_buffer.bind_range(2, fragment_constants_offset, fragment_constants_size); + m_uniform_ring_buffer->bind_range(2, fragment_constants_offset, fragment_constants_size); } + if (manually_flush_ring_buffers) + m_uniform_ring_buffer->unmap(); + return true; } diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.h b/rpcs3/Emu/RSX/GL/GLGSRender.h index 7b11e21efe..ce25905c53 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.h +++ b/rpcs3/Emu/RSX/GL/GLGSRender.h @@ -26,19 +26,25 @@ private: gl::gl_texture_cache m_gl_texture_cache; gl::texture m_gl_attrib_buffers[rsx::limits::vertex_count]; - gl::ring_buffer m_attrib_ring_buffer; - gl::ring_buffer m_uniform_ring_buffer; - gl::ring_buffer m_index_ring_buffer; + + std::unique_ptr m_attrib_ring_buffer; + std::unique_ptr m_uniform_ring_buffer; + std::unique_ptr m_index_ring_buffer; u32 m_draw_calls = 0; u32 m_begin_time = 0; u32 m_draw_time = 0; u32 m_vertex_upload_time = 0; u32 m_textures_upload_time = 0; + + //Compare to see if transform matrix have changed + size_t m_transform_buffer_hash = 0; GLint m_min_texbuffer_alignment = 256; GLint m_uniform_buffer_offset_align = 256; + bool manually_flush_ring_buffers = false; + gl::text_writer m_text_printer; public: diff --git a/rpcs3/Emu/RSX/GL/GLHelpers.h b/rpcs3/Emu/RSX/GL/GLHelpers.h index 756df2adff..c50b22a7e8 100644 --- a/rpcs3/Emu/RSX/GL/GLHelpers.h +++ b/rpcs3/Emu/RSX/GL/GLHelpers.h @@ -6,6 +6,7 @@ #include #include #include +#include #include "OpenGL.h" #include "../GCM.h" @@ -383,7 +384,7 @@ namespace gl read_write = GL_READ_WRITE }; - private: + protected: GLuint m_id = GL_NONE; GLsizeiptr m_size = 0; target m_target = target::array; @@ -587,89 +588,210 @@ namespace gl class ring_buffer : public buffer { - u32 m_data_loc = 0; + protected: - u32 m_mapped_block_size = 0; - u32 m_mapped_block_offset; - u32 m_mapped_reserve_offset; - u32 m_mapped_bytes_available; - void *m_mapped_base = nullptr; + u32 m_data_loc = 0; + u32 m_limit = 0; + void *m_memory_mapping = nullptr; + + GLsync m_fence = nullptr; + + void wait_for_sync() + { + verify(HERE), m_fence != nullptr; + + bool done = false; + while (!done) + { + //Check if we are finished, wait time = 1us + GLenum err = glClientWaitSync(m_fence, GL_SYNC_FLUSH_COMMANDS_BIT, 1000); + switch (err) + { + default: + LOG_ERROR(RSX, "err Returned 0x%X", err); + case GL_ALREADY_SIGNALED: + case GL_CONDITION_SATISFIED: + done = true; + break; + case GL_TIMEOUT_EXPIRED: + continue; + } + } + + glDeleteSync(m_fence); + m_fence = nullptr; + } public: - std::pair alloc_and_map(u32 alloc_size) + + virtual void recreate(GLsizeiptr size, const void* data = nullptr) { - alloc_size = align(alloc_size, 0x100); - - buffer::bind(); - u32 limit = m_data_loc + alloc_size; - if (limit > buffer::size()) + if (m_id) { - if (alloc_size > buffer::size()) - { - buffer::data(alloc_size); - } - - m_data_loc = 0; + wait_for_sync(); + remove(); } + + buffer::create(); - void *ptr = glMapBufferRange((GLenum)buffer::current_target(), m_data_loc, alloc_size, - GL_MAP_WRITE_BIT|GL_MAP_INVALIDATE_RANGE_BIT|GL_MAP_UNSYNCHRONIZED_BIT); + glBindBuffer((GLenum)m_target, m_id); + glBufferStorage((GLenum)m_target, size, data, GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT); + m_memory_mapping = glMapBufferRange((GLenum)m_target, 0, size, GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT); + + verify(HERE), m_memory_mapping != nullptr; + m_data_loc = 0; + m_limit = size; + } + + void create(target target_, GLsizeiptr size, const void* data_ = nullptr) + { + m_target = target_; + recreate(size, data_); + } + + virtual std::pair alloc_from_heap(u32 alloc_size, u16 alignment) + { u32 offset = m_data_loc; - m_data_loc += alloc_size; - return std::make_pair(ptr, offset); - } + if (m_data_loc) offset = align(offset, alignment); - void unmap() - { - buffer::unmap(); - m_mapped_block_size = 0; - m_mapped_base = 0; - } - - void reserve_and_map(u32 max_size) - { - max_size = align(max_size, 0x1000); - auto mapping = alloc_and_map(max_size); - m_mapped_base = mapping.first; - m_mapped_block_offset = mapping.second; - m_mapped_reserve_offset = 0; - m_mapped_bytes_available = max_size; - } - - std::pair alloc_from_reserve(u32 size, u32 alignment = 16) - { - size = align(size, alignment); - - if (m_mapped_bytes_available < size || !m_mapped_base) + if ((offset + alloc_size) > m_limit) { - if (m_mapped_base) - { - //This doesn't really work for some reason, probably since the caller should bind the target - //before making this call as the block may be reallocated - LOG_ERROR(RSX, "reserved allocation exceeded. check for corruption!"); - unmap(); - } - - reserve_and_map((size > 4096) ? size : 4096); + //TODO: Measure the stall here + wait_for_sync(); + m_data_loc = 0; + offset = 0; } - verify(HERE), m_mapped_bytes_available >= size; + if (!m_data_loc) + { + verify(HERE), m_fence == nullptr; + m_fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); + } - void *ptr = (char*)m_mapped_base + m_mapped_reserve_offset; - u32 offset = m_mapped_reserve_offset + m_mapped_block_offset; - m_mapped_reserve_offset += size; - m_mapped_bytes_available -= size; - - verify(HERE), (offset & (alignment - 1)) == 0; - return std::make_pair(ptr, offset); + //Align data loc to 256; allows some "guard" region so we dont trample our own data inadvertently + m_data_loc = align(offset + alloc_size, 256); + return std::make_pair(((char*)m_memory_mapping) + offset, offset); } + virtual void remove() + { + if (m_memory_mapping) + { + glBindBuffer((GLenum)m_target, m_id); + glUnmapBuffer((GLenum)m_target); + + m_memory_mapping = nullptr; + m_data_loc = 0; + m_limit = 0; + } + + glDeleteBuffers(1, &m_id); + m_id = 0; + } + + virtual void reserve_storage_on_heap(u32 alloc_size) {} + + virtual void unmap() {} + void bind_range(u32 index, u32 offset, u32 size) const { glBindBufferRange((GLenum)current_target(), index, id(), offset, size); } }; + class legacy_ring_buffer : public ring_buffer + { + u32 m_mapped_bytes = 0; + u32 m_mapping_offset = 0; + + public: + + void recreate(GLsizeiptr size, const void* data = nullptr) override + { + if (m_id) + remove(); + + buffer::create(); + buffer::data(size, data); + + m_memory_mapping = nullptr; + m_data_loc = 0; + m_limit = size; + } + + void create(target target_, GLsizeiptr size, const void* data_ = nullptr) + { + m_target = target_; + recreate(size, data_); + } + + void reserve_storage_on_heap(u32 alloc_size) override + { + verify (HERE), m_memory_mapping == nullptr; + + u32 offset = m_data_loc; + if (m_data_loc) offset = align(offset, 256); + + if ((offset + alloc_size) > m_limit) + { + buffer::data(m_limit, nullptr); + m_data_loc = 0; + } + + glBindBuffer((GLenum)m_target, m_id); + m_memory_mapping = glMapBufferRange((GLenum)m_target, m_data_loc, align(alloc_size, 256), GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_UNSYNCHRONIZED_BIT); + m_mapped_bytes = align(alloc_size, 256); + m_mapping_offset = m_data_loc; + + verify(HERE), m_mapped_bytes >= alloc_size; + } + + std::pair alloc_from_heap(u32 alloc_size, u16 alignment) override + { + u32 offset = m_data_loc; + if (m_data_loc) offset = align(offset, alignment); + + u32 padding = (offset - m_data_loc); + u32 real_size = padding + alloc_size; + + if (real_size > m_mapped_bytes) + { + //Missed allocation. We take a performance hit on doing this. + //Overallocate slightly for the next allocation if requested size is too small + unmap(); + reserve_storage_on_heap(std::max(real_size, 4096U)); + + offset = m_data_loc; + if (m_data_loc) offset = align(offset, alignment); + + padding = (offset - m_data_loc); + real_size = padding + alloc_size; + } + + m_data_loc = offset + alloc_size; + m_mapped_bytes -= real_size; + + u32 local_offset = (offset - m_mapping_offset); + return std::make_pair(((char*)m_memory_mapping) + local_offset, offset); + } + + void remove() override + { + ring_buffer::remove(); + m_mapped_bytes = 0; + } + + void unmap() override + { + buffer::bind(); + buffer::unmap(); + + m_memory_mapping = nullptr; + m_mapped_bytes = 0; + m_mapping_offset = 0; + } + }; + class vao { template diff --git a/rpcs3/Emu/RSX/GL/GLProcTable.h b/rpcs3/Emu/RSX/GL/GLProcTable.h index a2e1875f98..0e3ce9a83f 100644 --- a/rpcs3/Emu/RSX/GL/GLProcTable.h +++ b/rpcs3/Emu/RSX/GL/GLProcTable.h @@ -176,6 +176,14 @@ OPENGL_PROC(PFNGLTEXTUREBUFFERRANGEEXTPROC, TextureBufferRangeEXT); //ARB_Copy_Image OPENGL_PROC(PFNGLCOPYIMAGESUBDATAPROC, CopyImageSubData); +//ARB_Buffer_Storage +OPENGL_PROC(PFNGLBUFFERSTORAGEPROC, BufferStorage); + +//ARB_sync +OPENGL_PROC(PFNGLFENCESYNCPROC, FenceSync); +OPENGL_PROC(PFNGLCLIENTWAITSYNCPROC, ClientWaitSync); +OPENGL_PROC(PFNGLDELETESYNCPROC, DeleteSync); + //KHR_debug OPENGL_PROC(PFNGLDEBUGMESSAGECALLBACKPROC, DebugMessageCallback); diff --git a/rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp b/rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp index 19c9e4914a..0feb5f55ff 100644 --- a/rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp +++ b/rpcs3/Emu/RSX/GL/GLVertexBuffers.cpp @@ -22,13 +22,16 @@ namespace u32 to_gl_internal_type(rsx::vertex_base_type type, u8 size) { /** - * The buffer texture spec only allows fetches aligned to 8, 16, 32, etc... + * NOTE 1. The buffer texture spec only allows fetches aligned to 8, 16, 32, etc... * This rules out most 3-component formats, except for the 32-wide RGB32F, RGB32I, RGB32UI + * + * NOTE 2. While s1 & cmp types are signed normalized 16-bit integers, some GPU vendors dont support texture buffer access + * using these formats. Pass a 16 bit unnormalized integer and convert it in the vertex shader */ - const u32 vec1_types[] = { GL_R16, GL_R32F, GL_R16F, GL_R8, GL_R16I, GL_R16, GL_R8UI }; - const u32 vec2_types[] = { GL_RG16, GL_RG32F, GL_RG16F, GL_RG8, GL_RG16I, GL_RG16, GL_RG8UI }; - const u32 vec3_types[] = { GL_RGBA16, GL_RGB32F, GL_RGBA16F, GL_RGBA8, GL_RGBA16I, GL_RGBA16, GL_RGBA8UI }; //VEC3 COMPONENTS NOT SUPPORTED! - const u32 vec4_types[] = { GL_RGBA16, GL_RGBA32F, GL_RGBA16F, GL_RGBA8, GL_RGBA16I, GL_RGBA16, GL_RGBA8UI }; + const u32 vec1_types[] = { GL_R16I, GL_R32F, GL_R16F, GL_R8, GL_R16I, GL_RGBA16I, GL_R8UI }; + const u32 vec2_types[] = { GL_RG16I, GL_RG32F, GL_RG16F, GL_RG8, GL_RG16I, GL_RGBA16I, GL_RG8UI }; + const u32 vec3_types[] = { GL_RGBA16I, GL_RGB32F, GL_RGBA16F, GL_RGBA8, GL_RGBA16I, GL_RGBA16I, GL_RGBA8UI }; //VEC3 COMPONENTS NOT SUPPORTED! + const u32 vec4_types[] = { GL_RGBA16I, GL_RGBA32F, GL_RGBA16F, GL_RGBA8, GL_RGBA16I, GL_RGBA16I, GL_RGBA8UI }; const u32* vec_selectors[] = { 0, vec1_types, vec2_types, vec3_types, vec4_types }; @@ -152,7 +155,7 @@ namespace } u32 first = 0; - auto mapping = dst.alloc_and_map(vertex_draw_count * sizeof(u16)); + auto mapping = dst.alloc_from_heap(vertex_draw_count * sizeof(u16), 256); char *mapped_buffer = (char *)mapping.first; for (const auto &pair : first_count_commands) @@ -163,7 +166,6 @@ namespace first += pair.second; } - dst.unmap(); return std::make_tuple(vertex_draw_count, mapping.second); } @@ -201,12 +203,10 @@ namespace struct vertex_buffer_visitor { - vertex_buffer_visitor(u32 vtx_cnt, u32 texture_idx_offset, gl::ring_buffer& heap, - gl::glsl::program* prog, gl::texture* attrib_buffer, u32 min_texbuffer_offset) + vertex_buffer_visitor(u32 vtx_cnt, gl::ring_buffer& heap, gl::glsl::program* prog, gl::texture* attrib_buffer, u32 min_texbuffer_offset) : vertex_count(vtx_cnt) , m_attrib_ring_info(heap) , m_program(prog) - , texture_index_offset(texture_idx_offset) , m_gl_attrib_buffers(attrib_buffer) , m_min_texbuffer_alignment(min_texbuffer_offset) { @@ -226,7 +226,7 @@ namespace auto& texture = m_gl_attrib_buffers[vertex_array.index]; u32 buffer_offset = 0; - auto mapping = m_attrib_ring_info.alloc_from_reserve(data_size, m_min_texbuffer_alignment); + auto mapping = m_attrib_ring_info.alloc_from_heap(data_size, m_min_texbuffer_alignment); gsl::byte* dst = static_cast(mapping.first); buffer_offset = mapping.second; gsl::span dest_span(dst, data_size); @@ -236,10 +236,6 @@ namespace write_vertex_array_data_to_buffer(dest_span, vertex_array.data, vertex_count, vertex_array.type, vertex_array.attribute_size, vertex_array.stride, rsx::get_vertex_type_size_on_host(vertex_array.type, vertex_array.attribute_size)); texture.copy_from(m_attrib_ring_info, gl_type, buffer_offset, data_size); - - //Link texture to uniform - glActiveTexture(GL_TEXTURE0 + texture_index_offset + vertex_array.index); - texture.bind(); } void operator()(const rsx::vertex_array_register& vertex_register) @@ -257,15 +253,11 @@ namespace auto& texture = m_gl_attrib_buffers[vertex_register.index]; - auto mapping = m_attrib_ring_info.alloc_from_reserve(data_size, m_min_texbuffer_alignment); + auto mapping = m_attrib_ring_info.alloc_from_heap(data_size, m_min_texbuffer_alignment); u8 *dst = static_cast(mapping.first); memcpy(dst, vertex_register.data.data(), element_size); texture.copy_from(m_attrib_ring_info, gl_type, mapping.second, data_size); - - //Link texture to uniform - glActiveTexture(GL_TEXTURE0 + texture_index_offset + vertex_register.index); - texture.bind(); break; } default: @@ -282,7 +274,6 @@ namespace u32 vertex_count; gl::ring_buffer& m_attrib_ring_info; gl::glsl::program* m_program; - u32 texture_index_offset; gl::texture* m_gl_attrib_buffers; GLint m_min_texbuffer_alignment; }; @@ -325,13 +316,13 @@ namespace rsx::method_registers.current_draw_clause.first_count_commands, rsx::method_registers.current_draw_clause.primitive, m_index_ring_buffer); - upload_vertex_buffers(min_index, max_index, max_vertex_attrib_size, texture_index_offset); + upload_vertex_buffers(min_index, max_index, max_vertex_attrib_size); return std::make_tuple(index_count, std::make_tuple(static_cast(GL_UNSIGNED_SHORT), offset_in_index_buffer)); } - upload_vertex_buffers(min_index, max_index, max_vertex_attrib_size, texture_index_offset); + upload_vertex_buffers(min_index, max_index, max_vertex_attrib_size); return std::make_tuple(vertex_count, std::optional>()); } @@ -351,7 +342,7 @@ namespace index_count = (u32)get_index_count(rsx::method_registers.current_draw_clause.primitive, vertex_count); u32 max_size = index_count * type_size; - auto mapping = m_index_ring_buffer.alloc_and_map(max_size); + auto mapping = m_index_ring_buffer.alloc_from_heap(max_size, 256); void* ptr = mapping.first; u32 offset_in_index_buffer = mapping.second; @@ -359,9 +350,7 @@ namespace command.raw_index_buffer, ptr, type, rsx::method_registers.current_draw_clause.primitive, rsx::method_registers.current_draw_clause.first_count_commands, vertex_count); - m_index_ring_buffer.unmap(); - - upload_vertex_buffers(0, max_index, max_vertex_attrib_size, texture_index_offset); + upload_vertex_buffers(0, max_index, max_vertex_attrib_size); return std::make_tuple(index_count, std::make_tuple(get_index_type(type), offset_in_index_buffer)); } @@ -370,13 +359,13 @@ namespace const rsx::draw_inlined_array& command) { // We need to go through array to determine vertex count so upload it - u32 vertex_count = upload_inline_array(max_vertex_attrib_size, texture_index_offset); + u32 vertex_count = upload_inline_array(max_vertex_attrib_size); if (!gl::is_primitive_native(rsx::method_registers.current_draw_clause.primitive)) { u32 offset_in_index_buffer; u32 index_count; std::tie(index_count, offset_in_index_buffer) = get_index_array_for_emulated_non_indexed_draw( - rsx::method_registers.current_draw_clause.first_count_commands, + { std::make_pair(0, vertex_count) }, rsx::method_registers.current_draw_clause.primitive, m_index_ring_buffer); return std::make_tuple(index_count, std::make_tuple(static_cast(GL_UNSIGNED_SHORT), offset_in_index_buffer)); @@ -385,8 +374,6 @@ namespace } private: - const u32 texture_index_offset = - rsx::limits::fragment_textures_count + rsx::limits::vertex_textures_count; u32 max_vertex_attrib_size = 0; gl::ring_buffer& m_index_ring_buffer; gl::ring_buffer& m_attrib_ring_buffer; @@ -397,21 +384,18 @@ namespace std::function>)> get_vertex_buffers; - void upload_vertex_buffers(u32 min_index, u32 max_index, const u32& max_vertex_attrib_size, - const u32& texture_index_offset) + void upload_vertex_buffers(u32 min_index, u32 max_index, const u32& max_vertex_attrib_size) { u32 verts_allocated = max_index - min_index + 1; - __glcheck m_attrib_ring_buffer.reserve_and_map(verts_allocated * max_vertex_attrib_size); - vertex_buffer_visitor visitor(verts_allocated, texture_index_offset, m_attrib_ring_buffer, + vertex_buffer_visitor visitor(verts_allocated, m_attrib_ring_buffer, m_program, m_gl_attrib_buffers, m_min_texbuffer_alignment); const auto& vertex_buffers = get_vertex_buffers(rsx::method_registers, {{min_index, verts_allocated}}); for (const auto& vbo : vertex_buffers) std::apply_visitor(visitor, vbo); - m_attrib_ring_buffer.unmap(); } - u32 upload_inline_array(const u32& max_vertex_attrib_size, const u32& texture_index_offset) + u32 upload_inline_array(const u32& max_vertex_attrib_size) { u32 stride = 0; u32 offsets[rsx::limits::vertex_count] = {0}; @@ -427,7 +411,6 @@ namespace u32 vertex_draw_count = (u32)(rsx::method_registers.current_draw_clause.inline_vertex_array.size() * sizeof(u32)) / stride; - m_attrib_ring_buffer.reserve_and_map(vertex_draw_count * max_vertex_attrib_size); for (int index = 0; index < rsx::limits::vertex_count; ++index) { auto& vertex_info = rsx::method_registers.vertex_arrays_info[index]; @@ -435,7 +418,7 @@ namespace int location; if (!m_program->uniforms.has_location(s_reg_table[index], &location)) continue; - if (!vertex_info.size()) // disabled, bind a null sampler + if (!vertex_info.size()) continue; const u32 element_size = @@ -447,7 +430,7 @@ namespace u8* src = reinterpret_cast(rsx::method_registers.current_draw_clause.inline_vertex_array.data()); - auto mapping = m_attrib_ring_buffer.alloc_from_reserve(data_size, m_min_texbuffer_alignment); + auto mapping = m_attrib_ring_buffer.alloc_from_heap(data_size, m_min_texbuffer_alignment); u8* dst = static_cast(mapping.first); src += offsets[index]; @@ -469,11 +452,6 @@ namespace } texture.copy_from(m_attrib_ring_buffer, gl_type, mapping.second, data_size); - - // Link texture to uniform - glActiveTexture(GL_TEXTURE0 + texture_index_offset + index); - texture.bind(); - m_attrib_ring_buffer.unmap(); } return vertex_draw_count; } @@ -483,7 +461,7 @@ namespace std::tuple>> GLGSRender::set_vertex_buffer() { std::chrono::time_point then = std::chrono::system_clock::now(); - auto result = std::apply_visitor(draw_command_visitor(m_index_ring_buffer, m_attrib_ring_buffer, + auto result = std::apply_visitor(draw_command_visitor(*m_index_ring_buffer, *m_attrib_ring_buffer, m_gl_attrib_buffers, m_program, m_min_texbuffer_alignment, [this](const auto& state, const auto& list) { return this->get_vertex_buffers(state, list); diff --git a/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp b/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp index 33bc48948a..286c66c85d 100644 --- a/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp +++ b/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp @@ -122,7 +122,7 @@ void GLVertexDecompilerThread::insertInputs(std::stringstream & OS, const std::v { if (attrib.location == std::get<0>(item)) { - if (attrib.int_type) is_int = true; + if (attrib.int_type || attrib.flags & GL_VP_SINT_MASK) is_int = true; break; } } @@ -247,9 +247,18 @@ void add_input(std::stringstream & OS, const ParamItem &PI, const std::vector> index) & 0x1), true, - is_int_type(rsx::method_registers.vertex_arrays_info[index].type())}); + is_int_type(rsx::method_registers.vertex_arrays_info[index].type()), 0}); } else if (rsx::method_registers.register_vertex_info[index].size > 0) { @@ -809,7 +810,7 @@ namespace rsx rsx::method_registers.register_vertex_info[index].frequency, !!((modulo_mask >> index) & 0x1), false, - is_int_type(rsx::method_registers.vertex_arrays_info[index].type())}); + is_int_type(rsx::method_registers.vertex_arrays_info[index].type()), 0}); } } return result; diff --git a/rpcs3/Emu/RSX/RSXVertexProgram.h b/rpcs3/Emu/RSX/RSXVertexProgram.h index e17bcbce26..9b74ccf687 100644 --- a/rpcs3/Emu/RSX/RSXVertexProgram.h +++ b/rpcs3/Emu/RSX/RSXVertexProgram.h @@ -211,10 +211,12 @@ struct rsx_vertex_input bool is_modulo; // either modulo frequency or divide frequency bool is_array; // false if "reg value" bool int_type; + u32 flags; //Initially zero, to be optionally filled by the backend bool operator==(const rsx_vertex_input other) const { - return location == other.location && size == other.size && frequency == other.frequency && is_modulo == other.is_modulo && is_array == other.is_array && int_type == other.int_type; + return location == other.location && size == other.size && frequency == other.frequency && is_modulo == other.is_modulo && + is_array == other.is_array && int_type == other.int_type && flags == other.flags; } }; diff --git a/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp b/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp index 2c78f3f48a..f078b42111 100644 --- a/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp +++ b/rpcs3/Emu/RSX/VK/VKVertexBuffers.cpp @@ -41,10 +41,10 @@ namespace vk * Set up buffer fetches to only work on 4-component access. This is hardware dependant so we use 4-component access to avoid branching based on IHV implementation * AMD GCN 1.0 for example does not support RGB32 formats for texel buffers */ - const VkFormat vec1_types[] = { VK_FORMAT_R16_UNORM, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R16_SFLOAT, VK_FORMAT_R8_UNORM, VK_FORMAT_R16_SINT, VK_FORMAT_R16_UNORM, VK_FORMAT_R8_UINT }; - const VkFormat vec2_types[] = { VK_FORMAT_R16G16_UNORM, VK_FORMAT_R32G32_SFLOAT, VK_FORMAT_R16G16_SFLOAT, VK_FORMAT_R8G8_UNORM, VK_FORMAT_R16G16_SINT, VK_FORMAT_R16G16_UNORM, VK_FORMAT_R8G8_UINT }; - const VkFormat vec3_types[] = { VK_FORMAT_R16G16B16A16_UNORM, VK_FORMAT_R32G32B32A32_SFLOAT, VK_FORMAT_R16G16B16A16_SFLOAT, VK_FORMAT_R8G8B8A8_UNORM, VK_FORMAT_R16G16B16A16_SINT, VK_FORMAT_R16G16B16A16_UNORM, VK_FORMAT_R8G8B8A8_UINT }; //VEC3 COMPONENTS NOT SUPPORTED! - const VkFormat vec4_types[] = { VK_FORMAT_R16G16B16A16_UNORM, VK_FORMAT_R32G32B32A32_SFLOAT, VK_FORMAT_R16G16B16A16_SFLOAT, VK_FORMAT_R8G8B8A8_UNORM, VK_FORMAT_R16G16B16A16_SINT, VK_FORMAT_R16G16B16A16_UNORM, VK_FORMAT_R8G8B8A8_UINT }; + const VkFormat vec1_types[] = { VK_FORMAT_R16_SNORM, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R16_SFLOAT, VK_FORMAT_R8_UNORM, VK_FORMAT_R16_SINT, VK_FORMAT_R16G16B16A16_SNORM, VK_FORMAT_R8_UINT }; + const VkFormat vec2_types[] = { VK_FORMAT_R16G16_SNORM, VK_FORMAT_R32G32_SFLOAT, VK_FORMAT_R16G16_SFLOAT, VK_FORMAT_R8G8_UNORM, VK_FORMAT_R16G16_SINT, VK_FORMAT_R16G16B16A16_SNORM, VK_FORMAT_R8G8_UINT }; + const VkFormat vec3_types[] = { VK_FORMAT_R16G16B16A16_SNORM, VK_FORMAT_R32G32B32A32_SFLOAT, VK_FORMAT_R16G16B16A16_SFLOAT, VK_FORMAT_R8G8B8A8_UNORM, VK_FORMAT_R16G16B16A16_SINT, VK_FORMAT_R16G16B16A16_SNORM, VK_FORMAT_R8G8B8A8_UINT }; //VEC3 COMPONENTS NOT SUPPORTED! + const VkFormat vec4_types[] = { VK_FORMAT_R16G16B16A16_SNORM, VK_FORMAT_R32G32B32A32_SFLOAT, VK_FORMAT_R16G16B16A16_SFLOAT, VK_FORMAT_R8G8B8A8_UNORM, VK_FORMAT_R16G16B16A16_SINT, VK_FORMAT_R16G16B16A16_SNORM, VK_FORMAT_R8G8B8A8_UINT }; const VkFormat* vec_selectors[] = { 0, vec1_types, vec2_types, vec3_types, vec4_types }; diff --git a/rpcs3/Gui/SettingsDialog.cpp b/rpcs3/Gui/SettingsDialog.cpp index 8ceebc14e8..aa882ddf27 100644 --- a/rpcs3/Gui/SettingsDialog.cpp +++ b/rpcs3/Gui/SettingsDialog.cpp @@ -307,6 +307,7 @@ SettingsDialog::SettingsDialog(wxWindow* parent) wxCheckBox* chbox_gs_vsync = new wxCheckBox(p_graphics, wxID_ANY, "VSync"); wxCheckBox* chbox_gs_debug_output = new wxCheckBox(p_graphics, wxID_ANY, "Debug Output"); wxCheckBox* chbox_gs_overlay = new wxCheckBox(p_graphics, wxID_ANY, "Debug overlay"); + wxCheckBox* chbox_gs_gl_legacy_buffers = new wxCheckBox(p_graphics, wxID_ANY, "Use Legacy OpenGL Buffers"); wxCheckBox* chbox_audio_dump = new wxCheckBox(p_audio, wxID_ANY, "Dump to file"); wxCheckBox* chbox_audio_conv = new wxCheckBox(p_audio, wxID_ANY, "Convert to 16 bit"); wxCheckBox* chbox_hle_exitonstop = new wxCheckBox(p_misc, wxID_ANY, "Exit RPCS3 when process finishes"); @@ -382,6 +383,7 @@ SettingsDialog::SettingsDialog(wxWindow* parent) pads.emplace_back(std::make_unique(cfg_location{ "Video", "VSync" }, chbox_gs_vsync)); pads.emplace_back(std::make_unique(cfg_location{ "Video", "Debug output" }, chbox_gs_debug_output)); pads.emplace_back(std::make_unique(cfg_location{ "Video", "Debug overlay" }, chbox_gs_overlay)); + pads.emplace_back(std::make_unique(cfg_location{ "Video", "Use Legacy OpenGL Buffers (Debug)" }, chbox_gs_gl_legacy_buffers)); pads.emplace_back(std::make_unique(cfg_location{ "Audio", "Renderer" }, cbox_audio_out)); pads.emplace_back(std::make_unique(cfg_location{ "Audio", "Dump to file" }, chbox_audio_dump)); @@ -467,6 +469,7 @@ SettingsDialog::SettingsDialog(wxWindow* parent) s_subpanel_graphics1->Add(chbox_gs_read_color, wxSizerFlags().Border(wxALL, 5).Expand()); s_subpanel_graphics1->Add(chbox_gs_dump_depth, wxSizerFlags().Border(wxALL, 5).Expand()); s_subpanel_graphics1->Add(chbox_gs_read_depth, wxSizerFlags().Border(wxALL, 5).Expand()); + s_subpanel_graphics1->Add(chbox_gs_gl_legacy_buffers, wxSizerFlags().Border(wxALL, 5).Expand()); s_subpanel_graphics2->Add(s_round_gs_aspect, wxSizerFlags().Border(wxALL, 5).Expand()); s_subpanel_graphics2->Add(s_round_gs_frame_limit, wxSizerFlags().Border(wxALL, 5).Expand()); s_subpanel_graphics2->AddSpacer(68);