diff --git a/rpcs3/Emu/RSX/Common/BufferUtils.cpp b/rpcs3/Emu/RSX/Common/BufferUtils.cpp index 9e8f338f48..b278851f41 100644 --- a/rpcs3/Emu/RSX/Common/BufferUtils.cpp +++ b/rpcs3/Emu/RSX/Common/BufferUtils.cpp @@ -453,6 +453,10 @@ void stream_vector(void *dst, u32 x, u32 y, u32 z, u32 w) _mm_stream_si128((__m128i*)dst, vector); } +void stream_vector(void *dst, f32 x, f32 y, f32 z, f32 w) +{ + stream_vector(dst, (u32&)x, (u32&)y, (u32&)z, (u32&)w); +} void stream_vector_from_memory(void *dst, void *src) { const __m128i &vector = _mm_loadu_si128((__m128i*)src); diff --git a/rpcs3/Emu/RSX/Common/BufferUtils.h b/rpcs3/Emu/RSX/Common/BufferUtils.h index db825d0827..f8213ec27a 100644 --- a/rpcs3/Emu/RSX/Common/BufferUtils.h +++ b/rpcs3/Emu/RSX/Common/BufferUtils.h @@ -49,6 +49,7 @@ void write_index_array_for_non_indexed_non_native_primitive_to_buffer(char* dst, /** * Stream a 128 bits vector to dst. */ +void stream_vector(void *dst, f32 x, f32 y, f32 z, f32 w); void stream_vector(void *dst, u32 x, u32 y, u32 z, u32 w); /** diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index ea543967d9..fd36f906e2 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -1,12 +1,11 @@ #include "stdafx.h" #include "Utilities/Config.h" #include "Emu/Memory/Memory.h" -#include "Emu/System.h" #include "GLGSRender.h" #include "rsx_gl_cache.h" -#include "../rsx_utils.h" #include "../rsx_methods.h" #include "../Common/BufferUtils.h" +#include "../rsx_utils.h" extern cfg::bool_entry g_cfg_rsx_debug_output; extern cfg::bool_entry g_cfg_rsx_overlay; @@ -153,7 +152,8 @@ void GLGSRender::begin() __glcheck glStencilOp(rsx::method_registers[NV4097_SET_STENCIL_OP_FAIL], rsx::method_registers[NV4097_SET_STENCIL_OP_ZFAIL], rsx::method_registers[NV4097_SET_STENCIL_OP_ZPASS]); - if (rsx::method_registers[NV4097_SET_TWO_SIDED_STENCIL_TEST_ENABLE]) { + if (rsx::method_registers[NV4097_SET_TWO_SIDED_STENCIL_TEST_ENABLE]) + { __glcheck glStencilMaskSeparate(GL_BACK, rsx::method_registers[NV4097_SET_BACK_STENCIL_MASK]); __glcheck glStencilFuncSeparate(GL_BACK, rsx::method_registers[NV4097_SET_BACK_STENCIL_FUNC], rsx::method_registers[NV4097_SET_BACK_STENCIL_FUNC_REF], rsx::method_registers[NV4097_SET_BACK_STENCIL_FUNC_MASK]); @@ -230,8 +230,6 @@ void GLGSRender::begin() __glcheck glCullFace(rsx::method_registers[NV4097_SET_CULL_FACE]); } - glDisable(GL_CULL_FACE); - __glcheck glFrontFace(rsx::method_registers[NV4097_SET_FRONT_FACE] ^ 1); __glcheck enable(rsx::method_registers[NV4097_SET_POLY_SMOOTH_ENABLE], GL_POLYGON_SMOOTH); @@ -248,7 +246,7 @@ void GLGSRender::begin() } std::chrono::time_point now = std::chrono::system_clock::now(); - m_begin_time += std::chrono::duration_cast(now - then).count(); + m_begin_time += (u32)std::chrono::duration_cast(now - then).count(); m_draw_calls++; } @@ -291,8 +289,8 @@ void GLGSRender::end() int location; if (m_program->uniforms.has_location("texture" + std::to_string(i), &location)) { - glProgramUniform1i(m_program->id(), location, texture_index); - m_gl_textures[i].init(texture_index, textures[i]); + __glcheck glProgramUniform1i(m_program->id(), location, texture_index); + __glcheck m_gl_textures[i].init(texture_index, textures[i]); texture_index++; @@ -322,26 +320,38 @@ void GLGSRender::end() */ } + __glcheck 0; + u32 offset_in_index_buffer = set_vertex_buffer(); m_vao.bind(); std::chrono::time_point then = std::chrono::system_clock::now(); if (g_cfg_rsx_debug_output) + { m_program->validate(); + } if (draw_command == rsx::draw_command::indexed) { rsx::index_array_type indexed_type = rsx::to_index_array_type(rsx::method_registers[NV4097_SET_INDEX_ARRAY_DMA] >> 4); if (indexed_type == rsx::index_array_type::u32) - __glcheck glDrawElements(gl::draw_mode(draw_mode), vertex_draw_count, GL_UNSIGNED_INT, (GLvoid *)(offset_in_index_buffer)); - if (indexed_type == rsx::index_array_type::u16) - __glcheck glDrawElements(gl::draw_mode(draw_mode), vertex_draw_count, GL_UNSIGNED_SHORT, (GLvoid *)(offset_in_index_buffer)); + { + __glcheck glDrawElements(gl::draw_mode(draw_mode), vertex_draw_count, GL_UNSIGNED_INT, (GLvoid *)(std::ptrdiff_t)offset_in_index_buffer); + } + else if (indexed_type == rsx::index_array_type::u16) + { + __glcheck glDrawElements(gl::draw_mode(draw_mode), vertex_draw_count, GL_UNSIGNED_SHORT, (GLvoid *)(std::ptrdiff_t)offset_in_index_buffer); + } + else + { + throw std::logic_error("bad index array type"); + } } else if (!is_primitive_native(draw_mode)) { - __glcheck glDrawElements(gl::draw_mode(draw_mode), vertex_draw_count, GL_UNSIGNED_SHORT, (GLvoid *)(offset_in_index_buffer)); + __glcheck glDrawElements(gl::draw_mode(draw_mode), vertex_draw_count, GL_UNSIGNED_SHORT, (GLvoid *)(std::ptrdiff_t)offset_in_index_buffer); } else { @@ -349,7 +359,7 @@ void GLGSRender::end() } std::chrono::time_point now = std::chrono::system_clock::now(); - m_draw_time += std::chrono::duration_cast(now - then).count(); + m_draw_time += (u32)std::chrono::duration_cast(now - then).count(); write_buffers(); @@ -377,8 +387,7 @@ void GLGSRender::set_viewport() rsx::window_origin shader_window_origin = rsx::to_window_origin((shader_window >> 12) & 0xf); - //TODO - if (true || shader_window_origin == rsx::window_origin::bottom) + if (shader_window_origin == rsx::window_origin::bottom) { __glcheck glViewport(viewport_x, viewport_y, viewport_w, viewport_h); __glcheck glScissor(scissor_x, scissor_y, scissor_w, scissor_h); @@ -387,11 +396,13 @@ void GLGSRender::set_viewport() { u16 shader_window_height = shader_window & 0xfff; - __glcheck glViewport(viewport_x, shader_window_height - viewport_y - viewport_h - 1, viewport_w, viewport_h); - __glcheck glScissor(scissor_x, shader_window_height - scissor_y - scissor_h - 1, scissor_w, scissor_h); + __glcheck glViewport(viewport_x, shader_window_height - viewport_y - viewport_h + 1, viewport_w, viewport_h); + __glcheck glScissor(scissor_x, shader_window_height - scissor_y - scissor_h + 1, scissor_w, scissor_h); } glEnable(GL_SCISSOR_TEST); + + __glcheck 0; } void GLGSRender::on_init_thread() @@ -406,6 +417,7 @@ void GLGSRender::on_init_thread() LOG_NOTICE(RSX, "%s", (const char*)glGetString(GL_VENDOR)); glEnable(GL_VERTEX_PROGRAM_POINT_SIZE); + glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &m_uniform_buffer_offset_align); glGetIntegerv(GL_TEXTURE_BUFFER_OFFSET_ALIGNMENT, &m_min_texbuffer_alignment); m_vao.create(); @@ -415,11 +427,11 @@ void GLGSRender::on_init_thread() tex.set_target(gl::texture::target::textureBuffer); } - m_attrib_ring_buffer.reset(new gl::ring_buffer(16 * 0x100000, gl::buffer::target::texture)); - m_uniform_ring_buffer.reset(new gl::ring_buffer(16 * 0x100000, gl::buffer::target::uniform)); - m_index_ring_buffer.reset(new gl::ring_buffer(0x100000, gl::buffer::target::element_array)); + m_attrib_ring_buffer.create(gl::buffer::target::texture, 16 * 0x100000); + m_uniform_ring_buffer.create(gl::buffer::target::uniform, 16 * 0x100000); + m_index_ring_buffer.create(gl::buffer::target::element_array, 0x100000); - m_vao.element_array_buffer = m_index_ring_buffer->get_buffer(); + m_vao.element_array_buffer = m_index_ring_buffer; m_gl_texture_cache.initialize_rtt_cache(); } @@ -446,15 +458,18 @@ void GLGSRender::on_exit() tex.remove(); } - m_attrib_ring_buffer->destroy(); - m_uniform_ring_buffer->destroy(); - m_index_ring_buffer->destroy(); + m_attrib_ring_buffer.remove(); + m_uniform_ring_buffer.remove(); + m_index_ring_buffer.remove(); } void nv4097_clear_surface(u32 arg, GLGSRender* renderer) { //LOG_NOTICE(Log::RSX, "nv4097_clear_surface(0x%x)", arg); - if (!rsx::method_registers[NV4097_SET_SURFACE_FORMAT]) return; + if (!rsx::method_registers[NV4097_SET_SURFACE_FORMAT]) + { + return; + } if ((arg & 0xf3) == 0) { @@ -475,9 +490,10 @@ void nv4097_clear_surface(u32 arg, GLGSRender* renderer) GLbitfield mask = 0; + rsx::surface_depth_format surface_depth_format = rsx::to_surface_depth_format((rsx::method_registers[NV4097_SET_SURFACE_FORMAT] >> 5) & 0x7); + if (arg & 0x1) { - rsx::surface_depth_format surface_depth_format = rsx::to_surface_depth_format((rsx::method_registers[NV4097_SET_SURFACE_FORMAT] >> 5) & 0x7); u32 max_depth_value = get_max_depth_value(surface_depth_format); u32 clear_depth = rsx::method_registers[NV4097_SET_ZSTENCIL_CLEAR_VALUE] >> 8; @@ -487,7 +503,7 @@ void nv4097_clear_surface(u32 arg, GLGSRender* renderer) mask |= GLenum(gl::buffers::depth); } - if (arg & 0x2) + if (surface_depth_format == rsx::surface_depth_format::z24s8 && arg & 0x2) { u8 clear_stencil = rsx::method_registers[NV4097_SET_ZSTENCIL_CLEAR_VALUE] & 0xff; @@ -535,50 +551,111 @@ bool GLGSRender::do_method(u32 cmd, u32 arg) return true; } +//binding 0 +struct alignas(4) glsl_matrix_buffer +{ + float viewport_matrix[4][4]; + float window_matrix[4][4]; + float normalize_matrix[4][4]; +}; + +//binding 1 +struct alignas(4) glsl_vertex_constants_buffer +{ + float vc[468][4]; +}; + +//binding 2 +struct alignas(4) glsl_fragment_constants_buffer +{ + float fc[2048][4]; +}; + +static void fill_matrix_buffer(glsl_matrix_buffer *buffer) +{ + rsx::fill_viewport_matrix(buffer->viewport_matrix, true); + rsx::fill_window_matrix(buffer->window_matrix, true); + + u32 viewport_horizontal = rsx::method_registers[NV4097_SET_VIEWPORT_HORIZONTAL]; + u32 viewport_vertical = rsx::method_registers[NV4097_SET_VIEWPORT_VERTICAL]; + + f32 viewport_x = f32(viewport_horizontal & 0xffff); + f32 viewport_y = f32(viewport_vertical & 0xffff); + f32 viewport_w = f32(viewport_horizontal >> 16); + f32 viewport_h = f32(viewport_vertical >> 16); + + u32 shader_window = rsx::method_registers[NV4097_SET_SHADER_WINDOW]; + + rsx::window_origin shader_window_origin = rsx::to_window_origin((shader_window >> 12) & 0xf); + u16 shader_window_height = shader_window & 0xfff; + + f32 left = viewport_x; + f32 right = viewport_x + viewport_w; + f32 top = viewport_y; + f32 bottom = viewport_y + viewport_h; + //f32 far_ = (f32&)rsx::method_registers[NV4097_SET_CLIP_MAX]; + //f32 near_ = (f32&)rsx::method_registers[NV4097_SET_CLIP_MIN]; + + if (shader_window_origin == rsx::window_origin::bottom) + { + top = shader_window_height - (viewport_y + viewport_h) + 1; + bottom = shader_window_height - viewport_y + 1; + } + + f32 scale_x = 2.0f / (right - left); + f32 scale_y = 2.0f / (top - bottom); + f32 scale_z = 2.0f; + + f32 offset_x = -(right + left) / (right - left); + f32 offset_y = -(top + bottom) / (top - bottom); + f32 offset_z = -1.0; + + if (shader_window_origin == rsx::window_origin::top) + { + scale_y = -scale_y; + offset_y = -offset_y; + } + + rsx::fill_scale_offset_matrix(buffer->normalize_matrix, true, offset_x, offset_y, offset_z, scale_x, scale_y, scale_z); +} + bool GLGSRender::load_program() { rsx::program_info info = programs_cache.get(get_raw_program(), rsx::decompile_language::glsl); m_program = (gl::glsl::program*)info.program; m_program->use(); - // u32 fragment_constants_sz = m_prog_buffer.get_fragment_constants_buffer_size(fragment_program); - u32 fragment_constants_sz = info.fragment_shader.decompiled->constants.size() * sizeof(f32) * 4; - fragment_constants_sz = std::max(32U, fragment_constants_sz); - u32 max_buffer_sz = 8192 + 512 + fragment_constants_sz; + u32 fragment_constants_count = info.fragment_shader.decompiled->constants.size(); + u32 fragment_constants_size = fragment_constants_count * sizeof(rsx::fragment_program::ucode_instr); - u32 is_alpha_tested = !!(rsx::method_registers[NV4097_SET_ALPHA_TEST_ENABLE]); - u8 alpha_ref_raw = (u8)(rsx::method_registers[NV4097_SET_ALPHA_REF] & 0xFF); - float alpha_ref = alpha_ref_raw / 255.f; + u32 max_buffer_sz = + align(sizeof(glsl_matrix_buffer), m_uniform_buffer_offset_align) + + align(sizeof(glsl_vertex_constants_buffer), m_uniform_buffer_offset_align) + + align(fragment_constants_size, m_uniform_buffer_offset_align); + + m_uniform_ring_buffer.reserve_and_map(max_buffer_sz); - u8 *buf; u32 scale_offset_offset; u32 vertex_constants_offset; u32 fragment_constants_offset; - m_uniform_ring_buffer->reserve_and_map(max_buffer_sz); - auto mapping = m_uniform_ring_buffer->alloc_from_reserve(512); - buf = static_cast(mapping.first); - scale_offset_offset = mapping.second; - - fill_scale_offset_data(buf, false); - memcpy(buf + 16 * sizeof(float), &rsx::method_registers[NV4097_SET_FOG_PARAMS], sizeof(float)); - memcpy(buf + 17 * sizeof(float), &rsx::method_registers[NV4097_SET_FOG_PARAMS + 1], sizeof(float)); - memcpy(buf + 18 * sizeof(float), &is_alpha_tested, sizeof(u32)); - memcpy(buf + 19 * sizeof(float), &alpha_ref, sizeof(float)); - - mapping = m_uniform_ring_buffer->alloc_from_reserve(512 * 16); - buf = static_cast(mapping.first); - vertex_constants_offset = mapping.second; - - fill_vertex_program_constants_data(buf); - - mapping = m_uniform_ring_buffer->alloc_from_reserve(fragment_constants_sz); - buf = static_cast(mapping.first); - fragment_constants_offset = mapping.second; - - // fill fragment constants - if (!info.fragment_shader.decompiled->constants.empty()) { + auto mapping = m_uniform_ring_buffer.alloc_from_reserve(sizeof(glsl_matrix_buffer), m_uniform_buffer_offset_align); + fill_matrix_buffer((glsl_matrix_buffer *)mapping.first); + scale_offset_offset = mapping.second; + } + + { + auto mapping = m_uniform_ring_buffer.alloc_from_reserve(sizeof(glsl_vertex_constants_buffer), m_uniform_buffer_offset_align); + fill_vertex_program_constants_data(mapping.first); + vertex_constants_offset = mapping.second; + } + + if (fragment_constants_size) + { + auto mapping = m_uniform_ring_buffer.alloc_from_reserve(fragment_constants_size, m_uniform_buffer_offset_align); + fragment_constants_offset = mapping.second; + u32 buffer_offset = 0; static const __m128i mask = _mm_set_epi8( @@ -587,31 +664,63 @@ bool GLGSRender::load_program() 0x6, 0x7, 0x4, 0x5, 0x2, 0x3, 0x0, 0x1); - auto ucode = (const rsx::fragment_program::ucode_instr*)info.fragment_shader.decompiled->raw->ucode_ptr; + auto ucode = (const rsx::fragment_program::ucode_instr *)info.fragment_shader.decompiled->raw->ucode_ptr; for (const auto& constant : info.fragment_shader.decompiled->constants) { - const void *data = ucode + (u32)(constant.id / (sizeof(f32) * 4)); + const void *data = ucode + u32(constant.id / sizeof(rsx::fragment_program::ucode_instr)); const __m128i &vector = _mm_loadu_si128((const __m128i*)data); const __m128i &shuffled_vector = _mm_shuffle_epi8(vector, mask); - _mm_stream_si128((__m128i*)((char*)buf + buffer_offset), shuffled_vector); + _mm_stream_si128((__m128i*)((char*)mapping.first + buffer_offset), shuffled_vector); - //float x = ((float*)((char*)buf + buffer_offset))[0]; - //float y = ((float*)((char*)buf + buffer_offset))[1]; - //float z = ((float*)((char*)buf + buffer_offset))[2]; - //float w = ((float*)((char*)buf + buffer_offset))[3]; + //float x = ((float*)((char*)mapping.first + buffer_offset))[0]; + //float y = ((float*)((char*)mapping.first + buffer_offset))[1]; + //float z = ((float*)((char*)mapping.first + buffer_offset))[2]; + //float w = ((float*)((char*)mapping.first + buffer_offset))[3]; //LOG_WARNING(RSX, "fc%u = {%g, %g, %g, %g}", constant.id, x, y, z, w); buffer_offset += 4 * sizeof(f32); } } - m_uniform_ring_buffer->unmap(); + m_uniform_ring_buffer.unmap(); - glBindBufferRange(GL_UNIFORM_BUFFER, 0, m_uniform_ring_buffer->get_buffer().id(), scale_offset_offset, 512); - glBindBufferRange(GL_UNIFORM_BUFFER, 1, m_uniform_ring_buffer->get_buffer().id(), vertex_constants_offset, 512 * 16); - glBindBufferRange(GL_UNIFORM_BUFFER, 2, m_uniform_ring_buffer->get_buffer().id(), fragment_constants_offset, fragment_constants_sz); + /* + { + + m_uniform_ring_buffer.bind(); + auto buffer_range = m_uniform_ring_buffer.allocate( + align(sizeof(glsl_matrix_buffer), m_uniform_buffer_offset_align) + + align(sizeof(glsl_vertex_constants_buffer), m_uniform_buffer_offset_align)); + + gl::allocator allocator{ m_uniform_ring_buffer, buffer_range }; + + matrix_buffer_range = allocator.allocate(sizeof(glsl_matrix_buffer), m_uniform_buffer_offset_align); + vertex_constants_buffer_range = allocator.allocate(sizeof(glsl_vertex_constants_buffer), m_uniform_buffer_offset_align); + + glsl_matrix_buffer *buffer = allocator.get(matrix_buffer_range); + fill_scale_offset_data(buffer, false); + fill_matrix_buffer(buffer); + fill_vertex_program_constants_data(allocator.get(vertex_constants_buffer_range)); + + if (contains_fragment_constants) + { + //fragment_constants_buffer_range = allocator.allocate(info.fragment_shader.decompiled->constants.size() * sizeof(f32) * 4); + } + } + + if (contains_fragment_constants) + { + //m_uniform_ring_buffer.bind_range(2, fragment_constants_buffer_range); + } + */ + + m_uniform_ring_buffer.bind_range(0, scale_offset_offset, sizeof(glsl_matrix_buffer)); + m_uniform_ring_buffer.bind_range(1, vertex_constants_offset, sizeof(glsl_vertex_constants_buffer)); + m_uniform_ring_buffer.bind_range(2, fragment_constants_offset, fragment_constants_size); + + __glcheck 0; return true; } diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.h b/rpcs3/Emu/RSX/GL/GLGSRender.h index 00151259ab..a3620c93ff 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.h +++ b/rpcs3/Emu/RSX/GL/GLGSRender.h @@ -26,9 +26,9 @@ private: gl::gl_texture_cache m_gl_texture_cache; gl::texture m_gl_attrib_buffers[rsx::limits::vertex_count]; - std::unique_ptr m_attrib_ring_buffer; - std::unique_ptr m_uniform_ring_buffer; - std::unique_ptr m_index_ring_buffer; + gl::ring_buffer m_attrib_ring_buffer; + gl::ring_buffer m_uniform_ring_buffer; + gl::ring_buffer m_index_ring_buffer; u32 m_draw_calls = 0; u32 m_begin_time = 0; @@ -36,6 +36,7 @@ private: u32 m_vertex_upload_time = 0; GLint m_min_texbuffer_alignment = 256; + GLint m_uniform_buffer_offset_align = 256; public: gl::fbo draw_fbo; diff --git a/rpcs3/Emu/RSX/GL/gl_helpers.h b/rpcs3/Emu/RSX/GL/gl_helpers.h index 0f8ada9369..201d7e99bb 100644 --- a/rpcs3/Emu/RSX/GL/gl_helpers.h +++ b/rpcs3/Emu/RSX/GL/gl_helpers.h @@ -47,7 +47,7 @@ namespace gl } } }; -#define __glcheck gl::__glcheck_impl_t{ __FILE__, __FUNCTION__, __LINE__ }, +#define __glcheck ::gl::__glcheck_impl_t{ __FILE__, __FUNCTION__, __LINE__ }, #else #define __glcheck #endif @@ -496,6 +496,11 @@ namespace gl glBindBuffer((GLenum)target_, m_id); } + void bind() const + { + bind(current_target()); + } + target current_target() const { return m_target; @@ -580,12 +585,9 @@ namespace gl } }; - class ring_buffer + class ring_buffer : public buffer { - buffer storage_buffer; - buffer::target m_target; u32 m_data_loc = 0; - u32 m_size; u32 m_mapped_block_size = 0; u32 m_mapped_block_offset; @@ -594,50 +596,39 @@ namespace gl void *m_mapped_base = nullptr; public: - ring_buffer(u32 initial_size, buffer::target target) + std::pair alloc_and_map(u32 alloc_size) { - storage_buffer.create(); - storage_buffer.data(initial_size); - m_size = initial_size; - m_target = target; - } + alloc_size = align(alloc_size, 0x100); - void destroy() - { - storage_buffer.remove(); - } - - std::pair alloc_and_map(u32 size) - { - size = (size + 255) & ~255; - - glBindBuffer((GLenum)m_target, storage_buffer.id()); - u32 limit = m_data_loc + size; - if (limit > m_size) + buffer::bind(); + u32 limit = m_data_loc + alloc_size; + if (limit > buffer::size()) { - if (size > m_size) - m_size = size; + if (alloc_size > buffer::size()) + { + buffer::data(alloc_size); + } - storage_buffer.data(m_size, nullptr); m_data_loc = 0; } - void *ptr = glMapBufferRange((GLenum)m_target, m_data_loc, size, GL_MAP_WRITE_BIT|GL_MAP_INVALIDATE_RANGE_BIT|GL_MAP_UNSYNCHRONIZED_BIT); + void *ptr = glMapBufferRange((GLenum)buffer::current_target(), m_data_loc, alloc_size, + GL_MAP_WRITE_BIT|GL_MAP_INVALIDATE_RANGE_BIT|GL_MAP_UNSYNCHRONIZED_BIT); u32 offset = m_data_loc; - m_data_loc += size; + m_data_loc += alloc_size; return std::make_pair(ptr, offset); } void unmap() { - glUnmapBuffer((GLenum)m_target); + buffer::unmap(); m_mapped_block_size = 0; m_mapped_base = 0; } void reserve_and_map(u32 max_size) { - max_size = (max_size + 4095) & ~4095; + max_size = align(max_size, 0x1000); auto mapping = alloc_and_map(max_size); m_mapped_base = mapping.first; m_mapped_block_offset = mapping.second; @@ -647,8 +638,7 @@ namespace gl std::pair alloc_from_reserve(u32 size, u32 alignment = 16) { - alignment -= 1; - size = (size + alignment) & ~alignment; + size = align(size, alignment); if (m_mapped_bytes_available < size || !m_mapped_base) { @@ -670,13 +660,13 @@ namespace gl m_mapped_reserve_offset += size; m_mapped_bytes_available -= size; - EXPECTS((offset & alignment) == 0); + EXPECTS((offset & (alignment - 1)) == 0); return std::make_pair(ptr, offset); } - buffer& get_buffer() + void bind_range(u32 index, u32 offset, u32 size) const { - return storage_buffer; + glBindBufferRange((GLenum)current_target(), index, id(), offset, size); } }; diff --git a/rpcs3/Emu/RSX/GL/rsx_gl_cache.cpp b/rpcs3/Emu/RSX/GL/rsx_gl_cache.cpp index db290e3580..aefc243bab 100644 --- a/rpcs3/Emu/RSX/GL/rsx_gl_cache.cpp +++ b/rpcs3/Emu/RSX/GL/rsx_gl_cache.cpp @@ -247,17 +247,16 @@ rsx::complete_shader glsl_complete_shader(const rsx::decompiled_shader &shader, result.code += "out vec4 wpos;\n"; // TODO - if (1) + if (0) { finalize += "\tgl_Position = o0;\n"; - finalize += "\tgl_Position = gl_Position * viewport_matrix;\n"; } else { finalize += " wpos = window_matrix * viewport_matrix * vec4(o0.xyz, 1.0);\n" " gl_Position = normalize_matrix * vec4(wpos.xyz, 1.0);\n" - " gl_Position.w = wpos.w = o0.w;\n"; + " gl_Position.w = o0.w;\n"; } for (std::size_t index = 0; index < 16; ++index) diff --git a/rpcs3/Emu/RSX/GL/vertex_buffer.cpp b/rpcs3/Emu/RSX/GL/vertex_buffer.cpp index 602b158c86..4b1208ee4b 100644 --- a/rpcs3/Emu/RSX/GL/vertex_buffer.cpp +++ b/rpcs3/Emu/RSX/GL/vertex_buffer.cpp @@ -203,14 +203,14 @@ u32 GLGSRender::set_vertex_buffer() vertex_draw_count = (u32)get_index_count(draw_mode, gsl::narrow(vertex_draw_count)); u32 block_sz = vertex_draw_count * type_size; - auto mapping = m_index_ring_buffer->alloc_and_map(block_sz); + auto mapping = m_index_ring_buffer.alloc_and_map(block_sz); void *ptr = mapping.first; offset_in_index_buffer = mapping.second; gsl::span dst{ reinterpret_cast(ptr), gsl::narrow(block_sz) }; std::tie(min_index, max_index) = write_index_array_data_to_buffer(dst, type, draw_mode, first_count_commands); - m_index_ring_buffer->unmap(); + m_index_ring_buffer.unmap(); } if (draw_command == rsx::draw_command::inlined_array) @@ -228,7 +228,7 @@ u32 GLGSRender::set_vertex_buffer() } vertex_draw_count = (u32)(inline_vertex_array.size() * sizeof(u32)) / stride; - m_attrib_ring_buffer->reserve_and_map(vertex_draw_count * max_vertex_attrib_size); + m_attrib_ring_buffer.reserve_and_map(vertex_draw_count * max_vertex_attrib_size); for (int index = 0; index < rsx::limits::vertex_count; ++index) { @@ -253,7 +253,7 @@ u32 GLGSRender::set_vertex_buffer() auto &texture = m_gl_attrib_buffers[index]; u8 *src = reinterpret_cast(inline_vertex_array.data()); - auto mapping = m_attrib_ring_buffer->alloc_from_reserve(data_size, m_min_texbuffer_alignment); + auto mapping = m_attrib_ring_buffer.alloc_from_reserve(data_size, m_min_texbuffer_alignment); u8 *dst = static_cast(mapping.first); src += offsets[index]; @@ -276,13 +276,13 @@ u32 GLGSRender::set_vertex_buffer() dst += element_size; } - texture.copy_from(m_attrib_ring_buffer->get_buffer(), gl_type, mapping.second, data_size); + texture.copy_from(m_attrib_ring_buffer, gl_type, mapping.second, data_size); //Link texture to uniform m_program->uniforms.texture(location, index + rsx::limits::textures_count, texture); if (!is_primitive_native(draw_mode)) { - std::tie(vertex_draw_count, offset_in_index_buffer) = get_index_array_for_emulated_non_indexed_draw({ { 0, vertex_draw_count } }, draw_mode, *m_index_ring_buffer); + std::tie(vertex_draw_count, offset_in_index_buffer) = get_index_array_for_emulated_non_indexed_draw({ { 0, vertex_draw_count } }, draw_mode, m_index_ring_buffer); } } } @@ -298,7 +298,7 @@ u32 GLGSRender::set_vertex_buffer() if (draw_command == rsx::draw_command::array || draw_command == rsx::draw_command::indexed) { u32 verts_allocated = std::max(vertex_draw_count, max_index + 1); - m_attrib_ring_buffer->reserve_and_map(verts_allocated * max_vertex_attrib_size); + m_attrib_ring_buffer.reserve_and_map(verts_allocated * max_vertex_attrib_size); for (int index = 0; index < rsx::limits::vertex_count; ++index) { @@ -337,7 +337,7 @@ u32 GLGSRender::set_vertex_buffer() if (draw_command == rsx::draw_command::array) { - auto mapping = m_attrib_ring_buffer->alloc_from_reserve(data_size, m_min_texbuffer_alignment); + auto mapping = m_attrib_ring_buffer.alloc_from_reserve(data_size, m_min_texbuffer_alignment); gsl::byte *dst = static_cast(mapping.first); buffer_offset = mapping.second; @@ -354,7 +354,7 @@ u32 GLGSRender::set_vertex_buffer() if (draw_command == rsx::draw_command::indexed) { data_size = (max_index + 1) * element_size; - auto mapping = m_attrib_ring_buffer->alloc_from_reserve(data_size, m_min_texbuffer_alignment); + auto mapping = m_attrib_ring_buffer.alloc_from_reserve(data_size, m_min_texbuffer_alignment); gsl::byte *dst = static_cast(mapping.first); buffer_offset = mapping.second; @@ -364,7 +364,7 @@ u32 GLGSRender::set_vertex_buffer() write_vertex_array_data_to_buffer(dest_span, src_ptr, 0, max_index + 1, vertex_info.type, vertex_info.size, vertex_info.stride, rsx::get_vertex_type_size_on_host(vertex_info.type, vertex_info.size)); } - texture.copy_from(m_attrib_ring_buffer->get_buffer(), gl_type, buffer_offset, data_size); + texture.copy_from(m_attrib_ring_buffer, gl_type, buffer_offset, data_size); //Link texture to uniform m_program->uniforms.texture(location, index + rsx::limits::textures_count, texture); @@ -385,11 +385,11 @@ u32 GLGSRender::set_vertex_buffer() auto &texture = m_gl_attrib_buffers[index]; - auto mapping = m_attrib_ring_buffer->alloc_from_reserve(data_size, m_min_texbuffer_alignment); + auto mapping = m_attrib_ring_buffer.alloc_from_reserve(data_size, m_min_texbuffer_alignment); u8 *dst = static_cast(mapping.first); memcpy(dst, vertex_data.data(), data_size); - texture.copy_from(m_attrib_ring_buffer->get_buffer(), gl_type, mapping.second, data_size); + texture.copy_from(m_attrib_ring_buffer, gl_type, mapping.second, data_size); //Link texture to uniform m_program->uniforms.texture(location, index + rsx::limits::textures_count, texture); @@ -411,11 +411,11 @@ u32 GLGSRender::set_vertex_buffer() if (draw_command == rsx::draw_command::array && !is_primitive_native(draw_mode)) { - std::tie(vertex_draw_count, offset_in_index_buffer) = get_index_array_for_emulated_non_indexed_draw(first_count_commands, draw_mode, *m_index_ring_buffer); + std::tie(vertex_draw_count, offset_in_index_buffer) = get_index_array_for_emulated_non_indexed_draw(first_count_commands, draw_mode, m_index_ring_buffer); } } - m_attrib_ring_buffer->unmap(); + m_attrib_ring_buffer.unmap(); std::chrono::time_point now = std::chrono::system_clock::now(); m_vertex_upload_time += std::chrono::duration_cast(now - then).count(); diff --git a/rpcs3/Emu/RSX/rsx_utils.cpp b/rpcs3/Emu/RSX/rsx_utils.cpp index 25a9b5ecf2..bd52dfc60b 100644 --- a/rpcs3/Emu/RSX/rsx_utils.cpp +++ b/rpcs3/Emu/RSX/rsx_utils.cpp @@ -1,5 +1,8 @@ #include "stdafx.h" #include "rsx_utils.h" +#include "rsx_methods.h" +#include "Emu/RSX/GCM.h" +#include "Common/BufferUtils.h" extern "C" { @@ -42,4 +45,66 @@ namespace rsx dst.reset(new u8[clip_h * dst_pitch]); clip_image(dst.get(), src, clip_x, clip_y, clip_w, clip_h, bpp, src_pitch, dst_pitch); } + + void fill_scale_offset_matrix(void *dest_, bool transpose, + float offset_x, float offset_y, float offset_z, + float scale_x, float scale_y, float scale_z) + { + char *dest = (char*)dest_; + + if (transpose) + { + stream_vector(dest + 4 * sizeof(f32) * 0, scale_x, 0, 0, 0); + stream_vector(dest + 4 * sizeof(f32) * 1, 0, scale_y, 0, 0); + stream_vector(dest + 4 * sizeof(f32) * 2, 0, 0, scale_z, 0); + stream_vector(dest + 4 * sizeof(f32) * 3, offset_x, offset_y, offset_z, 1); + } + else + { + stream_vector(dest + 4 * sizeof(f32) * 0, scale_x, 0, 0, offset_x); + stream_vector(dest + 4 * sizeof(f32) * 1, 0, scale_y, 0, offset_y); + stream_vector(dest + 4 * sizeof(f32) * 2, 0, 0, scale_z, offset_z); + stream_vector(dest + 4 * sizeof(f32) * 3, 0.f, 0.f, 0.f, 1.f); + } + } + + void fill_window_matrix(void *dest, bool transpose) + { + u32 shader_window = method_registers[NV4097_SET_SHADER_WINDOW]; + + u16 height = shader_window & 0xfff; + window_origin origin = to_window_origin((shader_window >> 12) & 0xf); + window_pixel_center pixelCenter = to_window_pixel_center(shader_window >> 16); + + f32 offset_x = f32(method_registers[NV4097_SET_WINDOW_OFFSET] & 0xffff); + f32 offset_y = f32(method_registers[NV4097_SET_WINDOW_OFFSET] >> 16); + f32 scale_y = 1.0; + + if (origin == window_origin::bottom) + { + offset_y = height - offset_y + 1; + scale_y = -1.0f; + } + + if (false && pixelCenter == window_pixel_center::half) + { + offset_x += 0.5f; + offset_y += 0.5f; + } + + fill_scale_offset_matrix(dest, transpose, offset_x, offset_y, 0.0f, 1.0f, scale_y, 1.0f); + } + + void fill_viewport_matrix(void *buffer, bool transpose) + { + f32 offset_x = (f32&)method_registers[NV4097_SET_VIEWPORT_OFFSET + 0]; + f32 offset_y = (f32&)method_registers[NV4097_SET_VIEWPORT_OFFSET + 1]; + f32 offset_z = (f32&)method_registers[NV4097_SET_VIEWPORT_OFFSET + 2]; + + f32 scale_x = (f32&)method_registers[NV4097_SET_VIEWPORT_SCALE + 0]; + f32 scale_y = (f32&)method_registers[NV4097_SET_VIEWPORT_SCALE + 1]; + f32 scale_z = (f32&)method_registers[NV4097_SET_VIEWPORT_SCALE + 2]; + + fill_scale_offset_matrix(buffer, transpose, offset_x, offset_y, offset_z, scale_x, scale_y, scale_z); + } } diff --git a/rpcs3/Emu/RSX/rsx_utils.h b/rpcs3/Emu/RSX/rsx_utils.h index d02b63218c..587efc24f4 100644 --- a/rpcs3/Emu/RSX/rsx_utils.h +++ b/rpcs3/Emu/RSX/rsx_utils.h @@ -114,4 +114,10 @@ namespace rsx void clip_image(u8 *dst, const u8 *src, int clip_x, int clip_y, int clip_w, int clip_h, int bpp, int src_pitch, int dst_pitch); void clip_image(std::unique_ptr& dst, const u8 *src, int clip_x, int clip_y, int clip_w, int clip_h, int bpp, int src_pitch, int dst_pitch); + + void fill_scale_offset_matrix(void *dest_, bool transpose, + float offset_x, float offset_y, float offset_z, + float scale_x, float scale_y, float scale_z); + void fill_window_matrix(void *dest, bool transpose); + void fill_viewport_matrix(void *buffer, bool transpose); }