From b7979d3f57bc078bb7bd27edf834ed7fab20af9c Mon Sep 17 00:00:00 2001 From: kd-11 Date: Fri, 20 Apr 2018 23:44:34 +0300 Subject: [PATCH] rsx/vk: Improvements and minor optimizations - Improve dirty state tracking affecting program state - vk: Refactor out transform constants upload into a separate channel so that it can be skipped when possible; transform data uploads are quite expensive --- rpcs3/Emu/RSX/Common/ProgramStateCache.cpp | 18 ++-- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 4 +- rpcs3/Emu/RSX/GL/GLGSRender.cpp | 13 +-- rpcs3/Emu/RSX/RSXThread.cpp | 11 ++- rpcs3/Emu/RSX/RSXThread.h | 16 +++- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 106 +++++++++++++-------- rpcs3/Emu/RSX/VK/VKGSRender.h | 13 ++- rpcs3/Emu/RSX/rsx_methods.cpp | 24 +++-- 8 files changed, 130 insertions(+), 75 deletions(-) diff --git a/rpcs3/Emu/RSX/Common/ProgramStateCache.cpp b/rpcs3/Emu/RSX/Common/ProgramStateCache.cpp index e15a4e25f2..c1ca8bdae9 100644 --- a/rpcs3/Emu/RSX/Common/ProgramStateCache.cpp +++ b/rpcs3/Emu/RSX/Common/ProgramStateCache.cpp @@ -141,18 +141,22 @@ fragment_program_utils::fragment_program_metadata fragment_program_utils::analys if (program_offset < 0) program_offset = instIndex * 16; - if (opcode == RSX_FP_OPCODE_TEX || - opcode == RSX_FP_OPCODE_TEXBEM || - opcode == RSX_FP_OPCODE_TXP || - opcode == RSX_FP_OPCODE_TXPBEM || - opcode == RSX_FP_OPCODE_TXD || - opcode == RSX_FP_OPCODE_TXB || - opcode == RSX_FP_OPCODE_TXL) + switch(opcode) + { + case RSX_FP_OPCODE_TEX: + case RSX_FP_OPCODE_TEXBEM: + case RSX_FP_OPCODE_TXP: + case RSX_FP_OPCODE_TXPBEM: + case RSX_FP_OPCODE_TXD: + case RSX_FP_OPCODE_TXB: + case RSX_FP_OPCODE_TXL: { //Bits 17-20 of word 1, swapped within u16 sections //Bits 16-23 are swapped into the upper 8 bits (24-31) const u32 tex_num = (inst.word[0] >> 25) & 15; textures_mask |= (1 << tex_num); + break; + } } if (is_constant(inst.word[1]) || is_constant(inst.word[2]) || is_constant(inst.word[3])) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp 
b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index fd2f553597..adb6af3593 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -389,17 +389,17 @@ void D3D12GSRender::end() .Offset((INT)currentDescriptorIndex + vertex_buffer_count, m_descriptor_stride_srv_cbv_uav) ); - if (m_transform_constants_dirty && !g_cfg.video.debug_output) + if (!g_cfg.video.debug_output && (m_graphics_state & rsx::pipeline_state::transform_constants_dirty)) { m_current_transform_constants_buffer_descriptor_id = (u32)currentDescriptorIndex + 1 + vertex_buffer_count; upload_and_bind_vertex_shader_constants(currentDescriptorIndex + 1 + vertex_buffer_count); - m_transform_constants_dirty = false; get_current_resource_storage().command_list->SetGraphicsRootDescriptorTable(VERTEX_CONSTANT_BUFFERS_SLOT, CD3DX12_GPU_DESCRIPTOR_HANDLE(get_current_resource_storage().descriptors_heap->GetGPUDescriptorHandleForHeapStart()) .Offset(m_current_transform_constants_buffer_descriptor_id, m_descriptor_stride_srv_cbv_uav) ); } + m_graphics_state = 0; std::chrono::time_point constants_duration_end = steady_clock::now(); m_timers.constants_duration += std::chrono::duration_cast(constants_duration_end - constants_duration_start).count(); diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 1a925eddf6..e317030739 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -1049,7 +1049,7 @@ bool GLGSRender::check_program_state() void GLGSRender::load_program(const gl::vertex_upload_info& upload_info) { - if (m_fragment_program_dirty || m_vertex_program_dirty) + if (m_graphics_state & rsx::pipeline_state::invalidate_pipeline_bits) { get_current_fragment_program(fs_sampler_state); verify(HERE), current_fragment_program.valid; @@ -1091,12 +1091,13 @@ void GLGSRender::load_program(const gl::vertex_upload_info& upload_info) const u32 fragment_constants_size = (const 
u32)m_prog_buffer.get_fragment_constants_buffer_size(current_fragment_program); const u32 fragment_buffer_size = fragment_constants_size + (18 * 4 * sizeof(float)); + const bool update_transform_constants = !!(m_graphics_state & rsx::pipeline_state::transform_constants_dirty); if (manually_flush_ring_buffers) { m_vertex_state_buffer->reserve_storage_on_heap(512); m_fragment_constants_buffer->reserve_storage_on_heap(align(fragment_buffer_size, 256)); - if (m_transform_constants_dirty) m_transform_constants_buffer->reserve_storage_on_heap(8192); + if (update_transform_constants) m_transform_constants_buffer->reserve_storage_on_heap(8192); } // Vertex state @@ -1112,7 +1113,7 @@ void GLGSRender::load_program(const gl::vertex_upload_info& upload_info) *(reinterpret_cast(buf + 144)) = rsx::method_registers.clip_max(); fill_vertex_layout_state(m_vertex_layout, upload_info.allocated_vertex_count, reinterpret_cast(buf + 160), upload_info.persistent_mapping_offset, upload_info.volatile_mapping_offset); - if (m_transform_constants_dirty) + if (update_transform_constants) { // Vertex constants mapping = m_transform_constants_buffer->alloc_from_heap(8192, m_uniform_buffer_offset_align); @@ -1137,17 +1138,17 @@ void GLGSRender::load_program(const gl::vertex_upload_info& upload_info) m_vertex_state_buffer->bind_range(0, vertex_state_offset, 512); m_fragment_constants_buffer->bind_range(2, fragment_constants_offset, fragment_buffer_size); - if (m_transform_constants_dirty) m_transform_constants_buffer->bind_range(1, vertex_constants_offset, 8192); + if (update_transform_constants) m_transform_constants_buffer->bind_range(1, vertex_constants_offset, 8192); if (manually_flush_ring_buffers) { m_vertex_state_buffer->unmap(); m_fragment_constants_buffer->unmap(); - if (m_transform_constants_dirty) m_transform_constants_buffer->unmap(); + if (update_transform_constants) m_transform_constants_buffer->unmap(); } - m_transform_constants_dirty = false; + m_graphics_state = 0; } void 
GLGSRender::update_draw_state() diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index fba3dd5fa3..4c0a4e9b54 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -246,7 +246,8 @@ namespace rsx m_rtts_dirty = true; memset(m_textures_dirty, -1, sizeof(m_textures_dirty)); memset(m_vertex_textures_dirty, -1, sizeof(m_vertex_textures_dirty)); - m_transform_constants_dirty = true; + + m_graphics_state = pipeline_state::all_dirty; } thread::~thread() @@ -1329,10 +1330,10 @@ namespace rsx void thread::get_current_vertex_program() { - if (!m_vertex_program_dirty) + if (!(m_graphics_state & rsx::pipeline_state::vertex_program_dirty)) return; - m_vertex_program_dirty = false; + m_graphics_state &= ~(rsx::pipeline_state::vertex_program_dirty); const u32 transform_program_start = rsx::method_registers.transform_program_start(); current_vertex_program.output_mask = rsx::method_registers.vertex_attrib_output_mask(); current_vertex_program.skip_vertex_input_check = false; @@ -1544,10 +1545,10 @@ namespace rsx void thread::get_current_fragment_program(const std::array, rsx::limits::fragment_textures_count>& sampler_descriptors) { - if (!m_fragment_program_dirty) + if (!(m_graphics_state & rsx::pipeline_state::fragment_program_dirty)) return; - m_fragment_program_dirty = false; + m_graphics_state &= ~(rsx::pipeline_state::fragment_program_dirty); auto &result = current_fragment_program = {}; const u32 shader_program = rsx::method_registers.shader_program_address(); diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index f2030c7a8d..b4c05bb09d 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -69,6 +69,18 @@ namespace rsx context_clear_all = context_clear_color | context_clear_depth }; + enum pipeline_state : u8 + { + fragment_program_dirty = 1, + vertex_program_dirty = 2, + fragment_state_dirty = 4, + vertex_state_dirty = 8, + transform_constants_dirty = 16, + + invalidate_pipeline_bits = 
fragment_program_dirty | vertex_program_dirty, + all_dirty = 255 + }; + u32 get_vertex_type_size_on_host(vertex_base_type type, u32 size); u32 get_address(u32 offset, u32 location); @@ -327,12 +339,10 @@ namespace rsx u32 local_mem_addr, main_mem_addr; bool m_rtts_dirty; - bool m_transform_constants_dirty; bool m_textures_dirty[16]; bool m_vertex_textures_dirty[4]; bool m_framebuffer_state_contested = false; - bool m_fragment_program_dirty = false; - bool m_vertex_program_dirty = false; + u32 m_graphics_state = 0; protected: std::array get_color_surface_addresses() const; diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index b6b9a9bed0..6ffad39d19 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -593,6 +593,8 @@ VKGSRender::VKGSRender() : GSRender() m_attrib_ring_info.heap.reset(new vk::buffer(*m_device, VK_ATTRIB_RING_BUFFER_SIZE_M * 0x100000, memory_map.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, 0)); m_uniform_buffer_ring_info.init(VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "uniform buffer"); m_uniform_buffer_ring_info.heap.reset(new vk::buffer(*m_device, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, memory_map.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, 0)); + m_transform_constants_ring_info.init(VK_TRANSFORM_CONSTANTS_BUFFER_SIZE_M * 0x100000, "transform constants buffer"); + m_transform_constants_ring_info.heap.reset(new vk::buffer(*m_device, VK_TRANSFORM_CONSTANTS_BUFFER_SIZE_M * 0x100000, memory_map.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, 0)); m_index_buffer_ring_info.init(VK_INDEX_RING_BUFFER_SIZE_M * 0x100000, "index buffer"); m_index_buffer_ring_info.heap.reset(new vk::buffer(*m_device, VK_INDEX_RING_BUFFER_SIZE_M * 
0x100000, memory_map.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_INDEX_BUFFER_BIT, 0)); m_texture_upload_buffer_ring_info.init(VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M * 0x100000, "texture upload buffer", 32 * 0x100000); @@ -688,6 +690,7 @@ VKGSRender::~VKGSRender() //Heaps m_index_buffer_ring_info.heap.reset(); m_uniform_buffer_ring_info.heap.reset(); + m_transform_constants_ring_info.heap.reset(); m_attrib_ring_info.heap.reset(); m_texture_upload_buffer_ring_info.heap.reset(); @@ -893,6 +896,7 @@ void VKGSRender::check_heap_status() if (m_attrib_ring_info.is_critical() || m_texture_upload_buffer_ring_info.is_critical() || m_uniform_buffer_ring_info.is_critical() || + m_transform_constants_ring_info.is_critical() || m_index_buffer_ring_info.is_critical()) { std::chrono::time_point submit_start = steady_clock::now(); @@ -917,6 +921,7 @@ void VKGSRender::check_heap_status() m_index_buffer_ring_info.reset_allocation_stats(); m_uniform_buffer_ring_info.reset_allocation_stats(); + m_transform_constants_ring_info.reset_allocation_stats(); m_attrib_ring_info.reset_allocation_stats(); m_texture_upload_buffer_ring_info.reset_allocation_stats(); m_current_frame->reset_heap_ptrs(); @@ -1938,6 +1943,7 @@ void VKGSRender::advance_queued_frames() m_vertex_cache->purge(); m_current_frame->tag_frame_end(m_attrib_ring_info.get_current_put_pos_minus_one(), m_uniform_buffer_ring_info.get_current_put_pos_minus_one(), + m_transform_constants_ring_info.get_current_put_pos_minus_one(), m_index_buffer_ring_info.get_current_put_pos_minus_one(), m_texture_upload_buffer_ring_info.get_current_put_pos_minus_one()); @@ -2045,11 +2051,13 @@ void VKGSRender::process_swap_request(frame_context_t *ctx, bool free_resources) //Heap cleanup; deallocates memory consumed by the frame if it is still held m_attrib_ring_info.m_get_pos = ctx->attrib_heap_ptr; m_uniform_buffer_ring_info.m_get_pos = ctx->ubo_heap_ptr; + 
m_transform_constants_ring_info.m_get_pos = ctx->vtxconst_heap_ptr; m_index_buffer_ring_info.m_get_pos = ctx->index_heap_ptr; m_texture_upload_buffer_ring_info.m_get_pos = ctx->texture_upload_heap_ptr; m_attrib_ring_info.notify(); m_uniform_buffer_ring_info.notify(); + m_transform_constants_ring_info.notify(); m_index_buffer_ring_info.notify(); m_texture_upload_buffer_ring_info.notify(); } @@ -2209,7 +2217,7 @@ bool VKGSRender::check_program_status() void VKGSRender::load_program(const vk::vertex_upload_info& vertex_info) { - if (m_fragment_program_dirty || m_vertex_program_dirty) + if (m_graphics_state & rsx::pipeline_state::invalidate_pipeline_bits) { get_current_fragment_program(fs_sampler_state); verify(HERE), current_fragment_program.valid; @@ -2219,6 +2227,7 @@ void VKGSRender::load_program(const vk::vertex_upload_info& vertex_info) auto &vertex_program = current_vertex_program; auto &fragment_program = current_fragment_program; + auto old_program = m_program; vk::pipeline_props properties = {}; @@ -2372,49 +2381,66 @@ void VKGSRender::load_program(const vk::vertex_upload_info& vertex_info) vk::leave_uninterruptible(); - const size_t fragment_constants_sz = m_prog_buffer->get_fragment_constants_buffer_size(fragment_program); - const size_t fragment_buffer_sz = fragment_constants_sz + (18 * 4 * sizeof(float)); - const size_t required_mem = 512 + 8192 + fragment_buffer_sz; - - const size_t vertex_state_offset = m_uniform_buffer_ring_info.alloc<256>(required_mem); - const size_t vertex_constants_offset = vertex_state_offset + 512; - const size_t fragment_constants_offset = vertex_constants_offset + 8192; - - //We do this in one go - u8 *buf = (u8*)m_uniform_buffer_ring_info.map(vertex_state_offset, required_mem); - - //Vertex state - fill_scale_offset_data(buf, false); - fill_user_clip_data(buf + 64); - *(reinterpret_cast(buf + 128)) = rsx::method_registers.transform_branch_bits(); - *(reinterpret_cast(buf + 132)) = vertex_info.vertex_index_base; - 
*(reinterpret_cast(buf + 136)) = rsx::method_registers.point_size(); - *(reinterpret_cast(buf + 140)) = rsx::method_registers.clip_min(); - *(reinterpret_cast(buf + 144)) = rsx::method_registers.clip_max(); - - fill_vertex_layout_state(m_vertex_layout, vertex_info.allocated_vertex_count, reinterpret_cast(buf + 160), - vertex_info.persistent_window_offset, vertex_info.volatile_window_offset); - - //Vertex constants - buf = buf + 512; - fill_vertex_program_constants_data(buf); - m_transform_constants_dirty = false; - - //Fragment constants - buf = buf + 8192; - if (fragment_constants_sz) + if (1)//m_graphics_state & (rsx::pipeline_state::fragment_state_dirty | rsx::pipeline_state::vertex_state_dirty)) { - m_prog_buffer->fill_fragment_constants_buffer({ reinterpret_cast(buf), ::narrow(fragment_constants_sz) }, - fragment_program, vk::sanitize_fp_values()); + const size_t fragment_constants_sz = m_prog_buffer->get_fragment_constants_buffer_size(fragment_program); + const size_t fragment_buffer_sz = fragment_constants_sz + (18 * 4 * sizeof(float)); + const size_t required_mem = 512 + fragment_buffer_sz; + + const size_t vertex_state_offset = m_uniform_buffer_ring_info.alloc<256>(required_mem); + const size_t fragment_constants_offset = vertex_state_offset + 512; + + //We do this in one go + u8 *buf = (u8*)m_uniform_buffer_ring_info.map(vertex_state_offset, required_mem); + + //Vertex state + fill_scale_offset_data(buf, false); + fill_user_clip_data(buf + 64); + *(reinterpret_cast(buf + 128)) = rsx::method_registers.transform_branch_bits(); + *(reinterpret_cast(buf + 132)) = vertex_info.vertex_index_base; + *(reinterpret_cast(buf + 136)) = rsx::method_registers.point_size(); + *(reinterpret_cast(buf + 140)) = rsx::method_registers.clip_min(); + *(reinterpret_cast(buf + 144)) = rsx::method_registers.clip_max(); + + fill_vertex_layout_state(m_vertex_layout, vertex_info.allocated_vertex_count, reinterpret_cast(buf + 160), + vertex_info.persistent_window_offset, 
vertex_info.volatile_window_offset); + + //Fragment constants + buf = buf + 512; + if (fragment_constants_sz) + { + m_prog_buffer->fill_fragment_constants_buffer({ reinterpret_cast(buf), ::narrow(fragment_constants_sz) }, + fragment_program, vk::sanitize_fp_values()); + } + + fill_fragment_state_buffer(buf + fragment_constants_sz, fragment_program); + + m_uniform_buffer_ring_info.unmap(); + + m_vertex_state_buffer_info = { m_uniform_buffer_ring_info.heap->value, vertex_state_offset, 512 }; + m_fragment_state_buffer_info = { m_uniform_buffer_ring_info.heap->value, fragment_constants_offset, fragment_buffer_sz }; } - fill_fragment_state_buffer(buf + fragment_constants_sz, fragment_program); - - m_uniform_buffer_ring_info.unmap(); + if (m_graphics_state & rsx::pipeline_state::transform_constants_dirty) + { + //Vertex constants + const size_t vertex_constants_offset = m_transform_constants_ring_info.alloc<256>(8192); + auto buf = m_transform_constants_ring_info.map(vertex_constants_offset, 8192); - m_program->bind_uniform({ m_uniform_buffer_ring_info.heap->value, vertex_state_offset, 512 }, SCALE_OFFSET_BIND_SLOT, m_current_frame->descriptor_set); - m_program->bind_uniform({ m_uniform_buffer_ring_info.heap->value, vertex_constants_offset, 8192 }, VERTEX_CONSTANT_BUFFERS_BIND_SLOT, m_current_frame->descriptor_set); - m_program->bind_uniform({ m_uniform_buffer_ring_info.heap->value, fragment_constants_offset, fragment_buffer_sz }, FRAGMENT_CONSTANT_BUFFERS_BIND_SLOT, m_current_frame->descriptor_set); + fill_vertex_program_constants_data(buf); + m_transform_constants_ring_info.unmap(); + m_vertex_constants_buffer_info = { m_transform_constants_ring_info.heap->value, vertex_constants_offset, 8192 }; + } + + if (1)//m_graphics_state || old_program != m_program) + { + m_program->bind_uniform(m_vertex_state_buffer_info, SCALE_OFFSET_BIND_SLOT, m_current_frame->descriptor_set); + m_program->bind_uniform(m_vertex_constants_buffer_info, VERTEX_CONSTANT_BUFFERS_BIND_SLOT, 
m_current_frame->descriptor_set); + m_program->bind_uniform(m_fragment_state_buffer_info, FRAGMENT_CONSTANT_BUFFERS_BIND_SLOT, m_current_frame->descriptor_set); + } + + //Clear flags + m_graphics_state = 0; } static const u32 mr_color_offset[rsx::limits::color_buffers_count] = diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index 6f6dfee12a..e0366f487b 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -40,7 +40,8 @@ namespace vk //NOTE: Texture uploads can be huge, upto 16MB for a single texture (4096x4096px) #define VK_ATTRIB_RING_BUFFER_SIZE_M 384 #define VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M 256 -#define VK_UBO_RING_BUFFER_SIZE_M 128 +#define VK_UBO_RING_BUFFER_SIZE_M 64 +#define VK_TRANSFORM_CONSTANTS_BUFFER_SIZE_M 64 #define VK_INDEX_RING_BUFFER_SIZE_M 64 #define VK_MAX_ASYNC_CB_COUNT 64 @@ -152,6 +153,7 @@ struct frame_context_t //Heap pointers s64 attrib_heap_ptr = 0; s64 ubo_heap_ptr = 0; + s64 vtxconst_heap_ptr = 0; s64 index_heap_ptr = 0; s64 texture_upload_heap_ptr = 0; @@ -167,6 +169,7 @@ struct frame_context_t attrib_heap_ptr = other.attrib_heap_ptr; ubo_heap_ptr = other.attrib_heap_ptr; + vtxconst_heap_ptr = other.vtxconst_heap_ptr; index_heap_ptr = other.attrib_heap_ptr; texture_upload_heap_ptr = other.texture_upload_heap_ptr; } @@ -178,10 +181,11 @@ struct frame_context_t std::swap(samplers_to_clean, other.samplers_to_clean); } - void tag_frame_end(s64 attrib_loc, s64 ubo_loc, s64 index_loc, s64 texture_loc) + void tag_frame_end(s64 attrib_loc, s64 ubo_loc, s64 vtxconst_loc, s64 index_loc, s64 texture_loc) { attrib_heap_ptr = attrib_loc; ubo_heap_ptr = ubo_loc; + vtxconst_heap_ptr = vtxconst_loc; index_heap_ptr = index_loc; texture_upload_heap_ptr = texture_loc; @@ -314,9 +318,14 @@ private: u64 m_last_heap_sync_time = 0; vk::vk_data_heap m_attrib_ring_info; vk::vk_data_heap m_uniform_buffer_ring_info; + vk::vk_data_heap m_transform_constants_ring_info; vk::vk_data_heap 
m_index_buffer_ring_info; vk::vk_data_heap m_texture_upload_buffer_ring_info; + VkDescriptorBufferInfo m_vertex_state_buffer_info; + VkDescriptorBufferInfo m_vertex_constants_buffer_info; + VkDescriptorBufferInfo m_fragment_state_buffer_info; + std::array frame_context_storage; //Temp frame context to use if the real frame queue is overburdened. Only used for storage frame_context_t m_aux_frame_context; diff --git a/rpcs3/Emu/RSX/rsx_methods.cpp b/rpcs3/Emu/RSX/rsx_methods.cpp index c097e3043f..25672b950c 100644 --- a/rpcs3/Emu/RSX/rsx_methods.cpp +++ b/rpcs3/Emu/RSX/rsx_methods.cpp @@ -342,12 +342,17 @@ namespace rsx u32 load = rsx::method_registers.transform_constant_load(); if ((load + index) >= 512) { - LOG_ERROR(RSX, "Invalid register index (load=%d, index=%d)", load, index); + LOG_ERROR(RSX, "Invalid transform register index (load=%d, index=%d)", load, index); return; } - rsx::method_registers.transform_constants[load + reg][subreg] = arg; - rsxthr->m_transform_constants_dirty = true; + auto &value = rsx::method_registers.transform_constants[load + reg][subreg]; + if (value != arg) + { + //Transform constants invalidation is expensive (~8k bytes per update) + value = arg; + rsxthr->m_graphics_state |= rsx::pipeline_state::transform_constants_dirty; + } } }; @@ -357,19 +362,18 @@ namespace rsx static void impl(thread* rsx, u32 _reg, u32 arg) { method_registers.commit_4_transform_program_instructions(index); - rsx->m_vertex_program_dirty = true; + rsx->m_graphics_state |= rsx::pipeline_state::vertex_program_dirty; } }; void set_transform_program_start(thread* rsx, u32, u32) { - rsx->m_vertex_program_dirty = true; + rsx->m_graphics_state |= rsx::pipeline_state::vertex_program_dirty; } void set_vertex_attribute_output_mask(thread* rsx, u32, u32) { - rsx->m_vertex_program_dirty = true; - rsx->m_fragment_program_dirty = true; + rsx->m_graphics_state |= rsx::pipeline_state::vertex_program_dirty | rsx::pipeline_state::fragment_program_dirty; } void 
set_begin_end(thread* rsxthr, u32 _reg, u32 arg) @@ -535,7 +539,7 @@ namespace rsx void invalidate_L2(thread* rsx, u32, u32) { - rsx->m_fragment_program_dirty = true; + rsx->m_graphics_state |= rsx::pipeline_state::fragment_program_dirty; } void set_surface_dirty_bit(thread* rsx, u32, u32) @@ -556,7 +560,7 @@ namespace rsx static void impl(thread* rsx, u32 _reg, u32 arg) { rsx->m_textures_dirty[index] = true; - rsx->m_fragment_program_dirty = true; + rsx->m_graphics_state |= rsx::pipeline_state::fragment_program_dirty; } }; @@ -584,7 +588,7 @@ namespace rsx u32 address = get_address(method_registers.blit_engine_output_offset_nv3062() + pixel_offset + index * 4, method_registers.blit_engine_output_location_nv3062()); vm::write32(address, arg); - rsx->m_fragment_program_dirty = true; + rsx->m_graphics_state |= rsx::pipeline_state::fragment_program_dirty; } }; }