NOTE(review): line breaks, tab indentation, blank context lines and the
template arguments on the std::tuple / std::unordered_map lines were
reconstructed from a whitespace-collapsed copy of this patch -- confirm them
against the target tree before applying. The "index" hashes predate the
const-correctness and comment edits made to the added lines below.

diff --git a/rpcs3/Emu/RSX/Common/BufferUtils.cpp b/rpcs3/Emu/RSX/Common/BufferUtils.cpp
index 463959e473..df77c36aa6 100644
--- a/rpcs3/Emu/RSX/Common/BufferUtils.cpp
+++ b/rpcs3/Emu/RSX/Common/BufferUtils.cpp
@@ -282,4 +282,10 @@ void stream_vector(void *dst, u32 x, u32 y, u32 z, u32 w) noexcept
 {
 	__m128i vector = _mm_set_epi32(w, z, y, x);
 	_mm_stream_si128((__m128i*)dst, vector);
+}
+
+void stream_vector_from_memory(void *dst, const void *src) noexcept
+{
+	const __m128i vector = _mm_loadu_si128((const __m128i*)src);
+	_mm_stream_si128((__m128i*)dst, vector);
 }
\ No newline at end of file
diff --git a/rpcs3/Emu/RSX/Common/BufferUtils.h b/rpcs3/Emu/RSX/Common/BufferUtils.h
index b98152ca27..05186ab7b2 100644
--- a/rpcs3/Emu/RSX/Common/BufferUtils.h
+++ b/rpcs3/Emu/RSX/Common/BufferUtils.h
@@ -48,3 +48,10 @@ void write_index_array_for_non_indexed_non_native_primitive_to_buffer(char* dst,
  * Stream a 128 bits vector to dst.
  */
 void stream_vector(void *dst, u32 x, u32 y, u32 z, u32 w) noexcept;
+
+/**
+ * Stream a 128 bits vector from src to dst.
+ * src is read with an unaligned load; dst is written with a non-temporal
+ * store (_mm_stream_si128) and must therefore be 16-byte aligned.
+ */
+void stream_vector_from_memory(void *dst, const void *src) noexcept;
diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp
index 6fbda2c511..e11ebe9f0b 100644
--- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp
+++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp
@@ -167,9 +167,6 @@ void D3D12GSRender::upload_and_bind_scale_offset_matrix(size_t descriptorIndex)
 
 void D3D12GSRender::upload_and_bind_vertex_shader_constants(size_t descriptor_index)
 {
-	for (const auto &entry : transform_constants)
-		local_transform_constants[entry.first] = entry.second;
-
 	size_t buffer_size = 512 * 4 * sizeof(float);
 
 	assert(m_constantsData.can_alloc(buffer_size));
@@ -177,16 +174,7 @@ void D3D12GSRender::upload_and_bind_vertex_shader_constants(size_t descriptor_in
 
 	void *mapped_buffer;
 	ThrowIfFailed(m_constantsData.m_heap->Map(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size), &mapped_buffer));
-	for (const auto &entry : local_transform_constants)
-	{
-		float data[4] = {
-			entry.second.x,
-			entry.second.y,
-			entry.second.z,
-			entry.second.w
-		};
-		streamToBuffer((char*)mapped_buffer + heap_offset + entry.first * 4 * sizeof(float), data, 4 * sizeof(float));
-	}
+	fill_vertex_program_constants_data((char*)mapped_buffer + heap_offset);
 	m_constantsData.m_heap->Unmap(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
 
 	D3D12_CONSTANT_BUFFER_VIEW_DESC constant_buffer_view_desc = {
diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp
index a8bfbaf469..62d60fe461 100644
--- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp
+++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp
@@ -672,7 +672,6 @@ void D3D12GSRender::flip(int buffer)
 	storage.uav_heap_get_pos = m_UAVHeap.get_current_put_pos_minus_one();
 
 	// Flush
-	local_transform_constants.clear();
 	m_texturesRTTs.clear();
 
 	// Now get ready for next frame
diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h
index a8cf6a0ec8..d316a98ff3 100644
--- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h
+++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h
@@ -72,7 +72,6 @@ private:
 	RSXFragmentProgram fragment_program;
 	PipelineStateObjectCache m_cachePSO;
 	std::tuple<ComPtr<ID3D12PipelineState>, size_t> *m_PSO;
-	std::unordered_map<u32, color4f> local_transform_constants;
 
 	struct
 	{
diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp
index 8bac746b79..906e5e09eb 100644
--- a/rpcs3/Emu/RSX/RSXThread.cpp
+++ b/rpcs3/Emu/RSX/RSXThread.cpp
@@ -586,6 +586,7 @@ namespace rsx
 		std::this_thread::sleep_for(std::chrono::milliseconds((s64)(1000.0 / limit - rsx->timer_sync.GetElapsedTimeInMilliSec())));
 
 		rsx->timer_sync.Start();
+		rsx->local_transform_constants.clear();
 	}
 
 	void user_command(thread* rsx, u32 arg)
@@ -1048,6 +1049,21 @@ namespace rsx
 		stream_vector((char*)buffer + 48, 0, 0, 0, (u32&)one);
 	}
 
+	/**
+	* Fill buffer with vertex program constants.
+	* Buffer must be at least 512 float4 wide.
+	*/
+	void thread::fill_vertex_program_constants_data(void *buffer) noexcept
+	{
+		// Fold the pending constants into the per-frame cache before uploading
+		for (const auto &entry : transform_constants)
+			local_transform_constants[entry.first] = entry.second;
+
+		// entry.first is the float4 slot index inside buffer
+		for (const auto &entry : local_transform_constants)
+			stream_vector_from_memory((char*)buffer + entry.first * 4 * sizeof(float), entry.second.rgba);
+	}
+
 	u64 thread::timestamp() const
 	{
 		// Get timestamp, and convert it from microseconds to nanoseconds
diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h
index 8feaa9bd77..4e1d8aa824 100644
--- a/rpcs3/Emu/RSX/RSXThread.h
+++ b/rpcs3/Emu/RSX/RSXThread.h
@@ -161,6 +161,9 @@ namespace rsx
 
 		std::unordered_map<u32, color4_base<f32>> transform_constants;
 
+		// Constant stored for whole frame
+		std::unordered_map<u32, color4f> local_transform_constants;
+
 		u32 transform_program[512 * 4] = {};
 
 		virtual void load_vertex_data(u32 first, u32 count);
@@ -224,6 +227,12 @@ namespace rsx
 		*/
 		void fill_scale_offset_data(void *buffer) const noexcept;
 
+		/**
+		* Fill buffer with vertex program constants.
+		* Buffer must be at least 512 float4 wide.
+		*/
+		void fill_vertex_program_constants_data(void *buffer) noexcept;
+
 	public:
 		void reset();
 		void init(const u32 ioAddress, const u32 ioSize, const u32 ctrlAddress, const u32 localAddress);