diff --git a/rpcs3/Emu/CPU/Backends/AArch64JIT.cpp b/rpcs3/Emu/CPU/Backends/AArch64JIT.cpp index e185a4f36b..ab22a1fa32 100644 --- a/rpcs3/Emu/CPU/Backends/AArch64JIT.cpp +++ b/rpcs3/Emu/CPU/Backends/AArch64JIT.cpp @@ -47,7 +47,7 @@ namespace aarch64 continue; } - if (auto ri = llvm::dyn_cast(&*bit)) + if (llvm::dyn_cast(&*bit)) { if (auto ci = llvm::dyn_cast(&*prev)) { diff --git a/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp b/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp index e74794c0e0..a6371f27a9 100644 --- a/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp @@ -141,7 +141,6 @@ DECLARE(spu_runtime::tr_interpreter) = [] ghc_cpp_trampoline(reinterpret_cast(&spu_recompiler_base::old_interpreter), c, args); }); return trptr; - return trptr; #endif }(); diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 58367ed4c3..274365f4cb 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -8,8 +8,6 @@ #include "Emu/RSX/rsx_methods.h" #include "Emu/RSX/NV47/HW/context_accessors.define.h" -#include "../Program/program_state_cache2.hpp" - [[noreturn]] extern void report_fatal_error(std::string_view _text, bool is_html = false, bool include_help_text = true); namespace diff --git a/rpcs3/Emu/RSX/Program/ProgramStateCache.cpp b/rpcs3/Emu/RSX/Program/ProgramStateCache.cpp index 350627bdc0..5dd0953278 100644 --- a/rpcs3/Emu/RSX/Program/ProgramStateCache.cpp +++ b/rpcs3/Emu/RSX/Program/ProgramStateCache.cpp @@ -4,6 +4,25 @@ #include #include "util/v128.hpp" +#include "util/asm.hpp" + + +#if defined(ARCH_X64) +#include "emmintrin.h" +#include "immintrin.h" +#endif + +#ifdef ARCH_ARM64 +#ifndef _MSC_VER +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wstrict-aliasing" +#pragma GCC diagnostic ignored "-Wold-style-cast" +#endif +#include "Emu/CPU/sse2neon.h" +#ifndef _MSC_VER +#pragma GCC diagnostic pop +#endif +#endif using namespace program_hash_util; @@ -561,3 +580,71 @@ bool fragment_program_compare::operator()(const RSXFragmentProgram& binary1, con return true; } } + +namespace rsx +{ +#if defined(ARCH_X64) || defined(ARCH_ARM64) + static void write_fragment_constants_to_buffer_sse2(const std::span& buffer, const RSXFragmentProgram& rsx_prog, const std::vector& offsets_cache, bool sanitize) + { + f32* dst = buffer.data(); + for (usz offset_in_fragment_program : offsets_cache) + { + char* data = static_cast(rsx_prog.get_data()) + offset_in_fragment_program; + + const __m128i vector = _mm_loadu_si128(reinterpret_cast<__m128i*>(data)); + const __m128i shuffled_vector = _mm_or_si128(_mm_slli_epi16(vector, 8), _mm_srli_epi16(vector, 8)); + + if (sanitize) + { + //Convert NaNs and Infs to 0 + const auto masked = _mm_and_si128(shuffled_vector, _mm_set1_epi32(0x7fffffff)); + const auto valid = _mm_cmplt_epi32(masked, _mm_set1_epi32(0x7f800000)); + const auto result = _mm_and_si128(shuffled_vector, valid); + _mm_stream_si128(utils::bless<__m128i>(dst), result); + } + else + { + _mm_stream_si128(utils::bless<__m128i>(dst), shuffled_vector); + } + + dst += 4; + } + } +#endif + + static void write_fragment_constants_to_buffer_fallback(const std::span& buffer, const RSXFragmentProgram& rsx_prog, const std::vector& offsets_cache, bool sanitize) + { + f32* dst = buffer.data(); + + for (usz offset_in_fragment_program : offsets_cache) + { + char* data = static_cast(rsx_prog.get_data()) + offset_in_fragment_program; + + for (u32 i = 0; i < 4; i++) + { + const u32 value = reinterpret_cast(data)[i]; + const u32 shuffled = ((value >> 8) & 0xff00ff) | ((value << 8) & 0xff00ff00); + + if (sanitize && (shuffled & 0x7fffffff) >= 0x7f800000) + { + dst[i] = 0.f; + } + else + { + dst[i] = std::bit_cast(shuffled); + } + } + + dst += 4; + } + } + + void write_fragment_constants_to_buffer(const std::span& buffer, const RSXFragmentProgram& rsx_prog, const std::vector& offsets_cache, bool sanitize) + { +#if defined(ARCH_X64) || defined(ARCH_ARM64) + write_fragment_constants_to_buffer_sse2(buffer, rsx_prog, offsets_cache, sanitize); +#else + write_fragment_constants_to_buffer_fallback(buffer, rsx_prog, offsets_cache, sanitize); +#endif + } +} diff --git a/rpcs3/Emu/RSX/Program/ProgramStateCache.h b/rpcs3/Emu/RSX/Program/ProgramStateCache.h index 6f13ab20db..f80f9eeeec 100644 --- a/rpcs3/Emu/RSX/Program/ProgramStateCache.h +++ b/rpcs3/Emu/RSX/Program/ProgramStateCache.h @@ -81,6 +81,11 @@ namespace program_hash_util }; } +namespace rsx +{ + void write_fragment_constants_to_buffer(const std::span& buffer, const RSXFragmentProgram& rsx_prog, const std::vector& offsets_cache, bool sanitize = true); +} + /** * Cache for program help structure (blob, string...) @@ -275,32 +280,6 @@ public: } }; - struct - { - std::unordered_map db; - - void add(program_buffer_patch_entry& e) - { - db[e.fp_key] = e; - } - - void add(f32& key, f32& value) - { - db[key] = { key, value }; - } - - void clear() - { - db.clear(); - } - - bool is_empty() const - { - return db.empty(); - } - } - patch_table; - public: program_state_cache() = default; ~program_state_cache() @@ -404,7 +383,11 @@ public: return { result, &vertex_program, &fragment_program }; } - void fill_fragment_constants_buffer(std::span dst_buffer, const fragment_program_type& fragment_program, const RSXFragmentProgram& rsx_prog, bool sanitize = false) const; + void fill_fragment_constants_buffer(std::span dst_buffer, const fragment_program_type& fragment_program, const RSXFragmentProgram& rsx_prog, bool sanitize = false) const + { + ensure((dst_buffer.size_bytes() >= ::narrow(fragment_program.FragmentConstantOffsetCache.size()) * 16u)); + rsx::write_fragment_constants_to_buffer(dst_buffer, rsx_prog, fragment_program.FragmentConstantOffsetCache, sanitize); + } void clear() { diff --git a/rpcs3/Emu/RSX/Program/program_state_cache2.hpp b/rpcs3/Emu/RSX/Program/program_state_cache2.hpp deleted file mode 100644 index 4d30cfa897..0000000000 --- a/rpcs3/Emu/RSX/Program/program_state_cache2.hpp +++ /dev/null @@ -1,87 +0,0 @@ -#pragma once - -#include "ProgramStateCache.h" - -#include "util/asm.hpp" - -#if defined(ARCH_X64) -#include "emmintrin.h" -#endif - -template -void program_state_cache::fill_fragment_constants_buffer(std::span dst_buffer, const typename Traits::fragment_program_type& fragment_program, const RSXFragmentProgram& rsx_prog, bool sanitize) const -{ - ensure((dst_buffer.size_bytes() >= ::narrow(fragment_program.FragmentConstantOffsetCache.size()) * 16u)); - - f32* dst = dst_buffer.data(); - alignas(16) f32 tmp[4]; - for (usz offset_in_fragment_program : fragment_program.FragmentConstantOffsetCache) - { - char* data = static_cast(rsx_prog.get_data()) + offset_in_fragment_program; - -#if defined(ARCH_X64) - const __m128i vector = _mm_loadu_si128(reinterpret_cast<__m128i*>(data)); - const __m128i shuffled_vector = _mm_or_si128(_mm_slli_epi16(vector, 8), _mm_srli_epi16(vector, 8)); -#else - for (u32 i = 0; i < 4; i++) - { - const u32 value = reinterpret_cast(data)[i]; - tmp[i] = std::bit_cast(((value >> 8) & 0xff00ff) | ((value << 8) & 0xff00ff00)); - } -#endif - - if (!patch_table.is_empty()) - { -#if defined(ARCH_X64) - _mm_store_ps(tmp, _mm_castsi128_ps(shuffled_vector)); -#endif - - for (int i = 0; i < 4; ++i) - { - bool patched = false; - for (auto& e : patch_table.db) - { - //TODO: Use fp comparison with fabsf without hurting performance - patched = e.second.test_and_set(tmp[i], &dst[i]); - if (patched) - { - break; - } - } - - if (!patched) - { - dst[i] = tmp[i]; - } - } - } - else if (sanitize) - { -#if defined(ARCH_X64) - //Convert NaNs and Infs to 0 - const auto masked = _mm_and_si128(shuffled_vector, _mm_set1_epi32(0x7fffffff)); - const auto valid = _mm_cmplt_epi32(masked, _mm_set1_epi32(0x7f800000)); - const auto result = _mm_and_si128(shuffled_vector, valid); - _mm_stream_si128(utils::bless<__m128i>(dst), result); -#else - for (u32 i = 0; i < 4; i++) - { - const u32 value = std::bit_cast(tmp[i]); - tmp[i] = (value & 0x7fffffff) < 0x7f800000 ? value : 0; - } - - std::memcpy(dst, tmp, 16); -#endif - } - else - { -#if defined(ARCH_X64) - _mm_stream_si128(utils::bless<__m128i>(dst), shuffled_vector); -#else - std::memcpy(dst, tmp, 16); -#endif - } - - dst += 4; - } -} diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 8c765abd5c..fb5c245b87 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -18,7 +18,6 @@ #include "Emu/RSX/NV47/HW/context_accessors.define.h" #include "Emu/Memory/vm_locking.h" -#include "../Program/program_state_cache2.hpp" #include "../Program/SPIRVCommon.h" #include "util/asm.hpp" diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj index 4e47f2aa88..14b28d0838 100644 --- a/rpcs3/emucore.vcxproj +++ b/rpcs3/emucore.vcxproj @@ -915,7 +915,6 @@ - diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters index 898d086fb6..860a8d83ed 100644 --- a/rpcs3/emucore.vcxproj.filters +++ b/rpcs3/emucore.vcxproj.filters @@ -2230,9 +2230,6 @@ Emu\GPU\RSX\Program - - Emu\GPU\RSX\Program - Emu\GPU\RSX\Program