mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-01-29 00:33:01 +00:00
rsx: Fix fragment constants decoding for non-x86 platforms
This commit is contained in:
parent
03a612487d
commit
3ef8046f5c
@ -47,7 +47,7 @@ namespace aarch64
|
||||
continue;
|
||||
}
|
||||
|
||||
if (auto ri = llvm::dyn_cast<llvm::ReturnInst>(&*bit))
|
||||
if (llvm::dyn_cast<llvm::ReturnInst>(&*bit))
|
||||
{
|
||||
if (auto ci = llvm::dyn_cast<llvm::CallInst>(&*prev))
|
||||
{
|
||||
|
@ -141,7 +141,6 @@ DECLARE(spu_runtime::tr_interpreter) = []
|
||||
ghc_cpp_trampoline(reinterpret_cast<u64>(&spu_recompiler_base::old_interpreter), c, args);
|
||||
});
|
||||
return trptr;
|
||||
return trptr;
|
||||
#endif
|
||||
}();
|
||||
|
||||
|
@ -8,8 +8,6 @@
|
||||
#include "Emu/RSX/rsx_methods.h"
|
||||
#include "Emu/RSX/NV47/HW/context_accessors.define.h"
|
||||
|
||||
#include "../Program/program_state_cache2.hpp"
|
||||
|
||||
[[noreturn]] extern void report_fatal_error(std::string_view _text, bool is_html = false, bool include_help_text = true);
|
||||
|
||||
namespace
|
||||
|
@ -4,6 +4,25 @@
|
||||
|
||||
#include <stack>
|
||||
#include "util/v128.hpp"
|
||||
#include "util/asm.hpp"
|
||||
|
||||
|
||||
#if defined(ARCH_X64)
|
||||
#include "emmintrin.h"
|
||||
#include "immintrin.h"
|
||||
#endif
|
||||
|
||||
#ifdef ARCH_ARM64
|
||||
#ifndef _MSC_VER
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wstrict-aliasing"
|
||||
#pragma GCC diagnostic ignored "-Wold-style-cast"
|
||||
#endif
|
||||
#include "Emu/CPU/sse2neon.h"
|
||||
#ifndef _MSC_VER
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
#endif
|
||||
|
||||
using namespace program_hash_util;
|
||||
|
||||
@ -561,3 +580,71 @@ bool fragment_program_compare::operator()(const RSXFragmentProgram& binary1, con
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
namespace rsx
|
||||
{
|
||||
#if defined(ARCH_X64) || defined(ARCH_ARM64)
|
||||
static void write_fragment_constants_to_buffer_sse2(const std::span<f32>& buffer, const RSXFragmentProgram& rsx_prog, const std::vector<usz>& offsets_cache, bool sanitize)
|
||||
{
|
||||
f32* dst = buffer.data();
|
||||
for (usz offset_in_fragment_program : offsets_cache)
|
||||
{
|
||||
char* data = static_cast<char*>(rsx_prog.get_data()) + offset_in_fragment_program;
|
||||
|
||||
const __m128i vector = _mm_loadu_si128(reinterpret_cast<__m128i*>(data));
|
||||
const __m128i shuffled_vector = _mm_or_si128(_mm_slli_epi16(vector, 8), _mm_srli_epi16(vector, 8));
|
||||
|
||||
if (sanitize)
|
||||
{
|
||||
//Convert NaNs and Infs to 0
|
||||
const auto masked = _mm_and_si128(shuffled_vector, _mm_set1_epi32(0x7fffffff));
|
||||
const auto valid = _mm_cmplt_epi32(masked, _mm_set1_epi32(0x7f800000));
|
||||
const auto result = _mm_and_si128(shuffled_vector, valid);
|
||||
_mm_stream_si128(utils::bless<__m128i>(dst), result);
|
||||
}
|
||||
else
|
||||
{
|
||||
_mm_stream_si128(utils::bless<__m128i>(dst), shuffled_vector);
|
||||
}
|
||||
|
||||
dst += 4;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
static void write_fragment_constants_to_buffer_fallback(const std::span<f32>& buffer, const RSXFragmentProgram& rsx_prog, const std::vector<usz>& offsets_cache, bool sanitize)
|
||||
{
|
||||
f32* dst = buffer.data();
|
||||
|
||||
for (usz offset_in_fragment_program : offsets_cache)
|
||||
{
|
||||
char* data = static_cast<char*>(rsx_prog.get_data()) + offset_in_fragment_program;
|
||||
|
||||
for (u32 i = 0; i < 4; i++)
|
||||
{
|
||||
const u32 value = reinterpret_cast<u32*>(data)[i];
|
||||
const u32 shuffled = ((value >> 8) & 0xff00ff) | ((value << 8) & 0xff00ff00);
|
||||
|
||||
if (sanitize && (shuffled & 0x7fffffff) >= 0x7f800000)
|
||||
{
|
||||
dst[i] = 0.f;
|
||||
}
|
||||
else
|
||||
{
|
||||
dst[i] = std::bit_cast<f32>(shuffled);
|
||||
}
|
||||
}
|
||||
|
||||
dst += 4;
|
||||
}
|
||||
}
|
||||
|
||||
void write_fragment_constants_to_buffer(const std::span<f32>& buffer, const RSXFragmentProgram& rsx_prog, const std::vector<usz>& offsets_cache, bool sanitize)
|
||||
{
|
||||
#if defined(ARCH_X64) || defined(ARCH_ARM64)
|
||||
write_fragment_constants_to_buffer_sse2(buffer, rsx_prog, offsets_cache, sanitize);
|
||||
#else
|
||||
write_fragment_constants_to_buffer_fallback(buffer, rsx_prog, offsets_cache, sanitize);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
@ -81,6 +81,11 @@ namespace program_hash_util
|
||||
};
|
||||
}
|
||||
|
||||
namespace rsx
|
||||
{
|
||||
void write_fragment_constants_to_buffer(const std::span<f32>& buffer, const RSXFragmentProgram& rsx_prog, const std::vector<usz>& offsets_cache, bool sanitize = true);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Cache for program help structure (blob, string...)
|
||||
@ -275,32 +280,6 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
struct
|
||||
{
|
||||
std::unordered_map<f32, program_buffer_patch_entry> db;
|
||||
|
||||
void add(program_buffer_patch_entry& e)
|
||||
{
|
||||
db[e.fp_key] = e;
|
||||
}
|
||||
|
||||
void add(f32& key, f32& value)
|
||||
{
|
||||
db[key] = { key, value };
|
||||
}
|
||||
|
||||
void clear()
|
||||
{
|
||||
db.clear();
|
||||
}
|
||||
|
||||
bool is_empty() const
|
||||
{
|
||||
return db.empty();
|
||||
}
|
||||
}
|
||||
patch_table;
|
||||
|
||||
public:
|
||||
program_state_cache() = default;
|
||||
~program_state_cache()
|
||||
@ -404,7 +383,11 @@ public:
|
||||
return { result, &vertex_program, &fragment_program };
|
||||
}
|
||||
|
||||
void fill_fragment_constants_buffer(std::span<f32> dst_buffer, const fragment_program_type& fragment_program, const RSXFragmentProgram& rsx_prog, bool sanitize = false) const;
|
||||
void fill_fragment_constants_buffer(std::span<f32> dst_buffer, const fragment_program_type& fragment_program, const RSXFragmentProgram& rsx_prog, bool sanitize = false) const
|
||||
{
|
||||
ensure((dst_buffer.size_bytes() >= ::narrow<int>(fragment_program.FragmentConstantOffsetCache.size()) * 16u));
|
||||
rsx::write_fragment_constants_to_buffer(dst_buffer, rsx_prog, fragment_program.FragmentConstantOffsetCache, sanitize);
|
||||
}
|
||||
|
||||
void clear()
|
||||
{
|
||||
|
@ -1,87 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include "ProgramStateCache.h"
|
||||
|
||||
#include "util/asm.hpp"
|
||||
|
||||
#if defined(ARCH_X64)
|
||||
#include "emmintrin.h"
|
||||
#endif
|
||||
|
||||
template <typename Traits>
|
||||
void program_state_cache<Traits>::fill_fragment_constants_buffer(std::span<f32> dst_buffer, const typename Traits::fragment_program_type& fragment_program, const RSXFragmentProgram& rsx_prog, bool sanitize) const
|
||||
{
|
||||
ensure((dst_buffer.size_bytes() >= ::narrow<int>(fragment_program.FragmentConstantOffsetCache.size()) * 16u));
|
||||
|
||||
f32* dst = dst_buffer.data();
|
||||
alignas(16) f32 tmp[4];
|
||||
for (usz offset_in_fragment_program : fragment_program.FragmentConstantOffsetCache)
|
||||
{
|
||||
char* data = static_cast<char*>(rsx_prog.get_data()) + offset_in_fragment_program;
|
||||
|
||||
#if defined(ARCH_X64)
|
||||
const __m128i vector = _mm_loadu_si128(reinterpret_cast<__m128i*>(data));
|
||||
const __m128i shuffled_vector = _mm_or_si128(_mm_slli_epi16(vector, 8), _mm_srli_epi16(vector, 8));
|
||||
#else
|
||||
for (u32 i = 0; i < 4; i++)
|
||||
{
|
||||
const u32 value = reinterpret_cast<u32*>(data)[i];
|
||||
tmp[i] = std::bit_cast<f32, u32>(((value >> 8) & 0xff00ff) | ((value << 8) & 0xff00ff00));
|
||||
}
|
||||
#endif
|
||||
|
||||
if (!patch_table.is_empty())
|
||||
{
|
||||
#if defined(ARCH_X64)
|
||||
_mm_store_ps(tmp, _mm_castsi128_ps(shuffled_vector));
|
||||
#endif
|
||||
|
||||
for (int i = 0; i < 4; ++i)
|
||||
{
|
||||
bool patched = false;
|
||||
for (auto& e : patch_table.db)
|
||||
{
|
||||
//TODO: Use fp comparison with fabsf without hurting performance
|
||||
patched = e.second.test_and_set(tmp[i], &dst[i]);
|
||||
if (patched)
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!patched)
|
||||
{
|
||||
dst[i] = tmp[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (sanitize)
|
||||
{
|
||||
#if defined(ARCH_X64)
|
||||
//Convert NaNs and Infs to 0
|
||||
const auto masked = _mm_and_si128(shuffled_vector, _mm_set1_epi32(0x7fffffff));
|
||||
const auto valid = _mm_cmplt_epi32(masked, _mm_set1_epi32(0x7f800000));
|
||||
const auto result = _mm_and_si128(shuffled_vector, valid);
|
||||
_mm_stream_si128(utils::bless<__m128i>(dst), result);
|
||||
#else
|
||||
for (u32 i = 0; i < 4; i++)
|
||||
{
|
||||
const u32 value = std::bit_cast<u32>(tmp[i]);
|
||||
tmp[i] = (value & 0x7fffffff) < 0x7f800000 ? value : 0;
|
||||
}
|
||||
|
||||
std::memcpy(dst, tmp, 16);
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
#if defined(ARCH_X64)
|
||||
_mm_stream_si128(utils::bless<__m128i>(dst), shuffled_vector);
|
||||
#else
|
||||
std::memcpy(dst, tmp, 16);
|
||||
#endif
|
||||
}
|
||||
|
||||
dst += 4;
|
||||
}
|
||||
}
|
@ -18,7 +18,6 @@
|
||||
#include "Emu/RSX/NV47/HW/context_accessors.define.h"
|
||||
#include "Emu/Memory/vm_locking.h"
|
||||
|
||||
#include "../Program/program_state_cache2.hpp"
|
||||
#include "../Program/SPIRVCommon.h"
|
||||
|
||||
#include "util/asm.hpp"
|
||||
|
@ -915,7 +915,6 @@
|
||||
<ClInclude Include="Emu\RSX\Program\CgBinaryProgram.h" />
|
||||
<ClInclude Include="Emu\RSX\Common\BufferUtils.h" />
|
||||
<ClInclude Include="Emu\RSX\Program\FragmentProgramDecompiler.h" />
|
||||
<ClInclude Include="Emu\RSX\Program\program_state_cache2.hpp" />
|
||||
<ClInclude Include="Emu\RSX\Common\ring_buffer_helper.h" />
|
||||
<ClInclude Include="Emu\RSX\Program\ShaderParam.h" />
|
||||
<ClInclude Include="Emu\RSX\Common\surface_store.h" />
|
||||
|
@ -2230,9 +2230,6 @@
|
||||
<ClInclude Include="Emu\RSX\Program\GLSLCommon.h">
|
||||
<Filter>Emu\GPU\RSX\Program</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="Emu\RSX\Program\program_state_cache2.hpp">
|
||||
<Filter>Emu\GPU\RSX\Program</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="Emu\RSX\Program\program_util.h">
|
||||
<Filter>Emu\GPU\RSX\Program</Filter>
|
||||
</ClInclude>
|
||||
|
Loading…
x
Reference in New Issue
Block a user