From 06b0e35fb9ec0a98e37432614b5b74538ea8b400 Mon Sep 17 00:00:00 2001 From: Ivan Chikish Date: Sat, 8 Apr 2023 15:21:22 +0300 Subject: [PATCH] Update to LLVM 16.0.1 Fix Zen4+ AVX-512 detection --- .ci/build-freebsd.sh | 2 +- .ci/get_keys-windows.sh | 2 +- .ci/setup-windows.sh | 2 +- 3rdparty/llvm/llvm | 2 +- BUILDING.md | 2 +- Utilities/JIT.cpp | 36 +++++++++++++--- Utilities/JIT.h | 6 +++ rpcs3/Emu/CPU/CPUTranslator.cpp | 7 +++- rpcs3/Emu/CPU/CPUTranslator.h | 1 + rpcs3/Emu/Cell/PPUThread.cpp | 10 ++--- rpcs3/Emu/Cell/SPURecompiler.cpp | 71 +++++++++++++++----------------- 11 files changed, 87 insertions(+), 54 deletions(-) diff --git a/.ci/build-freebsd.sh b/.ci/build-freebsd.sh index d04bcfc7e9..8e1eafc39b 100755 --- a/.ci/build-freebsd.sh +++ b/.ci/build-freebsd.sh @@ -7,7 +7,7 @@ git submodule -q update --init --depth 1 $(awk '/path/ && !/llvm/ && !/SPIRV/ { # Prefer newer Clang than in base system (see also .ci/install-freebsd.sh) # libc++ isn't in llvm* packages, so download manually -fetch https://github.com/llvm/llvm-project/releases/download/llvmorg-16.0.0/llvm-project-16.0.0.src.tar.xz +fetch https://github.com/llvm/llvm-project/releases/download/llvmorg-16.0.1/llvm-project-16.0.1.src.tar.xz tar xf llvm*.tar.xz export CC=clang16 CXX=clang++16 cmake -B libcxx_build -G Ninja -S llvm*/libcxx \ diff --git a/.ci/get_keys-windows.sh b/.ci/get_keys-windows.sh index bdc0bad98a..a6201f54a3 100644 --- a/.ci/get_keys-windows.sh +++ b/.ci/get_keys-windows.sh @@ -1,4 +1,4 @@ #!/bin/sh -ex -curl -L -o "./llvm.lock" "https://github.com/RPCS3/llvm-mirror/releases/download/custom-build-win-16.0.0/llvmlibs_mt.7z.sha256" +curl -L -o "./llvm.lock" "https://github.com/RPCS3/llvm-mirror/releases/download/custom-build-win-16.0.1/llvmlibs_mt.7z.sha256" curl -L -o "./glslang.lock" "https://github.com/RPCS3/glslang/releases/download/custom-build-win/glslanglibs_mt.7z.sha256" diff --git a/.ci/setup-windows.sh b/.ci/setup-windows.sh index a70263e0f5..614ca9e98b 100755 --- a/.ci/setup-windows.sh +++ b/.ci/setup-windows.sh @@ -19,7 +19,7 @@ QT_DECL_URL="${QT_HOST}${QT_PREFIX}qtdeclarative${QT_SUFFIX}" QT_TOOL_URL="${QT_HOST}${QT_PREFIX}qttools${QT_SUFFIX}" QT_MM_URL="${QT_HOST}${QT_PREFIX}qtmultimedia${QT_SUFFIX}" QT_SVG_URL="${QT_HOST}${QT_PREFIX}qtsvg${QT_SUFFIX}" -LLVMLIBS_URL='https://github.com/RPCS3/llvm-mirror/releases/download/custom-build-win-16.0.0/llvmlibs_mt.7z' +LLVMLIBS_URL='https://github.com/RPCS3/llvm-mirror/releases/download/custom-build-win-16.0.1/llvmlibs_mt.7z' GLSLANG_URL='https://github.com/RPCS3/glslang/releases/download/custom-build-win/glslanglibs_mt.7z' VULKAN_SDK_URL="https://www.dropbox.com/s/cs77c3iv5mbo0bt/VulkanSDK-${VULKAN_VER}-Installer.exe" diff --git a/3rdparty/llvm/llvm b/3rdparty/llvm/llvm index 08d094a0e4..cd89023f79 160000 --- a/3rdparty/llvm/llvm +++ b/3rdparty/llvm/llvm @@ -1 +1 @@ -Subproject commit 08d094a0e457360ad8b94b017d2dc277e697ca76 +Subproject commit cd89023f797900e4492da58b7bed36f702120011 diff --git a/BUILDING.md b/BUILDING.md index a6def2d666..e141ed189b 100644 --- a/BUILDING.md +++ b/BUILDING.md @@ -111,7 +111,7 @@ git submodule update --init Open `rpcs3.sln`. The recommended build configuration is `Release`. (On older revisions: `Release - LLVM`) -You may want to download the precompiled [LLVM libs](https://github.com/RPCS3/llvm-mirror/releases/download/custom-build-win-16.0.0/llvmlibs_mt.7z) and extract them to `3rdparty\llvm\`, as well as download and extract the [additional libs](https://github.com/RPCS3/glslang/releases/download/custom-build-win/glslanglibs_mt.7z) to `lib\%CONFIGURATION%-x64\` to speed up compilation time (unoptimised/debug libs are currently not available precompiled). +You may want to download the precompiled [LLVM libs](https://github.com/RPCS3/llvm-mirror/releases/download/custom-build-win-16.0.1/llvmlibs_mt.7z) and extract them to `3rdparty\llvm\`, as well as download and extract the [additional libs](https://github.com/RPCS3/glslang/releases/download/custom-build-win/glslanglibs_mt.7z) to `lib\%CONFIGURATION%-x64\` to speed up compilation time (unoptimised/debug libs are currently not available precompiled). If you're not using the precompiled libs, build the following projects in *__BUILD_BEFORE* folder by right-clicking on a project > *Build*.: * glslang diff --git a/Utilities/JIT.cpp b/Utilities/JIT.cpp index 2c203813a6..4f30e840b2 100644 --- a/Utilities/JIT.cpp +++ b/Utilities/JIT.cpp @@ -1373,6 +1373,34 @@ std::string jit_compiler::cpu(const std::string& _cpu) return m_cpu; } +std::string jit_compiler::triple1() +{ +#if defined(_WIN32) + return llvm::Triple::normalize(llvm::sys::getProcessTriple()); +#elif defined(__APPLE__) && defined(ARCH_X64) + return llvm::Triple::normalize("x86_64-unknown-linux-gnu"); +#elif defined(__APPLE__) && defined(ARCH_ARM64) + return llvm::Triple::normalize("aarch64-unknown-linux-gnu"); +#else + return llvm::Triple::normalize(llvm::sys::getProcessTriple()); +#endif +} + +std::string jit_compiler::triple2() +{ +#if defined(_WIN32) && defined(ARCH_X64) + return llvm::Triple::normalize("x86_64-unknown-linux-gnu"); +#elif defined(_WIN32) && defined(ARCH_ARM64) + return llvm::Triple::normalize("aarch64-unknown-linux-gnu"); +#elif defined(__APPLE__) && defined(ARCH_X64) + return llvm::Triple::normalize("x86_64-unknown-linux-gnu"); +#elif defined(__APPLE__) && defined(ARCH_ARM64) + return llvm::Triple::normalize("aarch64-unknown-linux-gnu"); +#else + return llvm::Triple::normalize(llvm::sys::getProcessTriple()); +#endif +} + jit_compiler::jit_compiler(const std::unordered_map& _link, const std::string& _cpu, u32 flags) : m_context(new llvm::LLVMContext) , m_cpu(cpu(_cpu)) @@ -1380,7 +1408,7 @@ jit_compiler::jit_compiler(const std::unordered_map& _link, co std::string result; auto null_mod = std::make_unique ("null_", *m_context); - null_mod->setTargetTriple(llvm::Triple::normalize(llvm::sys::getProcessTriple())); + null_mod->setTargetTriple(jit_compiler::triple1()); std::unique_ptr mem; @@ -1394,9 +1422,7 @@ jit_compiler::jit_compiler(const std::unordered_map& _link, co else { mem = std::make_unique(); -#if defined(_WIN32) && defined(ARCH_X64) - null_mod->setTargetTriple(llvm::Triple::normalize("x86_64-unknown-linux-gnu")); -#endif + null_mod->setTargetTriple(jit_compiler::triple2()); } } else @@ -1412,7 +1438,7 @@ jit_compiler::jit_compiler(const std::unordered_map& _link, co .setOptLevel(llvm::CodeGenOpt::Aggressive) .setCodeModel(flags & 0x2 ? llvm::CodeModel::Large : llvm::CodeModel::Small) #ifdef __APPLE__ - .setCodeModel(llvm::CodeModel::Large) + //.setCodeModel(llvm::CodeModel::Large) #endif .setRelocationModel(llvm::Reloc::Model::PIC_) .setMCPU(m_cpu) diff --git a/Utilities/JIT.h b/Utilities/JIT.h index 3cabf1afb9..498d5baf6b 100644 --- a/Utilities/JIT.h +++ b/Utilities/JIT.h @@ -546,6 +546,12 @@ public: // Get CPU info static std::string cpu(const std::string& _cpu); + + // Get system triple (PPU) + static std::string triple1(); + + // Get system triple (SPU) + static std::string triple2(); }; #endif diff --git a/rpcs3/Emu/CPU/CPUTranslator.cpp b/rpcs3/Emu/CPU/CPUTranslator.cpp index 28969827b5..ee8d31a318 100644 --- a/rpcs3/Emu/CPU/CPUTranslator.cpp +++ b/rpcs3/Emu/CPU/CPUTranslator.cpp @@ -125,7 +125,9 @@ void cpu_translator::initialize(llvm::LLVMContext& context, llvm::ExecutionEngin cpu == "bdver2" || cpu == "bdver3" || cpu == "bdver4" || - cpu.startswith("znver")) + cpu == "znver1" || + cpu == "znver2" || + cpu == "znver3") { m_use_fma = true; m_use_avx = true; @@ -158,7 +160,8 @@ void cpu_translator::initialize(llvm::LLVMContext& context, llvm::ExecutionEngin cpu == "icelake-server" || cpu == "tigerlake" || cpu == "rocketlake" || - cpu == "sapphirerapids") + cpu == "sapphirerapids" || + (cpu.startswith("znver") && cpu != "znver1" && cpu != "znver2" && cpu != "znver3")) { m_use_avx = true; m_use_fma = true; diff --git a/rpcs3/Emu/CPU/CPUTranslator.h b/rpcs3/Emu/CPU/CPUTranslator.h index 72ce4ce01f..da465e3d52 100644 --- a/rpcs3/Emu/CPU/CPUTranslator.h +++ b/rpcs3/Emu/CPU/CPUTranslator.h @@ -13,6 +13,7 @@ #pragma GCC diagnostic ignored "-Wstrict-aliasing" #pragma GCC diagnostic ignored "-Weffc++" #pragma GCC diagnostic ignored "-Wmissing-noreturn" +#pragma GCC diagnostic ignored "-Wredundant-decls" #endif #include "llvm/IR/LLVMContext.h" #include "llvm/ExecutionEngine/ExecutionEngine.h" diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index c3141d6963..ea98d772d8 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -2132,7 +2132,7 @@ static void ppu_check(ppu_thread& ppu, u64 addr) // ppu_check() shall not return directly if (ppu.test_stopped()) - ; + {} ppu_escape(&ppu); } @@ -3690,7 +3690,7 @@ bool ppu_initialize(const ppu_module& info, bool check_only) // Settings: should be populated by settings which affect codegen (TODO) enum class ppu_settings : u32 { - non_win32, + platform_bit, accurate_dfma, fixup_vnan, fixup_nj_denormals, @@ -3707,8 +3707,8 @@ bool ppu_initialize(const ppu_module& info, bool check_only) be_t> settings{}; -#ifndef _WIN32 - settings += ppu_settings::non_win32; +#if !defined(_WIN32) && !defined(__APPLE__) + settings += ppu_settings::platform_bit; #endif if (g_cfg.core.use_accurate_dfma) settings += ppu_settings::accurate_dfma; @@ -3937,7 +3937,7 @@ static void ppu_initialize2(jit_compiler& jit, const ppu_module& module_part, co std::unique_ptr _module = std::make_unique(obj_name, jit.get_context()); // Initialize target - _module->setTargetTriple(Triple::normalize(sys::getProcessTriple())); + _module->setTargetTriple(jit_compiler::triple1()); _module->setDataLayout(jit.get_engine().getTargetMachine()->createDataLayout()); // Initialize translator diff --git a/rpcs3/Emu/Cell/SPURecompiler.cpp b/rpcs3/Emu/Cell/SPURecompiler.cpp index ca8819215a..63d70acf48 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.cpp +++ b/rpcs3/Emu/Cell/SPURecompiler.cpp @@ -4290,7 +4290,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator const auto fail = llvm::BasicBlock::Create(m_context, "", m_function); m_ir->CreateCondBr(m_ir->CreateICmpEQ(m_base_pc, m_ir->getInt32(m_base)), next, fail); m_ir->SetInsertPoint(fail); - m_ir->CreateStore(m_ir->getInt32(target), spu_ptr(&spu_thread::pc), true); + m_ir->CreateStore(m_ir->getInt32(target), spu_ptr(&spu_thread::pc)); tail_chunk(nullptr); m_ir->SetInsertPoint(next); } @@ -4328,7 +4328,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator { ensure(!m_finfo->fn); - m_ir->CreateStore(m_ir->getInt32(target), spu_ptr(&spu_thread::pc), true); + m_ir->CreateStore(m_ir->getInt32(target), spu_ptr(&spu_thread::pc)); } else { @@ -4766,7 +4766,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator } template - void set_vr(const bf_t& index, T expr, bool fixup = true) + void set_vr(const bf_t& index, T expr, std::function vr_assume = nullptr, bool fixup = true) { // Process expression const auto value = expr.eval(m_ir); @@ -4790,6 +4790,10 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator return; } + if (vr_assume) + { + } + set_reg_fixed(index, value, fixup); } @@ -4869,7 +4873,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator // Update PC for current or explicitly specified instruction address void update_pc(u32 target = -1) { - m_ir->CreateStore(m_ir->CreateAnd(get_pc(target + 1 ? target : m_pos), 0x3fffc), spu_ptr(&spu_thread::pc), true); + m_ir->CreateStore(m_ir->CreateAnd(get_pc(target + 1 ? target : m_pos), 0x3fffc), spu_ptr(&spu_thread::pc)); } // Call cpu_thread::check_state if necessary and return or continue (full check) @@ -4889,14 +4893,14 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator if (may_be_unsafe_for_savestate) { - m_ir->CreateStore(m_ir->getInt8(1), spu_ptr(&spu_thread::unsavable), true); + m_ir->CreateStore(m_ir->getInt8(1), spu_ptr(&spu_thread::unsavable)); } - m_ir->CreateStore(m_ir->getFalse(), m_fake_global1, true); + m_ir->CreateStore(m_ir->getFalse(), m_fake_global1); if (may_be_unsafe_for_savestate) { - m_ir->CreateStore(m_ir->getInt8(0), spu_ptr(&spu_thread::unsavable), true); + m_ir->CreateStore(m_ir->getInt8(0), spu_ptr(&spu_thread::unsavable)); } m_ir->CreateBr(_body); @@ -5003,11 +5007,7 @@ public: // Create LLVM module std::unique_ptr _module = std::make_unique(m_hash + ".obj", m_context); -#if defined(_WIN32) && defined(ARCH_X64) - _module->setTargetTriple(Triple::normalize("x86_64-unknown-linux-gnu")); -#else - _module->setTargetTriple(Triple::normalize(sys::getProcessTriple())); -#endif + _module->setTargetTriple(jit_compiler::triple2()); _module->setDataLayout(m_jit.get_engine().getTargetMachine()->createDataLayout()); m_module = _module.get(); @@ -5035,7 +5035,7 @@ public: // Emit state check const auto pstate = spu_ptr(&spu_thread::state); - m_ir->CreateCondBr(m_ir->CreateICmpNE(m_ir->CreateLoad(get_type(), pstate, true), m_ir->getInt32(0)), label_stop, label_test, m_md_unlikely); + m_ir->CreateCondBr(m_ir->CreateICmpNE(m_ir->CreateLoad(get_type(), pstate), m_ir->getInt32(0)), label_stop, label_test, m_md_unlikely); // Emit code check u32 check_iterations = 0; @@ -5043,7 +5043,7 @@ public: // Set block hash for profiling (if enabled) if (g_cfg.core.spu_prof && g_cfg.core.spu_verification) - m_ir->CreateStore(m_ir->getInt64((m_hash_start & -65536)), spu_ptr(&spu_thread::block_hash), true); + m_ir->CreateStore(m_ir->getInt64((m_hash_start & -65536)), spu_ptr(&spu_thread::block_hash)); if (!g_cfg.core.spu_verification) { @@ -5294,7 +5294,7 @@ public: // Set block hash for profiling (if enabled) if (g_cfg.core.spu_prof) - m_ir->CreateStore(m_ir->getInt64((m_hash_start & -65536) | (m_entry >> 2)), spu_ptr(&spu_thread::block_hash), true); + m_ir->CreateStore(m_ir->getInt64((m_hash_start & -65536) | (m_entry >> 2)), spu_ptr(&spu_thread::block_hash)); m_finfo = &m_functions[m_entry]; m_ir->CreateBr(add_block(m_entry)); @@ -5535,7 +5535,7 @@ public: { for (auto& i : bb) { - // Replace volatile fake store with spu_test_state call + // Replace fake store with spu_test_state call if (auto si = dyn_cast(&i); si && si->getOperand(1) == m_fake_global1) { m_ir->SetInsertPoint(si); @@ -5681,11 +5681,7 @@ public: // Create LLVM module std::unique_ptr _module = std::make_unique("spu_interpreter.obj", m_context); -#if defined(_WIN32) && defined(ARCH_X64) - _module->setTargetTriple(Triple::normalize("x86_64-unknown-linux-gnu")); -#else - _module->setTargetTriple(Triple::normalize(sys::getProcessTriple())); -#endif + _module->setTargetTriple(jit_compiler::triple2()); _module->setDataLayout(m_jit.get_engine().getTargetMachine()->createDataLayout()); m_module = _module.get(); @@ -5985,7 +5981,7 @@ public: ncall->setTailCall(); m_ir->CreateRetVoid(); m_ir->SetInsertPoint(_stop); - m_ir->CreateStore(m_interp_pc, spu_ptr(&spu_thread::pc), true); + m_ir->CreateStore(m_interp_pc, spu_ptr(&spu_thread::pc)); call("spu_escape", spu_runtime::g_escape, m_thread)->setTailCall(); m_ir->CreateRetVoid(); } @@ -6233,8 +6229,9 @@ public: } else { - const auto val = m_ir->CreateLoad(get_type(), ptr, true); - m_ir->CreateStore(m_ir->getInt64(0), ptr, true); + const auto val = m_ir->CreateLoad(get_type(), ptr); + val->setAtomic(llvm::AtomicOrdering::Unordered); + m_ir->CreateStore(m_ir->getInt64(0), ptr)->setAtomic(llvm::AtomicOrdering::Unordered); val0 = val; } @@ -6319,7 +6316,7 @@ public: } case SPU_RdEventMask: { - res.value = m_ir->CreateTrunc(m_ir->CreateLShr(m_ir->CreateLoad(get_type(), spu_ptr(&spu_thread::ch_events), true), 32), get_type()); + res.value = m_ir->CreateTrunc(m_ir->CreateLShr(m_ir->CreateLoad(get_type(), spu_ptr(&spu_thread::ch_events)), 32), get_type()); break; } case SPU_RdEventStat: @@ -6575,7 +6572,7 @@ public: // Illegal update, access violate with special address m_ir->SetInsertPoint(fail); const auto ptr = _ptr(m_memptr, 0xffdead04); - m_ir->CreateStore(m_ir->getInt32("TAG\0"_u32), ptr, true); + m_ir->CreateStore(m_ir->getInt32("TAG\0"_u32), ptr); m_ir->CreateBr(next); m_ir->SetInsertPoint(any); @@ -7097,7 +7094,7 @@ public: minusb = eval(x); } - if (auto k = get_known_bits(minusb); (k & kbc(32)).isZero()) + if (auto k = get_known_bits(minusb); !!(k.Zero & 32)) { set_vr(op.rt, a >> (minusb & 31)); return; @@ -7116,7 +7113,7 @@ public: minusb = eval(x); } - if (auto k = get_known_bits(minusb); (k & kbc(32)).isZero()) + if (auto k = get_known_bits(minusb); !!(k.Zero & 32)) { set_vr(op.rt, a >> (minusb & 31)); return; @@ -7129,7 +7126,7 @@ public: { const auto [a, b] = get_vrs(op.ra, op.rb); - if (auto k = get_known_bits(b); (k & kbc(32)).isZero()) + if (auto k = get_known_bits(b); !!(k.Zero & 32)) { set_vr(op.rt, a << (b & 31)); return; @@ -7154,7 +7151,7 @@ public: minusb = eval(x); } - if (auto k = get_known_bits(minusb); (k & kbc(16)).isZero()) + if (auto k = get_known_bits(minusb); !!(k.Zero & 16)) { set_vr(op.rt, a >> (minusb & 15)); return; @@ -7173,7 +7170,7 @@ public: minusb = eval(x); } - if (auto k = get_known_bits(minusb); (k & kbc(16)).isZero()) + if (auto k = get_known_bits(minusb); !!(k.Zero & 16)) { set_vr(op.rt, a >> (minusb & 15)); return; @@ -7186,7 +7183,7 @@ public: { const auto [a, b] = get_vrs(op.ra, op.rb); - if (auto k = get_known_bits(b); (k & kbc(16)).isZero()) + if (auto k = get_known_bits(b); !!(k.Zero & 16)) { set_vr(op.rt, a << (b & 15)); return; @@ -8992,7 +8989,7 @@ public: const auto i = select(a > 0x47f0000000000000, eval(s | 0x47f0000000000000), d); const auto n = select(a > 0x7ff0000000000000, splat(0x47f8000000000000), i); const auto z = select(a < 0x3810000000000000, s, n); - set_vr(op.rt, zshuffle(bitcast(z), 2, 0, 3, 1), false); + set_vr(op.rt, zshuffle(bitcast(z), 2, 0, 3, 1), nullptr, false); return; } @@ -9808,11 +9805,11 @@ public: m_ir->CreateCondBr(cond.value, halt, next, m_md_unlikely); m_ir->SetInsertPoint(halt); if (m_interp_magn) - m_ir->CreateStore(m_function->getArg(2), spu_ptr(&spu_thread::pc))->setVolatile(true); + m_ir->CreateStore(m_function->getArg(2), spu_ptr(&spu_thread::pc)); else update_pc(); const auto ptr = _ptr(m_memptr, 0xffdead00); - m_ir->CreateStore(m_ir->getInt32("HALT"_u32), ptr)->setVolatile(true); + m_ir->CreateStore(m_ir->getInt32("HALT"_u32), ptr); m_ir->CreateBr(next); m_ir->SetInsertPoint(next); } @@ -10292,8 +10289,8 @@ public: if (m_finfo && m_finfo->fn) { // Can't afford external tail call in true functions - m_ir->CreateStore(m_ir->getInt32("BIJT"_u32), _ptr(m_memptr, 0xffdead20))->setVolatile(true); - m_ir->CreateStore(m_ir->getFalse(), m_fake_global1, true); + m_ir->CreateStore(m_ir->getInt32("BIJT"_u32), _ptr(m_memptr, 0xffdead20)); + m_ir->CreateStore(m_ir->getFalse(), m_fake_global1); m_ir->CreateBr(sw->getDefaultDest()); } else