Update to LLVM 16.0.0, switch to upstream LLVM

This commit is contained in:
Ivan Chikish 2023-03-11 22:08:27 +03:00 committed by Ivan
parent 7081b89e97
commit fb88e1c1c9
24 changed files with 746 additions and 231 deletions

View File

@ -7,9 +7,9 @@ git submodule -q update --init --depth 1 $(awk '/path/ && !/llvm/ { print $3 }'
# Prefer newer Clang than in base system (see also .ci/install-freebsd.sh)
# libc++ isn't in llvm* packages, so download manually
fetch https://github.com/llvm/llvm-project/releases/download/llvmorg-15.0.6/llvm-project-15.0.6.src.tar.xz
fetch https://github.com/llvm/llvm-project/releases/download/llvmorg-16.0.0/llvm-project-16.0.0.src.tar.xz
tar xf llvm*.tar.xz
export CC=clang15 CXX=clang++15
export CC=clang16 CXX=clang++16
cmake -B libcxx_build -G Ninja -S llvm*/libcxx \
-DLLVM_CCACHE_BUILD=ON \
-DLIBCXX_INCLUDE_BENCHMARKS=OFF \
@ -20,7 +20,7 @@ export CXXFLAGS="$CXXFLAGS -nostdinc++ -isystem$PWD/libcxx_prefix/include/c++/v1
export LDFLAGS="$LDFLAGS -nostdlib++ -L$PWD/libcxx_prefix/lib -l:libc++.a -lcxxrt"
CONFIGURE_ARGS="
-DWITH_LLVM=OFF
-DWITH_LLVM=ON
-DUSE_SDL=OFF
-DUSE_PRECOMPILED_HEADERS=OFF
-DUSE_NATIVE_INSTRUCTIONS=OFF

View File

@ -9,16 +9,11 @@ if [ -z "$CIRRUS_CI" ]; then
cd rpcs3 || exit 1
fi
# Pull all the submodules except llvm, since it is built separately and we just download that build
# Pull all the submodules except llvm
# Note: Tried to use git submodule status, but it takes over 20 seconds
# shellcheck disable=SC2046
git submodule -q update --init $(awk '/path/ && !/llvm/ { print $3 }' .gitmodules)
# Download pre-compiled llvm libs
curl -sLO https://github.com/RPCS3/llvm-mirror/releases/download/custom-build/llvmlibs-linux.tar.gz
mkdir llvmlibs
tar -xzf ./llvmlibs-linux.tar.gz -C llvmlibs
mkdir build && cd build || exit 1
if [ "$COMPILER" = "gcc" ]; then
@ -42,8 +37,6 @@ export CFLAGS="$CFLAGS -fuse-ld=${LINKER}"
cmake .. \
-DCMAKE_INSTALL_PREFIX=/usr \
-DBUILD_LLVM_SUBMODULE=OFF \
-DLLVM_DIR=llvmlibs/lib/cmake/llvm/ \
-DUSE_NATIVE_INSTRUCTIONS=OFF \
-DUSE_PRECOMPILED_HEADERS=OFF \
-DCMAKE_C_FLAGS="$CFLAGS" \

View File

@ -1,10 +1,10 @@
#!/bin/sh -ex
brew install -f --overwrite llvm@14 nasm ninja git p7zip create-dmg ccache
brew install -f --overwrite llvm@16 nasm ninja git p7zip create-dmg ccache
#/usr/sbin/softwareupdate --install-rosetta --agree-to-license
arch -x86_64 /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
arch -x86_64 /usr/local/homebrew/bin/brew install -f --overwrite llvm@14 sdl2 glew cmake
arch -x86_64 /usr/local/homebrew/bin/brew install -f --overwrite llvm@16 sdl2 glew cmake
#export MACOSX_DEPLOYMENT_TARGET=12.0
export CXX=clang++
@ -33,7 +33,7 @@ cd ..
export Qt5_DIR="$WORKDIR/qt-downloader/5.15.2/clang_64/lib/cmake/Qt5"
export SDL2_DIR="$BREW_X64_PATH/opt/sdl2/lib/cmake/SDL2"
export PATH="$BREW_PATH/opt/llvm@14/bin:$WORKDIR/qt-downloader/5.15.2/clang_64/bin:$BREW_BIN:$BREW_SBIN:/usr/bin:/bin:/usr/sbin:/sbin:/opt/X11/bin:/Library/Apple/usr/bin:$PATH"
export PATH="$BREW_PATH/opt/llvm@16/bin:$WORKDIR/qt-downloader/5.15.2/clang_64/bin:$BREW_BIN:$BREW_SBIN:/usr/bin:/bin:/usr/sbin:/sbin:/opt/X11/bin:/Library/Apple/usr/bin:$PATH"
export LDFLAGS="-L$BREW_X64_PATH/lib -Wl,-rpath,$BREW_X64_PATH/lib"
export CPPFLAGS="-I$BREW_X64_PATH/include -msse -msse2 -mcx16 -no-pie"
export LIBRARY_PATH="$BREW_X64_PATH/lib"

View File

@ -15,7 +15,7 @@ echo "AVVER=$AVVER" >> ../.ci/ci-vars.env
cd bin
mkdir "rpcs3.app/Contents/lib/"
cp "/usr/local/Homebrew/opt/llvm@14/lib/c++/libc++abi.1.0.dylib" "rpcs3.app/Contents/lib/libc++abi.1.dylib"
cp "/usr/local/Homebrew/opt/llvm@16/lib/c++/libc++abi.1.0.dylib" "rpcs3.app/Contents/lib/libc++abi.1.dylib"
rm -rf "rpcs3.app/Contents/Frameworks/QtPdf.framework" \
"rpcs3.app/Contents/Frameworks/QtQml.framework" \

View File

@ -1,4 +1,4 @@
#!/bin/sh -ex
curl -L -o "./llvm.lock" "https://github.com/RPCS3/llvm-mirror/releases/download/custom-build-win/llvmlibs_mt.7z.sha256"
curl -L -o "./llvm.lock" "https://github.com/RPCS3/llvm-mirror/releases/download/custom-build-win-16.0.0/llvmlibs_mt.7z.sha256"
curl -L -o "./glslang.lock" "https://github.com/RPCS3/glslang/releases/download/custom-build-win/glslanglibs_mt.7z.sha256"

View File

@ -9,7 +9,7 @@ export ASSUME_ALWAYS_YES=true
pkg info # debug
# Prefer newer Clang than in base system (see also .ci/build-freebsd.sh)
pkg install llvm15
pkg install llvm16
# Mandatory dependencies (qt5-dbus and qt5-gui are pulled via qt5-widgets)
pkg install git ccache cmake ninja qt5-qmake qt5-buildtools qt5-widgets qt5-concurrent qt5-multimedia qt5-svg glew openal-soft ffmpeg

View File

@ -19,7 +19,7 @@ QT_DECL_URL="${QT_HOST}${QT_PREFIX}qtdeclarative${QT_SUFFIX}"
QT_TOOL_URL="${QT_HOST}${QT_PREFIX}qttools${QT_SUFFIX}"
QT_MM_URL="${QT_HOST}${QT_PREFIX}qtmultimedia${QT_SUFFIX}"
QT_SVG_URL="${QT_HOST}${QT_PREFIX}qtsvg${QT_SUFFIX}"
LLVMLIBS_URL='https://github.com/RPCS3/llvm-mirror/releases/download/custom-build-win/llvmlibs_mt.7z'
LLVMLIBS_URL='https://github.com/RPCS3/llvm-mirror/releases/download/custom-build-win-16.0.0/llvmlibs_mt.7z'
GLSLANG_URL='https://github.com/RPCS3/glslang/releases/download/custom-build-win/glslanglibs_mt.7z'
VULKAN_SDK_URL="https://www.dropbox.com/s/cs77c3iv5mbo0bt/VulkanSDK-${VULKAN_VER}-Installer.exe"

View File

@ -61,7 +61,7 @@ windows_task:
linux_task:
container:
image: rpcs3/rpcs3-ci-bionic:1.6
image: rpcs3/rpcs3-ci-bionic:1.7
cpu: 4
memory: 16G
env:

34
3rdparty/llvm.cmake vendored
View File

@ -2,7 +2,7 @@ if(WITH_LLVM)
CHECK_CXX_COMPILER_FLAG("-msse -msse2 -mcx16" COMPILER_X86)
CHECK_CXX_COMPILER_FLAG("-march=armv8-a+lse" COMPILER_ARM)
if(BUILD_LLVM_SUBMODULE)
if(BUILD_LLVM)
message(STATUS "LLVM will be built from the submodule.")
set(LLVM_TARGETS_TO_BUILD "AArch64;X86")
@ -38,49 +38,33 @@ if(WITH_LLVM)
set(CMAKE_CXX_FLAGS ${CXX_FLAGS_OLD})
# now tries to find LLVM again
find_package(LLVM 13.0 CONFIG)
find_package(LLVM 16.0 CONFIG)
if(NOT LLVM_FOUND)
message(FATAL_ERROR "Couldn't build LLVM from the submodule. You might need to run `git submodule update --init`")
endif()
else()
message(STATUS "Using prebuilt LLVM")
message(STATUS "Using prebuilt or system LLVM")
if (LLVM_DIR AND NOT IS_ABSOLUTE "${LLVM_DIR}")
# change relative LLVM_DIR to be relative to the source dir
set(LLVM_DIR ${CMAKE_SOURCE_DIR}/${LLVM_DIR})
endif()
find_package(LLVM 13.0 CONFIG)
find_package(LLVM 16.0 CONFIG)
if (NOT LLVM_FOUND)
if (LLVM_VERSION AND LLVM_VERSION_MAJOR LESS 11)
message(FATAL_ERROR "Found LLVM version ${LLVM_VERSION}. Required version 11.0. \
Enable BUILD_LLVM_SUBMODULE option to build LLVM from included as a git submodule.")
if (LLVM_VERSION AND LLVM_VERSION_MAJOR LESS 16)
message(FATAL_ERROR "Found LLVM version ${LLVM_VERSION}. Required version 16. \
Enable BUILD_LLVM option to build LLVM from included as a git submodule.")
endif()
message(FATAL_ERROR "Can't find LLVM libraries from the CMAKE_PREFIX_PATH path or LLVM_DIR. \
Enable BUILD_LLVM_SUBMODULE option to build LLVM from included as a git submodule.")
Enable BUILD_LLVM option to build LLVM from included as a git submodule.")
endif()
endif()
set(LLVM_LIBS LLVMMCJIT)
if(COMPILER_X86)
set(LLVM_LIBS ${LLVM_LIBS} LLVMX86CodeGen LLVMX86AsmParser)
endif()
if(COMPILER_ARM)
set(LLVM_LIBS ${LLVM_LIBS} LLVMX86CodeGen LLVMX86AsmParser LLVMAArch64CodeGen LLVMAArch64AsmParser)
endif()
if(WIN32 OR CMAKE_SYSTEM MATCHES "Linux")
set(LLVM_LIBS ${LLVM_LIBS} LLVMIntelJITEvents)
endif()
if(CMAKE_SYSTEM MATCHES "Linux")
set(LLVM_LIBS ${LLVM_LIBS} LLVMPerfJITEvents)
endif()
set(LLVM_LIBS LLVM)
add_library(3rdparty_llvm INTERFACE)
target_link_libraries(3rdparty_llvm INTERFACE ${LLVM_LIBS})

View File

@ -111,7 +111,7 @@ git submodule update --init
Open `rpcs3.sln`. The recommended build configuration is `Release`. (On older revisions: `Release - LLVM`)
You may want to download the precompiled [LLVM libs](https://github.com/RPCS3/llvm-mirror/releases/download/custom-build-win/llvmlibs_mt.7z) and extract them to the root rpcs3 folder (which contains `rpcs3.sln`), as well as download and extract the [additional libs](https://github.com/RPCS3/glslang/releases/download/custom-build-win/glslanglibs_mt.7z) to `lib\%CONFIGURATION%-x64\` to speed up compilation time (unoptimised/debug libs are currently not available precompiled).
You may want to download the precompiled [LLVM libs](https://github.com/RPCS3/llvm-mirror/releases/download/custom-build-win-16.0.0/llvmlibs_mt.7z) and extract them to the root rpcs3 folder (which contains `rpcs3.sln`), as well as download and extract the [additional libs](https://github.com/RPCS3/glslang/releases/download/custom-build-win/glslanglibs_mt.7z) to `lib\%CONFIGURATION%-x64\` to speed up compilation time (unoptimised/debug libs are currently not available precompiled).
If you're not using the precompiled libs, build the following projects in the *__BUILD_BEFORE* folder by right-clicking on a project > *Build*:
* glslang

View File

@ -14,7 +14,7 @@ endif()
option(USE_NATIVE_INSTRUCTIONS "USE_NATIVE_INSTRUCTIONS makes rpcs3 compile with -march=native, which is useful for local builds, but not good for packages." ON)
option(WITH_LLVM "Enable usage of LLVM library" ON)
option(BUILD_LLVM_SUBMODULE "Build LLVM from git submodule" ON)
option(BUILD_LLVM "Build LLVM from git submodule" OFF)
option(USE_FAUDIO "FAudio audio backend" ON)
option(USE_LIBEVDEV "libevdev-based joystick support" ON)
option(USE_DISCORD_RPC "Discord rich presence integration" OFF)

View File

@ -198,6 +198,9 @@ static u8* add_jit_memory(usz size, uint align)
});
}
ensure(pointer + pos >= get_jit_memory() + Off);
ensure(pointer + pos < get_jit_memory() + Off + 0x40000000);
return pointer + pos;
}
@ -1319,7 +1322,10 @@ std::string jit_compiler::cpu(const std::string& _cpu)
m_cpu == "icelake-client" ||
m_cpu == "icelake-server" ||
m_cpu == "tigerlake" ||
m_cpu == "rocketlake")
m_cpu == "rocketlake" ||
m_cpu == "alderlake" ||
m_cpu == "raptorlake" ||
m_cpu == "meteorlake")
{
// Downgrade if AVX is not supported by some chips
if (!utils::has_avx())
@ -1350,6 +1356,18 @@ std::string jit_compiler::cpu(const std::string& _cpu)
// Upgrade
m_cpu = "znver2";
}
if ((m_cpu == "znver3" || m_cpu == "goldmont" || m_cpu == "alderlake" || m_cpu == "raptorlake" || m_cpu == "meteorlake") && utils::has_avx512_icl())
{
// Upgrade
m_cpu = "icelake-client";
}
if (m_cpu == "goldmont" && utils::has_avx2())
{
// Upgrade
m_cpu = "alderlake";
}
}
return m_cpu;
@ -1362,15 +1380,13 @@ jit_compiler::jit_compiler(const std::unordered_map<std::string, u64>& _link, co
std::string result;
auto null_mod = std::make_unique<llvm::Module> ("null_", *m_context);
#if defined(__APPLE__) && defined(ARCH_ARM64)
// Force override triple on Apple arm64 or we'll get linking errors.
null_mod->setTargetTriple(llvm::Triple::normalize(utils::c_llvm_default_triple));
#endif
null_mod->setTargetTriple(llvm::Triple::normalize(llvm::sys::getProcessTriple()));
std::unique_ptr<llvm::RTDyldMemoryManager> mem;
if (_link.empty())
{
std::unique_ptr<llvm::RTDyldMemoryManager> mem;
// Auxiliary JIT (does not use custom memory manager, only writes the objects)
if (flags & 0x1)
{
mem = std::make_unique<MemoryManager1>();
@ -1378,31 +1394,33 @@ jit_compiler::jit_compiler(const std::unordered_map<std::string, u64>& _link, co
else
{
mem = std::make_unique<MemoryManager2>();
null_mod->setTargetTriple(llvm::Triple::normalize(utils::c_llvm_default_triple));
#if defined(_WIN32) && defined(ARCH_X64)
null_mod->setTargetTriple(llvm::Triple::normalize("x86_64-unknown-linux-gnu"));
#endif
}
}
else
{
mem = std::make_unique<MemoryManager1>();
}
// Auxiliary JIT (does not use custom memory manager, only writes the objects)
{
m_engine.reset(llvm::EngineBuilder(std::move(null_mod))
.setErrorStr(&result)
.setEngineKind(llvm::EngineKind::JIT)
.setMCJITMemoryManager(std::move(mem))
.setOptLevel(llvm::CodeGenOpt::Aggressive)
.setCodeModel(flags & 0x2 ? llvm::CodeModel::Large : llvm::CodeModel::Small)
#ifdef __APPLE__
.setCodeModel(llvm::CodeModel::Large)
#endif
.setRelocationModel(llvm::Reloc::Model::PIC_)
.setMCPU(m_cpu)
.create());
}
else
{
// Primary JIT
m_engine.reset(llvm::EngineBuilder(std::move(null_mod))
.setErrorStr(&result)
.setEngineKind(llvm::EngineKind::JIT)
.setMCJITMemoryManager(std::make_unique<MemoryManager1>())
.setOptLevel(llvm::CodeGenOpt::Aggressive)
.setCodeModel(flags & 0x2 ? llvm::CodeModel::Large : llvm::CodeModel::Small)
.setMCPU(m_cpu)
.create());
if (!_link.empty())
{
for (auto&& [name, addr] : _link)
{
m_engine->updateGlobalMapping(name, addr);

View File

@ -16,54 +16,97 @@
<Link>
<AdditionalLibraryDirectories Condition="'$(Configuration)'=='Debug'">%(AdditionalLibraryDirectories);..\llvm_build\Debug\lib</AdditionalLibraryDirectories>
<AdditionalDependencies>%(AdditionalDependencies);
LLVMProfileData.lib;
LLVMDebugInfoCodeView.lib;
LLVMDebugInfoMSF.lib;
LLVMInstrumentation.lib;
LLVMMCJIT.lib;
LLVMRuntimeDyld.lib;
LLVMVectorize.lib;
LLVMX86CodeGen.lib;
LLVMGlobalISel.lib;
LLVMX86Disassembler.lib;
LLVMExecutionEngine.lib;
LLVMAsmPrinter.lib;
LLVMSelectionDAG.lib;
LLVMCodeGen.lib;
LLVMScalarOpts.lib;
LLVMInstCombine.lib;
LLVMTransformUtils.lib;
LLVMAnalysis.lib;
LLVMTarget.lib;
LLVMX86Desc.lib;
LLVMObject.lib;
LLVMMCParser.lib;
LLVMBitReader.lib;
LLVMCore.lib;
LLVMMC.lib;
LLVMX86Info.lib;
LLVMSupport.lib;
LLVMMCDisassembler.lib;
LLVMipo.lib;
LLVMBinaryFormat.lib;
LLVMPasses.lib;
LLVMIRReader.lib;
LLVMLinker.lib;
LLVMAsmParser.lib;
LLVMX86AsmParser.lib;
LLVMDemangle.lib;
LLVMDebugInfoDWARF.lib;
LLVMRemarks.lib;
LLVMBitstreamReader.lib;
LLVMTextAPI.lib;
LLVMCFGuard.lib;
LLVMAggressiveInstCombine.lib;
LLVMBitWriter.lib;
LLVMCoroutines.lib;
LLVMObjCARCOpts.lib;
LLVMIntelJITEvents.lib;
LLVMAggressiveInstCombine.lib;
LLVMAnalysis.lib;
LLVMAsmParser.lib;
LLVMAsmPrinter.lib;
LLVMBinaryFormat.lib;
LLVMBitReader.lib;
LLVMBitstreamReader.lib;
LLVMBitWriter.lib;
LLVMCFGuard.lib;
LLVMCFIVerify.lib;
LLVMCodeGen.lib;
LLVMCore.lib;
LLVMCoroutines.lib;
LLVMCoverage.lib;
LLVMDebugInfoCodeView.lib;
LLVMDebuginfod.lib;
LLVMDebugInfoDWARF.lib;
LLVMDebugInfoGSYM.lib;
LLVMDebugInfoLogicalView.lib;
LLVMDebugInfoMSF.lib;
LLVMDebugInfoPDB.lib;
LLVMDemangle.lib;
LLVMDiff.lib;
LLVMDlltoolDriver.lib;
LLVMDWARFLinker.lib;
LLVMDWARFLinkerParallel.lib;
LLVMDWP.lib;
LLVMExecutionEngine.lib;
LLVMExegesis.lib;
LLVMExegesisX86.lib;
LLVMExtensions.lib;
LLVMFileCheck.lib;
LLVMFrontendHLSL.lib;
LLVMFrontendOpenACC.lib;
LLVMFrontendOpenMP.lib;
LLVMFuzzerCLI.lib;
LLVMFuzzMutate.lib;
LLVMGlobalISel.lib;
LLVMInstCombine.lib;
LLVMInstrumentation.lib;
LLVMIntelJITEvents.lib;
LLVMInterfaceStub.lib;
LLVMInterpreter.lib;
LLVMipo.lib;
LLVMIRPrinter.lib;
LLVMIRReader.lib;
LLVMJITLink.lib;
LLVMLibDriver.lib;
LLVMLineEditor.lib;
LLVMLinker.lib;
LLVMLTO.lib;
LLVMMCA.lib;
LLVMMCDisassembler.lib;
LLVMMCJIT.lib;
LLVMMC.lib;
LLVMMCParser.lib;
LLVMMIRParser.lib;
LLVMObjCARCOpts.lib;
LLVMObjCopy.lib;
LLVMObject.lib;
LLVMObjectYAML.lib;
LLVMOption.lib;
LLVMOrcJIT.lib;
LLVMOrcShared.lib;
LLVMOrcTargetProcess.lib;
LLVMPasses.lib;
LLVMProfileData.lib;
LLVMRemarks.lib;
LLVMRuntimeDyld.lib;
LLVMScalarOpts.lib;
LLVMSelectionDAG.lib;
LLVMSupport.lib;
LLVMSymbolize.lib;
LLVMTableGenGlobalISel.lib;
LLVMTableGen.lib;
LLVMTarget.lib;
LLVMTargetParser.lib;
LLVMTextAPI.lib;
LLVMTransformUtils.lib;
LLVMVectorize.lib;
LLVMWindowsDriver.lib;
LLVMWindowsManifest.lib;
LLVMX86AsmParser.lib;
LLVMX86CodeGen.lib;
LLVMX86Desc.lib;
LLVMX86Disassembler.lib;
LLVMX86Info.lib;
LLVMX86TargetMCA.lib;
LLVMXRay.lib;
</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<ItemGroup />
</Project>
</Project>

View File

@ -17,54 +17,97 @@
<OptimizeReferences>true</OptimizeReferences>
<AdditionalLibraryDirectories Condition="'$(Configuration)'=='Release'">%(AdditionalLibraryDirectories);..\llvm_build\Release\lib</AdditionalLibraryDirectories>
<AdditionalDependencies>%(AdditionalDependencies);
LLVMProfileData.lib;
LLVMDebugInfoCodeView.lib;
LLVMDebugInfoMSF.lib;
LLVMInstrumentation.lib;
LLVMMCJIT.lib;
LLVMRuntimeDyld.lib;
LLVMVectorize.lib;
LLVMX86CodeGen.lib;
LLVMGlobalISel.lib;
LLVMX86Disassembler.lib;
LLVMExecutionEngine.lib;
LLVMAsmPrinter.lib;
LLVMSelectionDAG.lib;
LLVMCodeGen.lib;
LLVMScalarOpts.lib;
LLVMInstCombine.lib;
LLVMTransformUtils.lib;
LLVMAnalysis.lib;
LLVMTarget.lib;
LLVMX86Desc.lib;
LLVMObject.lib;
LLVMMCParser.lib;
LLVMBitReader.lib;
LLVMCore.lib;
LLVMMC.lib;
LLVMX86Info.lib;
LLVMSupport.lib;
LLVMMCDisassembler.lib;
LLVMipo.lib;
LLVMBinaryFormat.lib;
LLVMPasses.lib;
LLVMIRReader.lib;
LLVMLinker.lib;
LLVMAsmParser.lib;
LLVMX86AsmParser.lib;
LLVMDemangle.lib;
LLVMDebugInfoDWARF.lib;
LLVMRemarks.lib;
LLVMBitstreamReader.lib;
LLVMTextAPI.lib;
LLVMCFGuard.lib;
LLVMAggressiveInstCombine.lib;
LLVMBitWriter.lib;
LLVMCoroutines.lib;
LLVMObjCARCOpts.lib;
LLVMIntelJITEvents.lib;
LLVMAggressiveInstCombine.lib;
LLVMAnalysis.lib;
LLVMAsmParser.lib;
LLVMAsmPrinter.lib;
LLVMBinaryFormat.lib;
LLVMBitReader.lib;
LLVMBitstreamReader.lib;
LLVMBitWriter.lib;
LLVMCFGuard.lib;
LLVMCFIVerify.lib;
LLVMCodeGen.lib;
LLVMCore.lib;
LLVMCoroutines.lib;
LLVMCoverage.lib;
LLVMDebugInfoCodeView.lib;
LLVMDebuginfod.lib;
LLVMDebugInfoDWARF.lib;
LLVMDebugInfoGSYM.lib;
LLVMDebugInfoLogicalView.lib;
LLVMDebugInfoMSF.lib;
LLVMDebugInfoPDB.lib;
LLVMDemangle.lib;
LLVMDiff.lib;
LLVMDlltoolDriver.lib;
LLVMDWARFLinker.lib;
LLVMDWARFLinkerParallel.lib;
LLVMDWP.lib;
LLVMExecutionEngine.lib;
LLVMExegesis.lib;
LLVMExegesisX86.lib;
LLVMExtensions.lib;
LLVMFileCheck.lib;
LLVMFrontendHLSL.lib;
LLVMFrontendOpenACC.lib;
LLVMFrontendOpenMP.lib;
LLVMFuzzerCLI.lib;
LLVMFuzzMutate.lib;
LLVMGlobalISel.lib;
LLVMInstCombine.lib;
LLVMInstrumentation.lib;
LLVMIntelJITEvents.lib;
LLVMInterfaceStub.lib;
LLVMInterpreter.lib;
LLVMipo.lib;
LLVMIRPrinter.lib;
LLVMIRReader.lib;
LLVMJITLink.lib;
LLVMLibDriver.lib;
LLVMLineEditor.lib;
LLVMLinker.lib;
LLVMLTO.lib;
LLVMMCA.lib;
LLVMMCDisassembler.lib;
LLVMMCJIT.lib;
LLVMMC.lib;
LLVMMCParser.lib;
LLVMMIRParser.lib;
LLVMObjCARCOpts.lib;
LLVMObjCopy.lib;
LLVMObject.lib;
LLVMObjectYAML.lib;
LLVMOption.lib;
LLVMOrcJIT.lib;
LLVMOrcShared.lib;
LLVMOrcTargetProcess.lib;
LLVMPasses.lib;
LLVMProfileData.lib;
LLVMRemarks.lib;
LLVMRuntimeDyld.lib;
LLVMScalarOpts.lib;
LLVMSelectionDAG.lib;
LLVMSupport.lib;
LLVMSymbolize.lib;
LLVMTableGenGlobalISel.lib;
LLVMTableGen.lib;
LLVMTarget.lib;
LLVMTargetParser.lib;
LLVMTextAPI.lib;
LLVMTransformUtils.lib;
LLVMVectorize.lib;
LLVMWindowsDriver.lib;
LLVMWindowsManifest.lib;
LLVMX86AsmParser.lib;
LLVMX86CodeGen.lib;
LLVMX86Desc.lib;
LLVMX86Disassembler.lib;
LLVMX86Info.lib;
LLVMX86TargetMCA.lib;
LLVMXRay.lib;
</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<ItemGroup />
</Project>
</Project>

2
llvm

@ -1 +1 @@
Subproject commit 9b52b6c39ae9f0759fbce7dd0db4b3290d6ebc56
Subproject commit 89d5468e9505ddb04754eadbfed526f5b6ad4cbd

View File

@ -75,6 +75,14 @@ cpu_translator::cpu_translator(llvm::Module* _module, bool is_be)
return result;
}
});
// Lowering for the "any_select_by_bit4" intrinsic: a per-byte select between
// operand 1 and operand 2, controlled by operand 0.
register_intrinsic("any_select_by_bit4", [&](llvm::CallInst* ci) -> llvm::Value*
{
	// Shift the mask left by 3 as two 64-bit lanes, then view it as 16 signed bytes:
	// bit 4 of every mask byte lands in bit 7 (the sign bit) of that same byte.
	const auto s = bitcast<s8[16]>(m_ir->CreateShl(bitcast<u64[2]>(ci->getOperand(0)), 3));
	const auto a = bitcast<u8[16]>(ci->getOperand(1));
	const auto b = bitcast<u8[16]>(ci->getOperand(2));

	// Negative byte (bit 4 of the mask was set) picks b, otherwise a
	return m_ir->CreateSelect(m_ir->CreateICmpSLT(s, llvm::ConstantAggregateZero::get(get_type<s8[16]>())), b, a);
});
}
void cpu_translator::initialize(llvm::LLVMContext& context, llvm::ExecutionEngine& engine)
@ -112,6 +120,8 @@ void cpu_translator::initialize(llvm::LLVMContext& context, llvm::ExecutionEngin
cpu == "broadwell" ||
cpu == "skylake" ||
cpu == "alderlake" ||
cpu == "raptorlake" ||
cpu == "meteorlake" ||
cpu == "bdver2" ||
cpu == "bdver3" ||
cpu == "bdver4" ||
@ -135,7 +145,9 @@ void cpu_translator::initialize(llvm::LLVMContext& context, llvm::ExecutionEngin
// Test VNNI feature (TODO)
if (cpu == "cascadelake" ||
cpu == "cooperlake" ||
cpu == "alderlake")
cpu == "alderlake" ||
cpu == "raptorlake" ||
cpu == "meteorlake")
{
m_use_vnni = true;
}

View File

@ -19,7 +19,9 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/IntrinsicsAArch64.h"
@ -59,6 +61,62 @@ concept DSLValue = requires (T& v)
{ v.eval(std::declval<llvm::IRBuilder<>*>()) } -> LLVMValue;
};
// Bit-width -> integer type table, consumed through get_int_vt<Bits>.
// The primary template has no `utype` member, so get_int_vt<Bits> is only
// valid for the widths that have an explicit specialization below.
template <usz N>
struct get_int_bits {};

template <> struct get_int_bits<1>   { using utype = bool; };
template <> struct get_int_bits<2>   { using utype = i2; };
template <> struct get_int_bits<4>   { using utype = i4; };
template <> struct get_int_bits<8>   { using utype = u8; };
template <> struct get_int_bits<16>  { using utype = u16; };
template <> struct get_int_bits<32>  { using utype = u32; };
template <> struct get_int_bits<64>  { using utype = u64; };
template <> struct get_int_bits<128> { using utype = u128; };

// Shorthand for the type registered for the given bit width
template <usz Bits>
using get_int_vt = typename get_int_bits<Bits>::utype;
template <typename T = void>
struct llvm_value_t
{
@ -3292,10 +3350,41 @@ public:
// Infinite-precision shift left
template <typename T, typename U, typename CT = llvm_common_t<T, U>>
static auto inf_shl(T&& a, U&& b)
auto inf_shl(T&& a, U&& b)
{
static constexpr u32 esz = llvm_value_t<CT>::esize;
if constexpr (esz == 32)
{
#if defined(ARCH_X64)
if (m_use_fma && !llvm::isa<llvm::Constant>(b.eval(m_ir)))
return eval(llvm_calli<CT, T, U>{"llvm.x86.avx2.psllv.d", {std::forward<T>(a), std::forward<U>(b)}});
#endif
}
if constexpr (esz == 16)
{
#if defined(ARCH_X64)
if (m_use_avx512 && !llvm::isa<llvm::Constant>(b.eval(m_ir)))
return eval(llvm_calli<CT, T, U>{"llvm.x86.avx512.psllv.w.128", {std::forward<T>(a), std::forward<U>(b)}});
if (m_use_fma && !llvm::isa<llvm::Constant>(b.eval(m_ir)))
{
using t32 = value_t<u32[4]>;
auto a32 = eval(bitcast<u32[4]>(std::forward<T>(a)));
auto b32 = eval(bitcast<u32[4]>(std::forward<U>(b)));
auto sizeL = eval(b32 & 0xffff);
auto sizeH = eval(b32 >> 16);
auto dataL = eval(llvm_calli<u32[4], t32, t32>{"llvm.x86.avx2.psllv.d", {a32, sizeL}});
auto dataH = eval(llvm_calli<u32[4], t32, t32>{"llvm.x86.avx2.psllv.d", {eval(a32 & 0xffff0000), sizeH}});
return eval(bitcast<CT>((dataL & 0xffff) | dataH));
}
#endif
}
return eval(select(b < esz, a << b, splat<CT>(0)));
/*
return expr(select(b < esz, a << b, splat<CT>(0)), [](llvm::Value*& value, llvm::Module* _m) -> llvm_match_tuple<T, U>
{
static const auto M = match<CT>();
@ -3314,14 +3403,46 @@ public:
value = nullptr;
return {};
});
*/
}
// Infinite-precision logical shift right (unsigned)
template <typename T, typename U, typename CT = llvm_common_t<T, U>>
static auto inf_lshr(T&& a, U&& b)
auto inf_lshr(T&& a, U&& b)
{
static constexpr u32 esz = llvm_value_t<CT>::esize;
if constexpr (esz == 32)
{
#if defined(ARCH_X64)
if (m_use_fma && !llvm::isa<llvm::Constant>(b.eval(m_ir)))
return eval(llvm_calli<CT, T, U>{"llvm.x86.avx2.psrlv.d", {std::forward<T>(a), std::forward<U>(b)}});
#endif
}
if constexpr (esz == 16)
{
#if defined(ARCH_X64)
if (m_use_avx512 && !llvm::isa<llvm::Constant>(b.eval(m_ir)))
return eval(llvm_calli<CT, T, U>{"llvm.x86.avx512.psrlv.w.128", {std::forward<T>(a), std::forward<U>(b)}});
if (m_use_fma && !llvm::isa<llvm::Constant>(b.eval(m_ir)))
{
using t32 = value_t<u32[4]>;
auto a32 = eval(bitcast<u32[4]>(std::forward<T>(a)));
auto b32 = eval(bitcast<u32[4]>(std::forward<U>(b)));
auto sizeL = eval(b32 & 0xffff);
auto sizeH = eval(b32 >> 16);
auto dataL = eval(llvm_calli<u32[4], t32, t32>{"llvm.x86.avx2.psrlv.d", {eval(a32 & 0xffff), sizeL}});
auto dataH = eval(llvm_calli<u32[4], t32, t32>{"llvm.x86.avx2.psrlv.d", {a32, sizeH}});
return eval(bitcast<CT>(dataL | (dataH & 0xffff0000)));
}
#endif
}
return eval(select(b < esz, a >> b, splat<CT>(0)));
/*
return expr(select(b < esz, a >> b, splat<CT>(0)), [](llvm::Value*& value, llvm::Module* _m) -> llvm_match_tuple<T, U>
{
static const auto M = match<CT>();
@ -3340,14 +3461,46 @@ public:
value = nullptr;
return {};
});
*/
}
// Infinite-precision arithmetic shift right (signed)
template <typename T, typename U, typename CT = llvm_common_t<T, U>>
static auto inf_ashr(T&& a, U&& b)
auto inf_ashr(T&& a, U&& b)
{
static constexpr u32 esz = llvm_value_t<CT>::esize;
if constexpr (esz == 32)
{
#if defined(ARCH_X64)
if (m_use_fma && !llvm::isa<llvm::Constant>(b.eval(m_ir)))
return eval(llvm_calli<CT, T, U>{"llvm.x86.avx2.psrav.d", {std::forward<T>(a), std::forward<U>(b)}});
#endif
}
if constexpr (esz == 16)
{
#if defined(ARCH_X64)
if (m_use_avx512 && !llvm::isa<llvm::Constant>(b.eval(m_ir)))
return eval(llvm_calli<CT, T, U>{"llvm.x86.avx512.psrav.w.128", {std::forward<T>(a), std::forward<U>(b)}});
if (m_use_fma && !llvm::isa<llvm::Constant>(b.eval(m_ir)))
{
using t32 = value_t<u32[4]>;
auto a32 = eval(bitcast<u32[4]>(std::forward<T>(a)));
auto b32 = eval(bitcast<u32[4]>(std::forward<U>(b)));
auto sizeL = eval(b32 & 0xffff);
auto sizeH = eval(b32 >> 16);
auto dataL = eval(llvm_calli<u32[4], t32, t32>{"llvm.x86.avx2.psrav.d", {eval(a32 << 16), sizeL}});
auto dataH = eval(llvm_calli<u32[4], t32, t32>{"llvm.x86.avx2.psrav.d", {a32, sizeH}});
return eval(bitcast<CT>((dataL >> 16) | (dataH & 0xffff0000)));
}
#endif
}
return eval(a >> select(b > (esz - 1), splat<CT>(esz - 1), b));
/*
return expr(a >> select(b > (esz - 1), splat<CT>(esz - 1), b), [](llvm::Value*& value, llvm::Module* _m) -> llvm_match_tuple<T, U>
{
static const auto M = match<CT>();
@ -3366,6 +3519,7 @@ public:
value = nullptr;
return {};
});
*/
}
template <typename... Types>
@ -3567,6 +3721,18 @@ public:
template <typename T = v128>
llvm::Constant* make_const_vector(T, llvm::Type*, u32 = __builtin_LINE());
// Evaluate `a` with the current IR builder and return what llvm::computeKnownBits
// reports for the resulting value, using the data layout of the current module.
template <typename T>
llvm::KnownBits get_known_bits(T a)
{
	const auto evaluated = a.eval(m_ir);
	const llvm::DataLayout& layout = m_module->getDataLayout();
	return llvm::computeKnownBits(evaluated, layout);
}
// Wrap a plain constant into llvm::KnownBits: the constant is converted to u64
// and stored in an APInt that is sizeof(T) * 8 bits wide.
template <typename T>
llvm::KnownBits kbc(T value)
{
	const llvm::APInt constant(sizeof(T) * 8, u64(value));
	return llvm::KnownBits::makeConstant(constant);
}
private:
// Custom intrinsic table
std::unordered_map<std::string_view, std::function<llvm::Value*(llvm::CallInst*)>> m_intrinsics;
@ -3647,6 +3813,13 @@ public:
});
}
// (m << 3) >= 0 ? a : b
template <typename T, typename U, typename V>
static auto select_by_bit4(T&& m, U&& a, V&& b)
{
return llvm_calli<u8[16], T, U, V>{"any_select_by_bit4", {std::forward<T>(m), std::forward<U>(a), std::forward<V>(b)}};
}
template <typename T, typename = std::enable_if_t<std::is_same_v<llvm_common_t<T>, f32[4]>>>
static auto fre(T&& a)
{

View File

@ -2130,10 +2130,10 @@ static void ppu_check(ppu_thread& ppu, u64 addr)
{
ppu.cia = ::narrow<u32>(addr);
// ppu_check() shall not return directly
if (ppu.test_stopped())
{
return;
}
;
ppu_escape(&ppu);
}
static void ppu_trace(u64 addr)
@ -3368,13 +3368,6 @@ bool ppu_initialize(const ppu_module& info, bool check_only)
{
std::unordered_map<std::string, u64> link_table
{
{ "sys_game_watchdog_start", reinterpret_cast<u64>(ppu_execute_syscall) },
{ "sys_game_watchdog_stop", reinterpret_cast<u64>(ppu_execute_syscall) },
{ "sys_game_watchdog_clear", reinterpret_cast<u64>(ppu_execute_syscall) },
{ "sys_game_get_system_sw_version", reinterpret_cast<u64>(ppu_execute_syscall) },
{ "sys_game_board_storage_read", reinterpret_cast<u64>(ppu_execute_syscall) },
{ "sys_game_board_storage_write", reinterpret_cast<u64>(ppu_execute_syscall) },
{ "sys_game_get_rtc_status", reinterpret_cast<u64>(ppu_execute_syscall) },
{ "__trap", reinterpret_cast<u64>(&ppu_trap) },
{ "__error", reinterpret_cast<u64>(&ppu_error) },
{ "__check", reinterpret_cast<u64>(&ppu_check) },
@ -3388,6 +3381,7 @@ bool ppu_initialize(const ppu_module& info, bool check_only)
{ "__dcbz", reinterpret_cast<u64>(+[](u32 addr){ alignas(64) static constexpr u8 z[128]{}; do_cell_atomic_128_store(addr, z); }) },
{ "__resupdate", reinterpret_cast<u64>(vm::reservation_update) },
{ "__resinterp", reinterpret_cast<u64>(ppu_reservation_fallback) },
{ "__escape", reinterpret_cast<u64>(+ppu_escape) },
};
for (u64 index = 0; index < 1024; index++)
@ -3943,12 +3937,7 @@ static void ppu_initialize2(jit_compiler& jit, const ppu_module& module_part, co
std::unique_ptr<Module> _module = std::make_unique<Module>(obj_name, jit.get_context());
// Initialize target
#if defined(__APPLE__) && defined(ARCH_ARM64)
// Force target linux on macOS arm64 to bypass some 64-bit address space linking issues
_module->setTargetTriple(Triple::normalize(utils::c_llvm_default_triple));
#else
_module->setTargetTriple(Triple::normalize(sys::getProcessTriple()));
#endif
_module->setDataLayout(jit.get_engine().getTargetMachine()->createDataLayout());
// Initialize translator
@ -3978,6 +3967,11 @@ static void ppu_initialize2(jit_compiler& jit, const ppu_module& module_part, co
}
{
if (g_cfg.core.ppu_debug)
{
translator.build_interpreter();
}
legacy::FunctionPassManager pm(_module.get());
// Basic optimizations

View File

@ -200,7 +200,7 @@ Function* PPUTranslator::Translate(const ppu_function& info)
// Create tail call to the check function
m_ir->SetInsertPoint(vcheck);
Call(GetType<void>(), "__check", m_thread, GetAddr());
Call(GetType<void>(), "__check", m_thread, GetAddr())->setTailCall();
m_ir->CreateRetVoid();
}
else
@ -604,12 +604,12 @@ Value* PPUTranslator::ReadMemory(Value* addr, Type* type, bool is_be, u32 align)
{
// Read, byteswap, bitcast
const auto int_type = m_ir->getIntNTy(size);
const auto value = m_ir->CreateAlignedLoad(int_type, GetMemory(addr, int_type), llvm::MaybeAlign{align}, true);
const auto value = m_ir->CreateAlignedLoad(int_type, GetMemory(addr, int_type), llvm::MaybeAlign{align});
return bitcast(Call(int_type, fmt::format("llvm.bswap.i%u", size), value), type);
}
// Read normally
return m_ir->CreateAlignedLoad(type, GetMemory(addr, type), llvm::MaybeAlign{align}, true);
return m_ir->CreateAlignedLoad(type, GetMemory(addr, type), llvm::MaybeAlign{align});
}
void PPUTranslator::WriteMemory(Value* addr, Value* value, bool is_be, u32 align)
@ -625,7 +625,7 @@ void PPUTranslator::WriteMemory(Value* addr, Value* value, bool is_be, u32 align
}
// Write
m_ir->CreateAlignedStore(value, GetMemory(addr, value->getType()), llvm::MaybeAlign{align}, true);
m_ir->CreateAlignedStore(value, GetMemory(addr, value->getType()), llvm::MaybeAlign{align});
}
void PPUTranslator::CompilationError(const std::string& error)
@ -1945,12 +1945,14 @@ void PPUTranslator::SC(ppu_opcode_t op)
if (index < 1024)
{
Call(GetType<void>(), fmt::format("%s", ppu_syscall_code(index)), m_thread);
//Call(GetType<void>(), "__escape", m_thread)->setTailCall();
m_ir->CreateRetVoid();
return;
}
}
Call(GetType<void>(), op.lev ? "__lv1call" : "__syscall", m_thread, num);
//Call(GetType<void>(), "__escape", m_thread)->setTailCall();
m_ir->CreateRetVoid();
}
@ -2507,6 +2509,7 @@ void PPUTranslator::LWARX(ppu_opcode_t op)
RegStore(Trunc(GetAddr()), m_cia);
FlushRegisters();
Call(GetType<void>(), "__resinterp", m_thread);
//Call(GetType<void>(), "__escape", m_thread)->setTailCall();
m_ir->CreateRetVoid();
return;
}
@ -2649,6 +2652,7 @@ void PPUTranslator::LDARX(ppu_opcode_t op)
RegStore(Trunc(GetAddr()), m_cia);
FlushRegisters();
Call(GetType<void>(), "__resinterp", m_thread);
//Call(GetType<void>(), "__escape", m_thread)->setTailCall();
m_ir->CreateRetVoid();
return;
}
@ -2786,11 +2790,7 @@ void PPUTranslator::MTOCRF(ppu_opcode_t op)
const auto index = m_ir->CreateAnd(m_ir->CreateLShr(value, 28 - i * 4), 15);
const auto src = m_ir->CreateGEP(dyn_cast<GlobalVariable>(m_mtocr_table)->getValueType(), m_mtocr_table, {m_ir->getInt32(0), m_ir->CreateShl(index, 2)});
const auto dst = bitcast(m_ir->CreateStructGEP(m_thread_type, m_thread, static_cast<uint>(m_cr - m_locals) + i * 4), GetType<u8*>());
#if LLVM_VERSION_MAJOR < 15
Call(GetType<void>(), "llvm.memcpy.p0i8.p0i8.i32", dst, src, m_ir->getInt32(4), m_ir->getFalse());
#else
Call(GetType<void>(), "llvm.memcpy.p0.p0.i32", dst, src, m_ir->getInt32(4), m_ir->getFalse());
#endif
}
}
}
@ -3313,7 +3313,7 @@ void PPUTranslator::STVLX(ppu_opcode_t op)
const auto mask = bitcast<bool[16]>(splat<u16>(0xffff) << trunc<u16>(value<u64>(addr) & 0xf));
const auto ptr = value<u8(*)[16]>(GetMemory(m_ir->CreateAnd(addr, ~0xfull), GetType<u8[16]>()));
const auto align = splat<u32>(16);
eval(llvm_calli<void, decltype(data), decltype(ptr), decltype(align), decltype(mask)>{"llvm.masked.store.v16i8.p0v16i8", {data, ptr, align, mask}});
eval(llvm_calli<void, decltype(data), decltype(ptr), decltype(align), decltype(mask)>{"llvm.masked.store.v16i8.p0", {data, ptr, align, mask}});
}
void PPUTranslator::STDBRX(ppu_opcode_t op)
@ -3343,7 +3343,7 @@ void PPUTranslator::STVRX(ppu_opcode_t op)
const auto mask = bitcast<bool[16]>(trunc<u16>(splat<u64>(0xffff) << (value<u64>(addr) & 0xf) >> 16));
const auto ptr = value<u8(*)[16]>(GetMemory(m_ir->CreateAnd(addr, ~0xfull), GetType<u8[16]>()));
const auto align = splat<u32>(16);
eval(llvm_calli<void, decltype(data), decltype(ptr), decltype(align), decltype(mask)>{"llvm.masked.store.v16i8.p0v16i8", {data, ptr, align, mask}});
eval(llvm_calli<void, decltype(data), decltype(ptr), decltype(align), decltype(mask)>{"llvm.masked.store.v16i8.p0", {data, ptr, align, mask}});
}
void PPUTranslator::STFSUX(ppu_opcode_t op)
@ -3524,7 +3524,7 @@ void PPUTranslator::DCBZ(ppu_opcode_t op)
}
else
{
Call(GetType<void>(), "llvm.memset.p0i8.i32", GetMemory(addr, GetType<u8>()), m_ir->getInt8(0), m_ir->getInt32(128), m_ir->getTrue());
Call(GetType<void>(), "llvm.memset.p0.i32", GetMemory(addr, GetType<u8>()), m_ir->getInt8(0), m_ir->getInt32(128), m_ir->getFalse());
}
}
@ -4601,6 +4601,7 @@ void PPUTranslator::UNK(ppu_opcode_t op)
{
FlushRegisters();
Call(GetType<void>(), "__error", m_thread, GetAddr(), m_ir->getInt32(op.opcode));
//Call(GetType<void>(), "__escape", m_thread)->setTailCall();
m_ir->CreateRetVoid();
}
@ -4862,6 +4863,7 @@ Value* PPUTranslator::CheckTrapCondition(u32 to, Value* left, Value* right)
// Emits a call to the "__trap" helper, passing the thread context and the
// value of GetAddr(), and then terminates the generated function.
void PPUTranslator::Trap()
{
	const auto void_type = GetType<void>();

	Call(void_type, "__trap", m_thread, GetAddr());
	//Call(GetType<void>(), "__escape", m_thread)->setTailCall();
	m_ir->CreateRetVoid();
}
@ -4909,4 +4911,184 @@ MDNode* PPUTranslator::CheckBranchProbability(u32 bo)
return nullptr;
}
void PPUTranslator::build_interpreter()
{
#define BUILD_VEC_INST(i) { \
m_function = llvm::cast<llvm::Function>(m_module->getOrInsertFunction("op_" #i, get_type<void>(), m_thread_type->getPointerTo()).getCallee()); \
std::fill(std::begin(m_globals), std::end(m_globals), nullptr); \
std::fill(std::begin(m_locals), std::end(m_locals), nullptr); \
IRBuilder<> irb(BasicBlock::Create(m_context, "__entry", m_function)); \
m_ir = &irb; \
m_thread = m_function->getArg(0); \
ppu_opcode_t op{}; \
op.vd = 0; \
op.va = 1; \
op.vb = 2; \
op.vc = 3; \
this->i(op); \
FlushRegisters(); \
m_ir->CreateRetVoid(); \
replace_intrinsics(*m_function); \
}
BUILD_VEC_INST(VADDCUW);
BUILD_VEC_INST(VADDFP);
BUILD_VEC_INST(VADDSBS);
BUILD_VEC_INST(VADDSHS);
BUILD_VEC_INST(VADDSWS);
BUILD_VEC_INST(VADDUBM);
BUILD_VEC_INST(VADDUBS);
BUILD_VEC_INST(VADDUHM);
BUILD_VEC_INST(VADDUHS);
BUILD_VEC_INST(VADDUWM);
BUILD_VEC_INST(VADDUWS);
BUILD_VEC_INST(VAND);
BUILD_VEC_INST(VANDC);
BUILD_VEC_INST(VAVGSB);
BUILD_VEC_INST(VAVGSH);
BUILD_VEC_INST(VAVGSW);
BUILD_VEC_INST(VAVGUB);
BUILD_VEC_INST(VAVGUH);
BUILD_VEC_INST(VAVGUW);
BUILD_VEC_INST(VCFSX);
BUILD_VEC_INST(VCFUX);
BUILD_VEC_INST(VCMPBFP);
BUILD_VEC_INST(VCMPBFP_);
BUILD_VEC_INST(VCMPEQFP);
BUILD_VEC_INST(VCMPEQFP_);
BUILD_VEC_INST(VCMPEQUB);
BUILD_VEC_INST(VCMPEQUB_);
BUILD_VEC_INST(VCMPEQUH);
BUILD_VEC_INST(VCMPEQUH_);
BUILD_VEC_INST(VCMPEQUW);
BUILD_VEC_INST(VCMPEQUW_);
BUILD_VEC_INST(VCMPGEFP);
BUILD_VEC_INST(VCMPGEFP_);
BUILD_VEC_INST(VCMPGTFP);
BUILD_VEC_INST(VCMPGTFP_);
BUILD_VEC_INST(VCMPGTSB);
BUILD_VEC_INST(VCMPGTSB_);
BUILD_VEC_INST(VCMPGTSH);
BUILD_VEC_INST(VCMPGTSH_);
BUILD_VEC_INST(VCMPGTSW);
BUILD_VEC_INST(VCMPGTSW_);
BUILD_VEC_INST(VCMPGTUB);
BUILD_VEC_INST(VCMPGTUB_);
BUILD_VEC_INST(VCMPGTUH);
BUILD_VEC_INST(VCMPGTUH_);
BUILD_VEC_INST(VCMPGTUW);
BUILD_VEC_INST(VCMPGTUW_);
BUILD_VEC_INST(VCTSXS);
BUILD_VEC_INST(VCTUXS);
BUILD_VEC_INST(VEXPTEFP);
BUILD_VEC_INST(VLOGEFP);
BUILD_VEC_INST(VMADDFP);
BUILD_VEC_INST(VMAXFP);
BUILD_VEC_INST(VMAXSB);
BUILD_VEC_INST(VMAXSH);
BUILD_VEC_INST(VMAXSW);
BUILD_VEC_INST(VMAXUB);
BUILD_VEC_INST(VMAXUH);
BUILD_VEC_INST(VMAXUW);
BUILD_VEC_INST(VMHADDSHS);
BUILD_VEC_INST(VMHRADDSHS);
BUILD_VEC_INST(VMINFP);
BUILD_VEC_INST(VMINSB);
BUILD_VEC_INST(VMINSH);
BUILD_VEC_INST(VMINSW);
BUILD_VEC_INST(VMINUB);
BUILD_VEC_INST(VMINUH);
BUILD_VEC_INST(VMINUW);
BUILD_VEC_INST(VMLADDUHM);
BUILD_VEC_INST(VMRGHB);
BUILD_VEC_INST(VMRGHH);
BUILD_VEC_INST(VMRGHW);
BUILD_VEC_INST(VMRGLB);
BUILD_VEC_INST(VMRGLH);
BUILD_VEC_INST(VMRGLW);
BUILD_VEC_INST(VMSUMMBM);
BUILD_VEC_INST(VMSUMSHM);
BUILD_VEC_INST(VMSUMSHS);
BUILD_VEC_INST(VMSUMUBM);
BUILD_VEC_INST(VMSUMUHM);
BUILD_VEC_INST(VMSUMUHS);
BUILD_VEC_INST(VMULESB);
BUILD_VEC_INST(VMULESH);
BUILD_VEC_INST(VMULEUB);
BUILD_VEC_INST(VMULEUH);
BUILD_VEC_INST(VMULOSB);
BUILD_VEC_INST(VMULOSH);
BUILD_VEC_INST(VMULOUB);
BUILD_VEC_INST(VMULOUH);
BUILD_VEC_INST(VNMSUBFP);
BUILD_VEC_INST(VNOR);
BUILD_VEC_INST(VOR);
BUILD_VEC_INST(VPERM);
BUILD_VEC_INST(VPKPX);
BUILD_VEC_INST(VPKSHSS);
BUILD_VEC_INST(VPKSHUS);
BUILD_VEC_INST(VPKSWSS);
BUILD_VEC_INST(VPKSWUS);
BUILD_VEC_INST(VPKUHUM);
BUILD_VEC_INST(VPKUHUS);
BUILD_VEC_INST(VPKUWUM);
BUILD_VEC_INST(VPKUWUS);
BUILD_VEC_INST(VREFP);
BUILD_VEC_INST(VRFIM);
BUILD_VEC_INST(VRFIN);
BUILD_VEC_INST(VRFIP);
BUILD_VEC_INST(VRFIZ);
BUILD_VEC_INST(VRLB);
BUILD_VEC_INST(VRLH);
BUILD_VEC_INST(VRLW);
BUILD_VEC_INST(VRSQRTEFP);
BUILD_VEC_INST(VSEL);
BUILD_VEC_INST(VSL);
BUILD_VEC_INST(VSLB);
BUILD_VEC_INST(VSLDOI);
BUILD_VEC_INST(VSLH);
BUILD_VEC_INST(VSLO);
BUILD_VEC_INST(VSLW);
BUILD_VEC_INST(VSPLTB);
BUILD_VEC_INST(VSPLTH);
BUILD_VEC_INST(VSPLTISB);
BUILD_VEC_INST(VSPLTISH);
BUILD_VEC_INST(VSPLTISW);
BUILD_VEC_INST(VSPLTW);
BUILD_VEC_INST(VSR);
BUILD_VEC_INST(VSRAB);
BUILD_VEC_INST(VSRAH);
BUILD_VEC_INST(VSRAW);
BUILD_VEC_INST(VSRB);
BUILD_VEC_INST(VSRH);
BUILD_VEC_INST(VSRO);
BUILD_VEC_INST(VSRW);
BUILD_VEC_INST(VSUBCUW);
BUILD_VEC_INST(VSUBFP);
BUILD_VEC_INST(VSUBSBS);
BUILD_VEC_INST(VSUBSHS);
BUILD_VEC_INST(VSUBSWS);
BUILD_VEC_INST(VSUBUBM);
BUILD_VEC_INST(VSUBUBS);
BUILD_VEC_INST(VSUBUHM);
BUILD_VEC_INST(VSUBUHS);
BUILD_VEC_INST(VSUBUWM);
BUILD_VEC_INST(VSUBUWS);
BUILD_VEC_INST(VSUMSWS);
BUILD_VEC_INST(VSUM2SWS);
BUILD_VEC_INST(VSUM4SBS);
BUILD_VEC_INST(VSUM4SHS);
BUILD_VEC_INST(VSUM4UBS);
BUILD_VEC_INST(VUPKHPX);
BUILD_VEC_INST(VUPKHSB);
BUILD_VEC_INST(VUPKHSH);
BUILD_VEC_INST(VUPKLPX);
BUILD_VEC_INST(VUPKLSB);
BUILD_VEC_INST(VUPKLSH);
BUILD_VEC_INST(VXOR);
#undef BUILD_VEC_INST
}
#endif

View File

@ -856,6 +856,8 @@ public:
// Variant entry point that simply forwards the opcode to FCTID.
void FCTID_(ppu_opcode_t op)
{
	FCTID(op);
}
// Variant entry point that simply forwards the opcode to FCTIDZ.
void FCTIDZ_(ppu_opcode_t op)
{
	FCTIDZ(op);
}
// Variant entry point that simply forwards the opcode to FCFID.
void FCFID_(ppu_opcode_t op)
{
	FCFID(op);
}
void build_interpreter();
};
#endif

View File

@ -3914,6 +3914,7 @@ void spu_recompiler_base::dump(const spu_program& result, std::string& out)
#if LLVM_VERSION_MAJOR < 17
#include "llvm/ADT/Triple.h"
#endif
#include "llvm/Support/Host.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Verifier.h"
#include "llvm/IR/InlineAsm.h"
@ -5006,7 +5007,11 @@ public:
// Create LLVM module
std::unique_ptr<Module> _module = std::make_unique<Module>(m_hash + ".obj", m_context);
_module->setTargetTriple(Triple::normalize(utils::c_llvm_default_triple));
#if defined(_WIN32) && defined(ARCH_X64)
_module->setTargetTriple(Triple::normalize("x86_64-unknown-linux-gnu"));
#else
_module->setTargetTriple(Triple::normalize(sys::getProcessTriple()));
#endif
_module->setDataLayout(m_jit.get_engine().getTargetMachine()->createDataLayout());
m_module = _module.get();
@ -5227,6 +5232,7 @@ public:
m_ir->CreateRetVoid();
m_ir->SetInsertPoint(label_stop);
call("spu_escape", spu_runtime::g_escape, m_thread)->setTailCall();
m_ir->CreateRetVoid();
m_ir->SetInsertPoint(label_diff);
@ -5681,7 +5687,11 @@ public:
// Create LLVM module
std::unique_ptr<Module> _module = std::make_unique<Module>("spu_interpreter.obj", m_context);
_module->setTargetTriple(Triple::normalize(utils::c_llvm_default_triple));
#if defined(_WIN32) && defined(ARCH_X64)
_module->setTargetTriple(Triple::normalize("x86_64-unknown-linux-gnu"));
#else
_module->setTargetTriple(Triple::normalize(sys::getProcessTriple()));
#endif
_module->setDataLayout(m_jit.get_engine().getTargetMachine()->createDataLayout());
m_module = _module.get();
@ -5982,7 +5992,8 @@ public:
ncall->setTailCall();
m_ir->CreateRetVoid();
m_ir->SetInsertPoint(_stop);
m_ir->CreateStore(m_interp_pc, spu_ptr<u32>(&spu_thread::pc));
m_ir->CreateStore(m_interp_pc, spu_ptr<u32>(&spu_thread::pc), true);
call("spu_escape", spu_runtime::g_escape, m_thread)->setTailCall();
m_ir->CreateRetVoid();
}
}
@ -6355,7 +6366,7 @@ public:
// Emits IR that loads the 64-bit word located `off` bytes into the thread
// context, shifts it right by spu_channel::off_count, XORs the result with
// `inv` and truncates it to 32 bits.
//
// off: byte offset of the channel word inside the thread context
// inv: value XORed into the shifted word before truncation (0 leaves it as is)
// Returns the resulting u32 IR value.
llvm::Value* get_rchcnt(u32 off, u64 inv = 0)
{
	// Single load: `val` must only be declared once in this scope
	const auto val = m_ir->CreateLoad(get_type<u64>(), _ptr<u64>(m_thread, off));
	const auto shv = m_ir->CreateLShr(val, spu_channel::off_count);
	return m_ir->CreateTrunc(m_ir->CreateXor(shv, u64{inv}), get_type<u32>());
}
@ -6415,20 +6426,20 @@ public:
}
case MFC_Cmd:
{
res.value = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::mfc_size), true);
res.value = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::mfc_size));
res.value = m_ir->CreateSub(m_ir->getInt32(16), res.value);
break;
}
case SPU_RdInMbox:
{
res.value = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::ch_in_mbox), true);
res.value = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::ch_in_mbox));
res.value = m_ir->CreateLShr(res.value, 8);
res.value = m_ir->CreateAnd(res.value, 7);
break;
}
case SPU_RdEventStat:
{
const auto mask = m_ir->CreateTrunc(m_ir->CreateLShr(m_ir->CreateLoad(get_type<u64>(), spu_ptr<u64>(&spu_thread::ch_events), true), 32), get_type<u32>());
const auto mask = m_ir->CreateTrunc(m_ir->CreateLShr(m_ir->CreateLoad(get_type<u64>(), spu_ptr<u64>(&spu_thread::ch_events)), 32), get_type<u32>());
res.value = call("spu_get_events", &exec_get_events, m_thread, mask);
break;
}
@ -6815,7 +6826,7 @@ public:
if (csize > 0 && csize <= 16)
{
// Generate single copy operation
m_ir->CreateStore(m_ir->CreateLoad(vtype, m_ir->CreateBitCast(src, vtype->getPointerTo()), true), m_ir->CreateBitCast(dst, vtype->getPointerTo()), true);
m_ir->CreateStore(m_ir->CreateLoad(vtype, m_ir->CreateBitCast(src, vtype->getPointerTo())), m_ir->CreateBitCast(dst, vtype->getPointerTo()));
}
else if (csize <= stride * 16 && !(csize % 32))
{
@ -6826,7 +6837,7 @@ public:
const auto _dst = m_ir->CreateGEP(get_type<u8>(), dst, m_ir->getInt32(i));
if (csize - i < stride)
{
m_ir->CreateStore(m_ir->CreateLoad(get_type<u8[16]>(), m_ir->CreateBitCast(_src, get_type<u8(*)[16]>()), true), m_ir->CreateBitCast(_dst, get_type<u8(*)[16]>()), true);
m_ir->CreateStore(m_ir->CreateLoad(get_type<u8[16]>(), m_ir->CreateBitCast(_src, get_type<u8(*)[16]>())), m_ir->CreateBitCast(_dst, get_type<u8(*)[16]>()));
}
else
{
@ -7086,18 +7097,51 @@ public:
// ROTM: per-element (u32) right shift of ra by the negated count from rb.
//
// The negated count is taken straight from rb's operand when rb is itself a
// negation, so no double negation is emitted. When the known-bits query shows
// that the value-32 bit of the count cannot be set, the count is masked to
// five bits and a plain shift is emitted; otherwise the six-bit count is
// handed to inf_lshr (presumably a shift that tolerates counts >= the element
// width -- confirm against its definition).
void ROTM(spu_opcode_t op)
{
	const auto [a, b] = get_vrs<u32[4]>(op.ra, op.rb);

	auto minusb = eval(-b);

	if (auto [ok, x] = match_expr(b, -match<u32[4]>()); ok)
	{
		// b == -x, therefore -b == x
		minusb = eval(x);
	}

	if (auto k = get_known_bits(minusb); (k & kbc<u32>(32)).isZero())
	{
		set_vr(op.rt, a >> (minusb & 31));
		return;
	}

	set_vr(op.rt, inf_lshr(a, minusb & 63));
}
// ROTMA: per-element (s32) right shift of ra by the negated count from rb.
//
// The negated count is taken straight from rb's operand when rb is itself a
// negation, so no double negation is emitted. When the known-bits query shows
// that the value-32 bit of the count cannot be set, the count is masked to
// five bits and a plain shift on the signed elements is emitted; otherwise the
// six-bit count is handed to inf_ashr (presumably a shift that tolerates
// counts >= the element width -- confirm against its definition).
void ROTMA(spu_opcode_t op)
{
	const auto [a, b] = get_vrs<s32[4]>(op.ra, op.rb);

	auto minusb = eval(-b);

	if (auto [ok, x] = match_expr(b, -match<s32[4]>()); ok)
	{
		// b == -x, therefore -b == x
		minusb = eval(x);
	}

	if (auto k = get_known_bits(minusb); (k & kbc<u32>(32)).isZero())
	{
		set_vr(op.rt, a >> (minusb & 31));
		return;
	}

	set_vr(op.rt, inf_ashr(a, minusb & 63));
}
// SHL: per-element (u32) left shift of ra by the count from rb.
// A plain shift with a five-bit count is used only when the known-bits query
// shows that the value-32 bit of the count cannot be set; in every other case
// the six-bit count goes through inf_shl.
void SHL(spu_opcode_t op)
{
	const auto [a, b] = get_vrs<u32[4]>(op.ra, op.rb);

	auto known = get_known_bits(b);

	if (!(known & kbc<u32>(32)).isZero())
	{
		// The count may reach the element width
		set_vr(op.rt, inf_shl(a, b & 63));
		return;
	}

	set_vr(op.rt, a << (b & 31));
}
@ -7110,18 +7154,51 @@ public:
// ROTHM: per-element (u16) right shift of ra by the negated count from rb.
//
// The negated count is taken straight from rb's operand when rb is itself a
// negation, so no double negation is emitted. When the known-bits query shows
// that the value-16 bit of the count cannot be set, the count is masked to
// four bits and a plain shift is emitted; otherwise the five-bit count is
// handed to inf_lshr (presumably a shift that tolerates counts >= the element
// width -- confirm against its definition).
void ROTHM(spu_opcode_t op)
{
	const auto [a, b] = get_vrs<u16[8]>(op.ra, op.rb);

	auto minusb = eval(-b);

	if (auto [ok, x] = match_expr(b, -match<u16[8]>()); ok)
	{
		// b == -x, therefore -b == x
		minusb = eval(x);
	}

	if (auto k = get_known_bits(minusb); (k & kbc<u16>(16)).isZero())
	{
		set_vr(op.rt, a >> (minusb & 15));
		return;
	}

	set_vr(op.rt, inf_lshr(a, minusb & 31));
}
// ROTMAH: per-element (s16) right shift of ra by the negated count from rb.
//
// The negated count is taken straight from rb's operand when rb is itself a
// negation, so no double negation is emitted. When the known-bits query shows
// that the value-16 bit of the count cannot be set, the count is masked to
// four bits and a plain shift on the signed elements is emitted; otherwise the
// five-bit count is handed to inf_ashr (presumably a shift that tolerates
// counts >= the element width -- confirm against its definition).
void ROTMAH(spu_opcode_t op)
{
	const auto [a, b] = get_vrs<s16[8]>(op.ra, op.rb);

	auto minusb = eval(-b);

	if (auto [ok, x] = match_expr(b, -match<s16[8]>()); ok)
	{
		// b == -x, therefore -b == x
		minusb = eval(x);
	}

	if (auto k = get_known_bits(minusb); (k & kbc<u16>(16)).isZero())
	{
		set_vr(op.rt, a >> (minusb & 15));
		return;
	}

	set_vr(op.rt, inf_ashr(a, minusb & 31));
}
// SHLH: per-element (u16) left shift of ra by the count from rb.
// A plain shift with a four-bit count is used only when the known-bits query
// shows that the value-16 bit of the count cannot be set; in every other case
// the five-bit count goes through inf_shl.
void SHLH(spu_opcode_t op)
{
	const auto [a, b] = get_vrs<u16[8]>(op.ra, op.rb);

	auto known = get_known_bits(b);

	if (!(known & kbc<u16>(16)).isZero())
	{
		// The count may reach the element width
		set_vr(op.rt, inf_shl(a, b & 31));
		return;
	}

	set_vr(op.rt, a << (b & 15));
}
@ -8093,6 +8170,12 @@ public:
}
}
if (auto [ok, y] = match_expr(x, bitcast<bool[std::extent_v<VT>]>(match<get_int_vt<std::extent_v<VT>>>())); ok)
{
// Don't ruin FSMB/FSM/FSMH instructions
return false;
}
set_vr(op.rt4, select(x, get_vr<VT>(op.rb), get_vr<VT>(op.ra)));
return true;
}
@ -8337,9 +8420,9 @@ public:
const auto bx = pshufb(bs, c);
if (perm_only)
set_vr(op.rt4, select(noncast<s8[16]>(c << 3) >= 0, ax, bx));
set_vr(op.rt4, select_by_bit4(c, ax, bx));
else
set_vr(op.rt4, select(noncast<s8[16]>(c << 3) >= 0, ax, bx) | x);
set_vr(op.rt4, select_by_bit4(c, ax, bx) | x);
return;
}
@ -8352,9 +8435,9 @@ public:
const auto ax = pshufb(as, c);
if (perm_only)
set_vr(op.rt4, select(noncast<s8[16]>(c << 3) >= 0, ax, b));
set_vr(op.rt4, select_by_bit4(c, ax, b));
else
set_vr(op.rt4, select(noncast<s8[16]>(c << 3) >= 0, ax, b) | x);
set_vr(op.rt4, select_by_bit4(c, ax, b) | x);
return;
}
}
@ -8371,9 +8454,9 @@ public:
const auto bx = pshufb(bs, c);
if (perm_only)
set_vr(op.rt4, select(noncast<s8[16]>(c << 3) >= 0, a, bx));
set_vr(op.rt4, select_by_bit4(c, a, bx));
else
set_vr(op.rt4, select(noncast<s8[16]>(c << 3) >= 0, a, bx) | x);
set_vr(op.rt4, select_by_bit4(c, a, bx) | x);
return;
}
}
@ -8401,9 +8484,9 @@ public:
const auto bx = pshufb(b, cr);
if (perm_only)
set_vr(op.rt4, select(noncast<s8[16]>(cr << 3) >= 0, ax, bx));
set_vr(op.rt4, select_by_bit4(cr, ax, bx));
else
set_vr(op.rt4, select(noncast<s8[16]>(cr << 3) >= 0, ax, bx) | x);
set_vr(op.rt4, select_by_bit4(cr, ax, bx) | x);
}
void MPYA(spu_opcode_t op)
@ -9611,13 +9694,13 @@ public:
// Emits a 16-byte store of `data` (after byteswap) to the address formed by
// offsetting m_lsptr by `addr` bytes.
//
// addr: byte offset applied to m_lsptr
// data: 16-byte vector value to write
//
// Exactly one store is emitted per call.
void make_store_ls(value_t<u64> addr, value_t<u8[16]> data)
{
	const auto bswapped = byteswap(data);
	m_ir->CreateStore(bswapped.eval(m_ir), m_ir->CreateBitCast(m_ir->CreateGEP(get_type<u8>(), m_lsptr, addr.value), get_type<u8(*)[16]>()));
}
// Emits a 16-byte load from the address formed by offsetting m_lsptr by `addr`
// bytes and returns the loaded vector after byteswap.
//
// addr: byte offset applied to m_lsptr
//
// Exactly one load is emitted per call.
auto make_load_ls(value_t<u64> addr)
{
	value_t<u8[16]> data;
	data.value = m_ir->CreateLoad(get_type<u8[16]>(), m_ir->CreateBitCast(m_ir->CreateGEP(get_type<u8>(), m_lsptr, addr.value), get_type<u8(*)[16]>()));
	return byteswap(data);
}
@ -9839,7 +9922,7 @@ public:
target->addIncoming(e_addr, e_exec);
m_ir->CreateCondBr(get_imm<bool>(op.d).value, d_exec, d_done, m_md_unlikely);
m_ir->SetInsertPoint(d_exec);
m_ir->CreateStore(m_ir->getFalse(), spu_ptr<bool>(&spu_thread::interrupts_enabled))->setVolatile(true);
m_ir->CreateStore(m_ir->getFalse(), spu_ptr<bool>(&spu_thread::interrupts_enabled));
m_ir->CreateBr(d_done);
m_ir->SetInsertPoint(d_done);
m_ir->CreateBr(m_interp_bblock);
@ -9890,7 +9973,7 @@ public:
if (op.d)
{
m_ir->CreateStore(m_ir->getFalse(), spu_ptr<bool>(&spu_thread::interrupts_enabled))->setVolatile(true);
m_ir->CreateStore(m_ir->getFalse(), spu_ptr<bool>(&spu_thread::interrupts_enabled));
}
m_ir->CreateStore(addr.value, spu_ptr<u32>(&spu_thread::pc));
@ -10211,7 +10294,7 @@ public:
// Exit function on unexpected target
m_ir->SetInsertPoint(sw->getDefaultDest());
m_ir->CreateStore(addr.value, spu_ptr<u32>(&spu_thread::pc), true);
m_ir->CreateStore(addr.value, spu_ptr<u32>(&spu_thread::pc));
if (m_finfo && m_finfo->fn)
{

View File

@ -136,7 +136,7 @@ namespace psf
{
std::string_view value{value_array, CharN};
value = value.substr(0, std::min<usz>(value.find_first_of('\0'), value.size()));
return string(CharN, value, allow_truncate);
return string(max_size, value, allow_truncate);
}
// Make array entry

View File

@ -71,16 +71,4 @@ namespace utils
u64 _get_main_tid();
inline const u64 main_tid = _get_main_tid();
#ifdef LLVM_AVAILABLE

// Default LLVM target triple string, selected by the architecture macro.
// Declared `inline` so that every translation unit including this header
// shares a single object instead of getting its own copy.
#if defined(ARCH_X64)
inline const std::string c_llvm_default_triple = "x86_64-unknown-linux-gnu";
#elif defined(ARCH_ARM64)
inline const std::string c_llvm_default_triple = "arm64-unknown-linux-gnu";
#else
// Placeholder for architectures without a known triple
inline const std::string c_llvm_default_triple = "Unimplemented!";
#endif

#endif
}

View File

@ -115,7 +115,7 @@ namespace std
}
#endif
// Fallback for IntelliSense and for Clang up to and including major version 16:
// `consteval` is downgraded to `constexpr` and `constinit` is dropped entirely.
// NOTE(review): presumably these front ends handle the C++20 keywords
// unreliably -- confirm before raising or removing the version bound.
#if defined(__INTELLISENSE__) || (defined (__clang__) && (__clang_major__ <= 16))
#define consteval constexpr
#define constinit
#endif