Update ASMJIT (new upstream API)

This commit is contained in:
Nekotekina 2021-12-28 22:25:36 +03:00
parent 900d7df40f
commit cb2748ae08
15 changed files with 550 additions and 458 deletions

2
.gitmodules vendored
View File

@ -5,7 +5,7 @@
[submodule "asmjit"]
path = 3rdparty/asmjit/asmjit
url = ../../asmjit/asmjit.git
branch = oldstable
branch = aarch64
ignore = dirty
[submodule "llvm"]
path = llvm

@ -1 +1 @@
Subproject commit 723f58581afc0f4cb16ba13396ff77e425896847
Subproject commit eae7197fce03fd52a6e71ca89207a88ce270fb1a

View File

@ -11,66 +11,146 @@
</ProjectConfiguration>
</ItemGroup>
<ItemGroup>
<ClCompile Include="asmjit\src\asmjit\base\arch.cpp" />
<ClCompile Include="asmjit\src\asmjit\base\assembler.cpp" />
<ClCompile Include="asmjit\src\asmjit\base\codebuilder.cpp" />
<ClCompile Include="asmjit\src\asmjit\base\codecompiler.cpp" />
<ClCompile Include="asmjit\src\asmjit\base\codeemitter.cpp" />
<ClCompile Include="asmjit\src\asmjit\base\codeholder.cpp" />
<ClCompile Include="asmjit\src\asmjit\base\constpool.cpp" />
<ClCompile Include="asmjit\src\asmjit\base\cpuinfo.cpp" />
<ClCompile Include="asmjit\src\asmjit\base\func.cpp" />
<ClCompile Include="asmjit\src\asmjit\base\globals.cpp" />
<ClCompile Include="asmjit\src\asmjit\base\inst.cpp" />
<ClCompile Include="asmjit\src\asmjit\base\logging.cpp" />
<ClCompile Include="asmjit\src\asmjit\base\osutils.cpp" />
<ClCompile Include="asmjit\src\asmjit\base\operand.cpp" />
<ClCompile Include="asmjit\src\asmjit\base\regalloc.cpp" />
<ClCompile Include="asmjit\src\asmjit\base\runtime.cpp" />
<ClCompile Include="asmjit\src\asmjit\base\string.cpp" />
<ClCompile Include="asmjit\src\asmjit\base\vmem.cpp" />
<ClCompile Include="asmjit\src\asmjit\base\zone.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\archtraits.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\assembler.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\builder.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\codeholder.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\codewriter.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\compiler.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\constpool.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\cpuinfo.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\emithelper.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\emitter.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\emitterutils.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\environment.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\errorhandler.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\formatter.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\funcargscontext.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\func.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\globals.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\inst.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\jitallocator.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\jitruntime.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\logger.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\operand.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\osutils.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\ralocal.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\rapass.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\rastack.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\string.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\support.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\target.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\type.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\virtmem.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\zone.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\zonehash.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\zonelist.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\zonestack.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\zonetree.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\zonevector.cpp" />
<ClCompile Include="asmjit\src\asmjit\x86\x86assembler.cpp" />
<ClCompile Include="asmjit\src\asmjit\x86\x86builder.cpp" />
<ClCompile Include="asmjit\src\asmjit\x86\x86compiler.cpp" />
<ClCompile Include="asmjit\src\asmjit\x86\x86inst.cpp" />
<ClCompile Include="asmjit\src\asmjit\x86\x86instimpl.cpp" />
<ClCompile Include="asmjit\src\asmjit\x86\x86internal.cpp" />
<ClCompile Include="asmjit\src\asmjit\x86\x86logging.cpp" />
<ClCompile Include="asmjit\src\asmjit\x86\x86emithelper.cpp" />
<ClCompile Include="asmjit\src\asmjit\x86\x86formatter.cpp" />
<ClCompile Include="asmjit\src\asmjit\x86\x86func.cpp" />
<ClCompile Include="asmjit\src\asmjit\x86\x86instapi.cpp" />
<ClCompile Include="asmjit\src\asmjit\x86\x86instdb.cpp" />
<ClCompile Include="asmjit\src\asmjit\x86\x86operand.cpp" />
<ClCompile Include="asmjit\src\asmjit\x86\x86operand_regs.cpp" />
<ClCompile Include="asmjit\src\asmjit\x86\x86regalloc.cpp" />
<ClCompile Include="asmjit\src\asmjit\x86\x86rapass.cpp" />
<ClCompile Include="asmjit\src\asmjit\arm\a64assembler.cpp" />
<ClCompile Include="asmjit\src\asmjit\arm\a64builder.cpp" />
<ClCompile Include="asmjit\src\asmjit\arm\a64compiler.cpp" />
<ClCompile Include="asmjit\src\asmjit\arm\a64emithelper.cpp" />
<ClCompile Include="asmjit\src\asmjit\arm\a64instapi.cpp" />
<ClCompile Include="asmjit\src\asmjit\arm\a64instdb.cpp" />
<ClCompile Include="asmjit\src\asmjit\arm\a64operand.cpp" />
<ClCompile Include="asmjit\src\asmjit\arm\a64rapass.cpp" />
<ClCompile Include="asmjit\src\asmjit\arm\armformatter.cpp" />
<ClCompile Include="asmjit\src\asmjit\arm\armfunc.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="asmjit\src\asmjit\base\arch.h" />
<ClInclude Include="asmjit\src\asmjit\base\assembler.h" />
<ClInclude Include="asmjit\src\asmjit\base\codebuilder.h" />
<ClInclude Include="asmjit\src\asmjit\base\codecompiler.h" />
<ClInclude Include="asmjit\src\asmjit\base\codeemitter.h" />
<ClInclude Include="asmjit\src\asmjit\base\codeholder.h" />
<ClInclude Include="asmjit\src\asmjit\base\constpool.h" />
<ClInclude Include="asmjit\src\asmjit\base\context_p.h" />
<ClInclude Include="asmjit\src\asmjit\base\cpuinfo.h" />
<ClInclude Include="asmjit\src\asmjit\base\func.h" />
<ClInclude Include="asmjit\src\asmjit\base\logging.h" />
<ClInclude Include="asmjit\src\asmjit\base\globals.h" />
<ClInclude Include="asmjit\src\asmjit\base\inst.h" />
<ClInclude Include="asmjit\src\asmjit\base\lock.h" />
<ClInclude Include="asmjit\src\asmjit\base\operand.h" />
<ClInclude Include="asmjit\src\asmjit\base\osutils.h" />
<ClInclude Include="asmjit\src\asmjit\base\regalloc_p.h" />
<ClInclude Include="asmjit\src\asmjit\base\runtime.h" />
<ClInclude Include="asmjit\src\asmjit\base\string.h" />
<ClInclude Include="asmjit\src\asmjit\base\vectypes.h" />
<ClInclude Include="asmjit\src\asmjit\base\vmem.h" />
<ClInclude Include="asmjit\src\asmjit\base\zone.h" />
<ClInclude Include="asmjit\src\asmjit\core\api-build_p.h" />
<ClInclude Include="asmjit\src\asmjit\core\api-config.h" />
<ClInclude Include="asmjit\src\asmjit\core\archcommons.h" />
<ClInclude Include="asmjit\src\asmjit\core\archtraits.h" />
<ClInclude Include="asmjit\src\asmjit\core\assembler.h" />
<ClInclude Include="asmjit\src\asmjit\core\builder.h" />
<ClInclude Include="asmjit\src\asmjit\core\codebuffer.h" />
<ClInclude Include="asmjit\src\asmjit\core\codeholder.h" />
<ClInclude Include="asmjit\src\asmjit\core\codewriter_p.h" />
<ClInclude Include="asmjit\src\asmjit\core\compilerdefs.h" />
<ClInclude Include="asmjit\src\asmjit\core\compiler.h" />
<ClInclude Include="asmjit\src\asmjit\core\constpool.h" />
<ClInclude Include="asmjit\src\asmjit\core\cpuinfo.h" />
<ClInclude Include="asmjit\src\asmjit\core\emithelper_p.h" />
<ClInclude Include="asmjit\src\asmjit\core\emitter.h" />
<ClInclude Include="asmjit\src\asmjit\core\emitterutils_p.h" />
<ClInclude Include="asmjit\src\asmjit\core\environment.h" />
<ClInclude Include="asmjit\src\asmjit\core\errorhandler.h" />
<ClInclude Include="asmjit\src\asmjit\core\formatter.h" />
<ClInclude Include="asmjit\src\asmjit\core\formatter_p.h" />
<ClInclude Include="asmjit\src\asmjit\core\funcargscontext_p.h" />
<ClInclude Include="asmjit\src\asmjit\core\func.h" />
<ClInclude Include="asmjit\src\asmjit\core\globals.h" />
<ClInclude Include="asmjit\src\asmjit\core\inst.h" />
<ClInclude Include="asmjit\src\asmjit\core\jitallocator.h" />
<ClInclude Include="asmjit\src\asmjit\core\jitruntime.h" />
<ClInclude Include="asmjit\src\asmjit\core\logger.h" />
<ClInclude Include="asmjit\src\asmjit\core\misc_p.h" />
<ClInclude Include="asmjit\src\asmjit\core\operand.h" />
<ClInclude Include="asmjit\src\asmjit\core\osutils.h" />
<ClInclude Include="asmjit\src\asmjit\core\osutils_p.h" />
<ClInclude Include="asmjit\src\asmjit\core\raassignment_p.h" />
<ClInclude Include="asmjit\src\asmjit\core\rabuilders_p.h" />
<ClInclude Include="asmjit\src\asmjit\core\radefs_p.h" />
<ClInclude Include="asmjit\src\asmjit\core\ralocal_p.h" />
<ClInclude Include="asmjit\src\asmjit\core\rapass_p.h" />
<ClInclude Include="asmjit\src\asmjit\core\rastack_p.h" />
<ClInclude Include="asmjit\src\asmjit\core\string.h" />
<ClInclude Include="asmjit\src\asmjit\core\support.h" />
<ClInclude Include="asmjit\src\asmjit\core\target.h" />
<ClInclude Include="asmjit\src\asmjit\core\type.h" />
<ClInclude Include="asmjit\src\asmjit\core\virtmem.h" />
<ClInclude Include="asmjit\src\asmjit\core\zone.h" />
<ClInclude Include="asmjit\src\asmjit\core\zonehash.h" />
<ClInclude Include="asmjit\src\asmjit\core\zonelist.h" />
<ClInclude Include="asmjit\src\asmjit\core\zonestack.h" />
<ClInclude Include="asmjit\src\asmjit\core\zonestring.h" />
<ClInclude Include="asmjit\src\asmjit\core\zonetree.h" />
<ClInclude Include="asmjit\src\asmjit\core\zonevector.h" />
<ClInclude Include="asmjit\src\asmjit\x86\x86archtraits_p.h" />
<ClInclude Include="asmjit\src\asmjit\x86\x86assembler.h" />
<ClInclude Include="asmjit\src\asmjit\x86\x86builder.h" />
<ClInclude Include="asmjit\src\asmjit\x86\x86compiler.h" />
<ClInclude Include="asmjit\src\asmjit\x86\x86inst.h" />
<ClInclude Include="asmjit\src\asmjit\x86\x86instimpl_p.h" />
<ClInclude Include="asmjit\src\asmjit\x86\x86internal_p.h" />
<ClInclude Include="asmjit\src\asmjit\x86\x86logging_p.h" />
<ClInclude Include="asmjit\src\asmjit\x86\x86emithelper_p.h" />
<ClInclude Include="asmjit\src\asmjit\x86\x86emitter.h" />
<ClInclude Include="asmjit\src\asmjit\x86\x86formatter_p.h" />
<ClInclude Include="asmjit\src\asmjit\x86\x86func_p.h" />
<ClInclude Include="asmjit\src\asmjit\x86\x86globals.h" />
<ClInclude Include="asmjit\src\asmjit\x86\x86instapi_p.h" />
<ClInclude Include="asmjit\src\asmjit\x86\x86instdb.h" />
<ClInclude Include="asmjit\src\asmjit\x86\x86instdb_p.h" />
<ClInclude Include="asmjit\src\asmjit\x86\x86opcode_p.h" />
<ClInclude Include="asmjit\src\asmjit\x86\x86operand.h" />
<ClInclude Include="asmjit\src\asmjit\x86\x86regalloc_p.h" />
<ClInclude Include="asmjit\src\asmjit\x86\x86rapass_p.h" />
<ClInclude Include="asmjit\src\asmjit\arm\a64assembler.h" />
<ClInclude Include="asmjit\src\asmjit\arm\a64builder.h" />
<ClInclude Include="asmjit\src\asmjit\arm\a64compiler.h" />
<ClInclude Include="asmjit\src\asmjit\arm\a64emithelper_p.h" />
<ClInclude Include="asmjit\src\asmjit\arm\a64emitter.h" />
<ClInclude Include="asmjit\src\asmjit\arm\a64globals.h" />
<ClInclude Include="asmjit\src\asmjit\arm\a64instapi_p.h" />
<ClInclude Include="asmjit\src\asmjit\arm\a64instdb.h" />
<ClInclude Include="asmjit\src\asmjit\arm\a64instdb_p.h" />
<ClInclude Include="asmjit\src\asmjit\arm\a64operand.h" />
<ClInclude Include="asmjit\src\asmjit\arm\a64rapass_p.h" />
<ClInclude Include="asmjit\src\asmjit\arm\a64utils.h" />
<ClInclude Include="asmjit\src\asmjit\arm\armarchtraits_p.h" />
<ClInclude Include="asmjit\src\asmjit\arm\armformatter_p.h" />
<ClInclude Include="asmjit\src\asmjit\arm\armfunc_p.h" />
<ClInclude Include="asmjit\src\asmjit\arm\armglobals.h" />
<ClInclude Include="asmjit\src\asmjit\arm\armoperand.h" />
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{AC40FF01-426E-4838-A317-66354CEFAE88}</ProjectGuid>

View File

@ -1,65 +1,145 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="12.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<ClCompile Include="asmjit\src\asmjit\base\arch.cpp" />
<ClCompile Include="asmjit\src\asmjit\base\assembler.cpp" />
<ClCompile Include="asmjit\src\asmjit\base\codebuilder.cpp" />
<ClCompile Include="asmjit\src\asmjit\base\codecompiler.cpp" />
<ClCompile Include="asmjit\src\asmjit\base\codeemitter.cpp" />
<ClCompile Include="asmjit\src\asmjit\base\codeholder.cpp" />
<ClCompile Include="asmjit\src\asmjit\base\constpool.cpp" />
<ClCompile Include="asmjit\src\asmjit\base\cpuinfo.cpp" />
<ClCompile Include="asmjit\src\asmjit\base\func.cpp" />
<ClCompile Include="asmjit\src\asmjit\base\globals.cpp" />
<ClCompile Include="asmjit\src\asmjit\base\inst.cpp" />
<ClCompile Include="asmjit\src\asmjit\base\logging.cpp" />
<ClCompile Include="asmjit\src\asmjit\base\osutils.cpp" />
<ClCompile Include="asmjit\src\asmjit\base\operand.cpp" />
<ClCompile Include="asmjit\src\asmjit\base\regalloc.cpp" />
<ClCompile Include="asmjit\src\asmjit\base\runtime.cpp" />
<ClCompile Include="asmjit\src\asmjit\base\string.cpp" />
<ClCompile Include="asmjit\src\asmjit\base\vmem.cpp" />
<ClCompile Include="asmjit\src\asmjit\base\zone.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\archtraits.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\assembler.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\builder.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\codeholder.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\codewriter.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\compiler.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\constpool.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\cpuinfo.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\emithelper.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\emitter.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\emitterutils.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\environment.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\errorhandler.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\formatter.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\funcargscontext.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\func.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\globals.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\inst.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\jitallocator.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\jitruntime.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\logger.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\operand.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\osutils.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\ralocal.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\rapass.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\rastack.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\string.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\support.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\target.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\type.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\virtmem.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\zone.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\zonehash.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\zonelist.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\zonestack.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\zonetree.cpp" />
<ClCompile Include="asmjit\src\asmjit\core\zonevector.cpp" />
<ClCompile Include="asmjit\src\asmjit\x86\x86assembler.cpp" />
<ClCompile Include="asmjit\src\asmjit\x86\x86builder.cpp" />
<ClCompile Include="asmjit\src\asmjit\x86\x86compiler.cpp" />
<ClCompile Include="asmjit\src\asmjit\x86\x86inst.cpp" />
<ClCompile Include="asmjit\src\asmjit\x86\x86instimpl.cpp" />
<ClCompile Include="asmjit\src\asmjit\x86\x86internal.cpp" />
<ClCompile Include="asmjit\src\asmjit\x86\x86logging.cpp" />
<ClCompile Include="asmjit\src\asmjit\x86\x86emithelper.cpp" />
<ClCompile Include="asmjit\src\asmjit\x86\x86formatter.cpp" />
<ClCompile Include="asmjit\src\asmjit\x86\x86func.cpp" />
<ClCompile Include="asmjit\src\asmjit\x86\x86instapi.cpp" />
<ClCompile Include="asmjit\src\asmjit\x86\x86instdb.cpp" />
<ClCompile Include="asmjit\src\asmjit\x86\x86operand.cpp" />
<ClCompile Include="asmjit\src\asmjit\x86\x86operand_regs.cpp" />
<ClCompile Include="asmjit\src\asmjit\x86\x86regalloc.cpp" />
<ClCompile Include="asmjit\src\asmjit\x86\x86rapass.cpp" />
<ClCompile Include="asmjit\src\asmjit\arm\a64assembler.cpp" />
<ClCompile Include="asmjit\src\asmjit\arm\a64builder.cpp" />
<ClCompile Include="asmjit\src\asmjit\arm\a64compiler.cpp" />
<ClCompile Include="asmjit\src\asmjit\arm\a64emithelper.cpp" />
<ClCompile Include="asmjit\src\asmjit\arm\a64instapi.cpp" />
<ClCompile Include="asmjit\src\asmjit\arm\a64instdb.cpp" />
<ClCompile Include="asmjit\src\asmjit\arm\a64operand.cpp" />
<ClCompile Include="asmjit\src\asmjit\arm\a64rapass.cpp" />
<ClCompile Include="asmjit\src\asmjit\arm\armformatter.cpp" />
<ClCompile Include="asmjit\src\asmjit\arm\armfunc.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="asmjit\src\asmjit\base\arch.h" />
<ClInclude Include="asmjit\src\asmjit\base\assembler.h" />
<ClInclude Include="asmjit\src\asmjit\base\codebuilder.h" />
<ClInclude Include="asmjit\src\asmjit\base\codecompiler.h" />
<ClInclude Include="asmjit\src\asmjit\base\codeemitter.h" />
<ClInclude Include="asmjit\src\asmjit\base\codeholder.h" />
<ClInclude Include="asmjit\src\asmjit\base\constpool.h" />
<ClInclude Include="asmjit\src\asmjit\base\context_p.h" />
<ClInclude Include="asmjit\src\asmjit\base\cpuinfo.h" />
<ClInclude Include="asmjit\src\asmjit\base\func.h" />
<ClInclude Include="asmjit\src\asmjit\base\logging.h" />
<ClInclude Include="asmjit\src\asmjit\base\globals.h" />
<ClInclude Include="asmjit\src\asmjit\base\inst.h" />
<ClInclude Include="asmjit\src\asmjit\base\lock.h" />
<ClInclude Include="asmjit\src\asmjit\base\operand.h" />
<ClInclude Include="asmjit\src\asmjit\base\osutils.h" />
<ClInclude Include="asmjit\src\asmjit\base\regalloc_p.h" />
<ClInclude Include="asmjit\src\asmjit\base\runtime.h" />
<ClInclude Include="asmjit\src\asmjit\base\string.h" />
<ClInclude Include="asmjit\src\asmjit\base\vectypes.h" />
<ClInclude Include="asmjit\src\asmjit\base\vmem.h" />
<ClInclude Include="asmjit\src\asmjit\base\zone.h" />
<ClInclude Include="asmjit\src\asmjit\core\api-build_p.h" />
<ClInclude Include="asmjit\src\asmjit\core\api-config.h" />
<ClInclude Include="asmjit\src\asmjit\core\archcommons.h" />
<ClInclude Include="asmjit\src\asmjit\core\archtraits.h" />
<ClInclude Include="asmjit\src\asmjit\core\assembler.h" />
<ClInclude Include="asmjit\src\asmjit\core\builder.h" />
<ClInclude Include="asmjit\src\asmjit\core\codebuffer.h" />
<ClInclude Include="asmjit\src\asmjit\core\codeholder.h" />
<ClInclude Include="asmjit\src\asmjit\core\codewriter_p.h" />
<ClInclude Include="asmjit\src\asmjit\core\compilerdefs.h" />
<ClInclude Include="asmjit\src\asmjit\core\compiler.h" />
<ClInclude Include="asmjit\src\asmjit\core\constpool.h" />
<ClInclude Include="asmjit\src\asmjit\core\cpuinfo.h" />
<ClInclude Include="asmjit\src\asmjit\core\emithelper_p.h" />
<ClInclude Include="asmjit\src\asmjit\core\emitter.h" />
<ClInclude Include="asmjit\src\asmjit\core\emitterutils_p.h" />
<ClInclude Include="asmjit\src\asmjit\core\environment.h" />
<ClInclude Include="asmjit\src\asmjit\core\errorhandler.h" />
<ClInclude Include="asmjit\src\asmjit\core\formatter.h" />
<ClInclude Include="asmjit\src\asmjit\core\formatter_p.h" />
<ClInclude Include="asmjit\src\asmjit\core\funcargscontext_p.h" />
<ClInclude Include="asmjit\src\asmjit\core\func.h" />
<ClInclude Include="asmjit\src\asmjit\core\globals.h" />
<ClInclude Include="asmjit\src\asmjit\core\inst.h" />
<ClInclude Include="asmjit\src\asmjit\core\jitallocator.h" />
<ClInclude Include="asmjit\src\asmjit\core\jitruntime.h" />
<ClInclude Include="asmjit\src\asmjit\core\logger.h" />
<ClInclude Include="asmjit\src\asmjit\core\misc_p.h" />
<ClInclude Include="asmjit\src\asmjit\core\operand.h" />
<ClInclude Include="asmjit\src\asmjit\core\osutils.h" />
<ClInclude Include="asmjit\src\asmjit\core\osutils_p.h" />
<ClInclude Include="asmjit\src\asmjit\core\raassignment_p.h" />
<ClInclude Include="asmjit\src\asmjit\core\rabuilders_p.h" />
<ClInclude Include="asmjit\src\asmjit\core\radefs_p.h" />
<ClInclude Include="asmjit\src\asmjit\core\ralocal_p.h" />
<ClInclude Include="asmjit\src\asmjit\core\rapass_p.h" />
<ClInclude Include="asmjit\src\asmjit\core\rastack_p.h" />
<ClInclude Include="asmjit\src\asmjit\core\string.h" />
<ClInclude Include="asmjit\src\asmjit\core\support.h" />
<ClInclude Include="asmjit\src\asmjit\core\target.h" />
<ClInclude Include="asmjit\src\asmjit\core\type.h" />
<ClInclude Include="asmjit\src\asmjit\core\virtmem.h" />
<ClInclude Include="asmjit\src\asmjit\core\zone.h" />
<ClInclude Include="asmjit\src\asmjit\core\zonehash.h" />
<ClInclude Include="asmjit\src\asmjit\core\zonelist.h" />
<ClInclude Include="asmjit\src\asmjit\core\zonestack.h" />
<ClInclude Include="asmjit\src\asmjit\core\zonestring.h" />
<ClInclude Include="asmjit\src\asmjit\core\zonetree.h" />
<ClInclude Include="asmjit\src\asmjit\core\zonevector.h" />
<ClInclude Include="asmjit\src\asmjit\x86\x86archtraits_p.h" />
<ClInclude Include="asmjit\src\asmjit\x86\x86assembler.h" />
<ClInclude Include="asmjit\src\asmjit\x86\x86builder.h" />
<ClInclude Include="asmjit\src\asmjit\x86\x86compiler.h" />
<ClInclude Include="asmjit\src\asmjit\x86\x86inst.h" />
<ClInclude Include="asmjit\src\asmjit\x86\x86instimpl_p.h" />
<ClInclude Include="asmjit\src\asmjit\x86\x86internal_p.h" />
<ClInclude Include="asmjit\src\asmjit\x86\x86logging_p.h" />
<ClInclude Include="asmjit\src\asmjit\x86\x86emithelper_p.h" />
<ClInclude Include="asmjit\src\asmjit\x86\x86emitter.h" />
<ClInclude Include="asmjit\src\asmjit\x86\x86formatter_p.h" />
<ClInclude Include="asmjit\src\asmjit\x86\x86func_p.h" />
<ClInclude Include="asmjit\src\asmjit\x86\x86globals.h" />
<ClInclude Include="asmjit\src\asmjit\x86\x86instapi_p.h" />
<ClInclude Include="asmjit\src\asmjit\x86\x86instdb.h" />
<ClInclude Include="asmjit\src\asmjit\x86\x86instdb_p.h" />
<ClInclude Include="asmjit\src\asmjit\x86\x86opcode_p.h" />
<ClInclude Include="asmjit\src\asmjit\x86\x86operand.h" />
<ClInclude Include="asmjit\src\asmjit\x86\x86regalloc_p.h" />
<ClInclude Include="asmjit\src\asmjit\x86\x86rapass_p.h" />
<ClInclude Include="asmjit\src\asmjit\arm\a64assembler.h" />
<ClInclude Include="asmjit\src\asmjit\arm\a64builder.h" />
<ClInclude Include="asmjit\src\asmjit\arm\a64compiler.h" />
<ClInclude Include="asmjit\src\asmjit\arm\a64emithelper_p.h" />
<ClInclude Include="asmjit\src\asmjit\arm\a64emitter.h" />
<ClInclude Include="asmjit\src\asmjit\arm\a64globals.h" />
<ClInclude Include="asmjit\src\asmjit\arm\a64instapi_p.h" />
<ClInclude Include="asmjit\src\asmjit\arm\a64instdb.h" />
<ClInclude Include="asmjit\src\asmjit\arm\a64instdb_p.h" />
<ClInclude Include="asmjit\src\asmjit\arm\a64operand.h" />
<ClInclude Include="asmjit\src\asmjit\arm\a64rapass_p.h" />
<ClInclude Include="asmjit\src\asmjit\arm\a64utils.h" />
<ClInclude Include="asmjit\src\asmjit\arm\armarchtraits_p.h" />
<ClInclude Include="asmjit\src\asmjit\arm\armformatter_p.h" />
<ClInclude Include="asmjit\src\asmjit\arm\armfunc_p.h" />
<ClInclude Include="asmjit\src\asmjit\arm\armglobals.h" />
<ClInclude Include="asmjit\src\asmjit\arm\armoperand.h" />
</ItemGroup>
</Project>

View File

@ -113,8 +113,32 @@ static u8* add_jit_memory(usz size, uint align)
return pointer + pos;
}
// Returns the asmjit environment describing the host.
// The value is obtained from asmjit::Environment::host() on the first call and
// the same object is handed out on every call after that.
const asmjit::Environment& jit_runtime_base::environment() const noexcept
{
	static const asmjit::Environment s_host_env = asmjit::Environment::host();

	return s_host_env;
}
// Copies every section of `code` into memory obtained from this runtime's
// _alloc() and returns the address the code was placed at.
// Each step is wrapped in ensure() instead of returning an error code.
// NOTE(review): ensure() is defined outside this view - presumably it fails hard
// when its argument is falsy; confirm before relying on the noexcept contract.
void* jit_runtime_base::_add(asmjit::CodeHolder* code) noexcept
{
// Both calls must yield a zero (falsy) result to pass the check
ensure(!code->flatten());
ensure(!code->resolveUnresolvedLinks());
// A code size of zero does not pass the check either
usz codeSize = ensure(code->codeSize());
// Ask the derived runtime for codeSize bytes with an alignment of 64
auto p = ensure(this->_alloc(codeSize, 64));
// Relocate against the final address before any bytes are copied
ensure(!code->relocateToBase(uptr(p)));
// The scope object lives until return, so it covers the copy loop below.
// NOTE(review): presumably keeps [p, p + codeSize) writable meanwhile - confirm in asmjit VirtMem docs
asmjit::VirtMem::ProtectJitReadWriteScope rwScope(p, codeSize);
// Place each section's buffer at its own offset from the base address
for (asmjit::Section* section : code->_sections)
{
std::memcpy(p + section->offset(), section->data(), section->bufferSize());
}
return p;
}
jit_runtime::jit_runtime()
: HostRuntime()
{
}
@ -122,38 +146,9 @@ jit_runtime::~jit_runtime()
{
}
asmjit::Error jit_runtime::_add(void** dst, asmjit::CodeHolder* code) noexcept
uchar* jit_runtime::_alloc(usz size, usz align) noexcept
{
usz codeSize = code->getCodeSize();
if (!codeSize) [[unlikely]]
{
*dst = nullptr;
return asmjit::kErrorNoCodeGenerated;
}
void* p = jit_runtime::alloc(codeSize, 16);
if (!p) [[unlikely]]
{
*dst = nullptr;
return asmjit::kErrorNoVirtualMemory;
}
usz relocSize = code->relocate(p);
if (!relocSize) [[unlikely]]
{
*dst = nullptr;
return asmjit::kErrorInvalidState;
}
flush(p, relocSize);
*dst = p;
return asmjit::kErrorOk;
}
asmjit::Error jit_runtime::_release(void*) noexcept
{
return asmjit::kErrorOk;
return jit_runtime::alloc(size, align, true);
}
u8* jit_runtime::alloc(usz size, uint align, bool exec) noexcept
@ -200,12 +195,12 @@ void jit_runtime::finalize() noexcept
std::memcpy(alloc(s_data_init.size(), 1, false), s_data_init.data(), s_data_init.size());
}
asmjit::Runtime& asmjit::get_global_runtime()
jit_runtime_base& asmjit::get_global_runtime()
{
// 16 MiB for internal needs
static constexpr u64 size = 1024 * 1024 * 16;
struct custom_runtime final : asmjit::HostRuntime
struct custom_runtime final : jit_runtime_base
{
custom_runtime() noexcept
{
@ -214,7 +209,7 @@ asmjit::Runtime& asmjit::get_global_runtime()
{
if (auto ptr = utils::memory_reserve(size, reinterpret_cast<void*>(addr)))
{
m_pos.raw() = static_cast<std::byte*>(ptr);
m_pos.raw() = static_cast<uchar*>(ptr);
break;
}
}
@ -226,49 +221,26 @@ asmjit::Runtime& asmjit::get_global_runtime()
utils::memory_commit(m_pos, size, utils::protection::wx);
}
custom_runtime(const custom_runtime&) = delete;
custom_runtime& operator=(const custom_runtime&) = delete;
asmjit::Error _add(void** dst, asmjit::CodeHolder* code) noexcept override
uchar* _alloc(usz size, usz align) noexcept override
{
usz codeSize = code->getCodeSize();
if (!codeSize) [[unlikely]]
return m_pos.atomic_op([&](uchar*& pos) -> uchar*
{
*dst = nullptr;
return asmjit::kErrorNoCodeGenerated;
}
const auto r = reinterpret_cast<uchar*>(utils::align(uptr(pos), align));
void* p = m_pos.fetch_add(utils::align(codeSize, 64));
if (!p || m_pos > m_max) [[unlikely]]
{
*dst = nullptr;
jit_log.fatal("Out of memory (static asmjit)");
return asmjit::kErrorNoVirtualMemory;
}
if (r >= pos && r + size > pos && r + size <= m_max)
{
pos = r + size;
return r;
}
usz relocSize = code->relocate(p);
if (!relocSize) [[unlikely]]
{
*dst = nullptr;
return asmjit::kErrorInvalidState;
}
flush(p, relocSize);
*dst = p;
return asmjit::kErrorOk;
}
asmjit::Error _release(void*) noexcept override
{
return asmjit::kErrorOk;
return nullptr;
});
}
private:
atomic_t<std::byte*> m_pos{};
atomic_t<uchar*> m_pos{};
std::byte* m_max{};
uchar* m_max{};
};
// Magic static
@ -276,37 +248,17 @@ asmjit::Runtime& asmjit::get_global_runtime()
return g_rt;
}
asmjit::Error asmjit::inline_runtime::_add(void** dst, asmjit::CodeHolder* code) noexcept
asmjit::inline_runtime::inline_runtime(uchar* data, usz size)
: m_data(data)
, m_size(size)
{
usz codeSize = code->getCodeSize();
if (!codeSize) [[unlikely]]
{
*dst = nullptr;
return asmjit::kErrorNoCodeGenerated;
}
if (utils::align(codeSize, 4096) > m_size) [[unlikely]]
{
*dst = nullptr;
return asmjit::kErrorNoVirtualMemory;
}
usz relocSize = code->relocate(m_data);
if (!relocSize) [[unlikely]]
{
*dst = nullptr;
return asmjit::kErrorInvalidState;
}
flush(m_data, relocSize);
*dst = m_data;
return asmjit::kErrorOk;
}
asmjit::Error asmjit::inline_runtime::_release(void*) noexcept
uchar* asmjit::inline_runtime::_alloc(usz size, usz align) noexcept
{
return asmjit::kErrorOk;
ensure(align <= 4096);
return size <= m_size ? m_data : nullptr;
}
asmjit::inline_runtime::~inline_runtime()
@ -397,19 +349,19 @@ static u64 make_null_function(const std::string& name)
using namespace asmjit;
// Build a "null" function that contains its name
const auto func = build_function_asm<void (*)()>("NULL", [&](X86Assembler& c, auto& args)
const auto func = build_function_asm<void (*)()>("NULL", [&](x86::Assembler& c, auto& args)
{
Label data = c.newLabel();
c.lea(args[0], x86::qword_ptr(data, 0));
c.jmp(imm_ptr(&null));
c.align(kAlignCode, 16);
c.jmp(Imm(&null));
c.align(AlignMode::kCode, 16);
c.bind(data);
// Copy function name bytes
for (char ch : name)
c.db(ch);
c.db(0);
c.align(kAlignData, 16);
c.align(AlignMode::kData, 16);
});
func_ptr = reinterpret_cast<u64>(func);

View File

@ -4,7 +4,9 @@
// Include asmjit with warnings ignored
#define ASMJIT_EMBED
#define ASMJIT_DEBUG
#define ASMJIT_STATIC
#define ASMJIT_BUILD_DEBUG
#undef Bool
#ifdef _MSC_VER
#pragma warning(push, 0)
@ -49,17 +51,27 @@ enum class jit_class
spu_data,
};
// Base type of the JIT runtimes in this file (jit_runtime, inline_runtime and
// the global runtime returned by get_global_runtime() all derive from it).
// Derived types only supply the memory through _alloc().
struct jit_runtime_base
{
jit_runtime_base() noexcept = default;
virtual ~jit_runtime_base() = default;
// Not copyable
jit_runtime_base(const jit_runtime_base&) = delete;
jit_runtime_base& operator=(const jit_runtime_base&) = delete;
// Environment passed to CodeHolder::init() by the function builders in this file
const asmjit::Environment& environment() const noexcept;
// Places the contents of `code` into memory obtained from _alloc() and returns its address
void* _add(asmjit::CodeHolder* code) noexcept;
// Implemented by each runtime: provide `size` bytes aligned to `align`.
// NOTE(review): the overrides visible in this change return nullptr when the request cannot be met
virtual uchar* _alloc(usz size, usz align) noexcept = 0;
};
// ASMJIT runtime for emitting code in a single 2G region
struct jit_runtime final : asmjit::HostRuntime
struct jit_runtime final : jit_runtime_base
{
jit_runtime();
~jit_runtime() override;
// Allocate executable memory
asmjit::Error _add(void** dst, asmjit::CodeHolder* code) noexcept override;
// Do nothing (deallocation is delayed)
asmjit::Error _release(void* p) noexcept override;
uchar* _alloc(usz size, usz align) noexcept override;
// Allocate memory
static u8* alloc(usz size, uint align, bool exec = true) noexcept;
@ -74,35 +86,25 @@ struct jit_runtime final : asmjit::HostRuntime
namespace asmjit
{
// Should only be used to build global functions
asmjit::Runtime& get_global_runtime();
jit_runtime_base& get_global_runtime();
// Don't use directly
class inline_runtime : public HostRuntime
class inline_runtime : public jit_runtime_base
{
uchar* m_data;
usz m_size;
public:
inline_runtime(const inline_runtime&) = delete;
inline_runtime& operator=(const inline_runtime&) = delete;
inline_runtime(uchar* data, usz size)
: m_data(data)
, m_size(size)
{
}
asmjit::Error _add(void** dst, asmjit::CodeHolder* code) noexcept override;
asmjit::Error _release(void*) noexcept override;
inline_runtime(uchar* data, usz size);
~inline_runtime();
uchar* _alloc(usz size, usz align) noexcept override;
};
// Emit xbegin and adjacent loop, return label at xbegin (don't use xabort please)
template <typename F>
[[nodiscard]] inline asmjit::Label build_transaction_enter(asmjit::X86Assembler& c, asmjit::Label fallback, F func)
[[nodiscard]] inline asmjit::Label build_transaction_enter(asmjit::x86::Assembler& c, asmjit::Label fallback, F func)
{
Label fall = c.newLabel();
Label begin = c.newLabel();
@ -117,7 +119,7 @@ namespace asmjit
func();
// Other bad statuses are ignored regardless of repeat flag (TODO)
c.align(kAlignCode, 16);
c.align(AlignMode::kCode, 16);
c.bind(begin);
return fall;
@ -125,7 +127,7 @@ namespace asmjit
}
// Helper to spill RDX (EDX) register for RDTSC
inline void build_swap_rdx_with(asmjit::X86Assembler& c, std::array<X86Gp, 4>& args, const asmjit::X86Gp& with)
inline void build_swap_rdx_with(asmjit::x86::Assembler& c, std::array<x86::Gp, 4>& args, const asmjit::x86::Gp& with)
{
#ifdef _WIN32
c.xchg(args[1], with);
@ -137,7 +139,7 @@ namespace asmjit
}
// Get full RDTSC value into chosen register (clobbers rax/rdx or saves only rax with other target)
inline void build_get_tsc(asmjit::X86Assembler& c, const asmjit::X86Gp& to = asmjit::x86::rax)
inline void build_get_tsc(asmjit::x86::Assembler& c, const asmjit::x86::Gp& to = asmjit::x86::rax)
{
if (&to != &x86::rax && &to != &x86::rdx)
{
@ -164,6 +166,8 @@ namespace asmjit
c.or_(to.r64(), x86::rdx);
}
}
using imm_ptr = Imm;
}
// Build runtime function with asmjit::X86Assembler
@ -175,10 +179,9 @@ inline FT build_function_asm(std::string_view name, F&& builder)
auto& rt = get_global_runtime();
CodeHolder code;
code.init(rt.getCodeInfo());
code._globalHints = asmjit::CodeEmitter::kHintOptimizedAlign;
code.init(rt.environment());
std::array<X86Gp, 4> args;
std::array<x86::Gp, 4> args;
#ifdef _WIN32
args[0] = x86::rcx;
args[1] = x86::rdx;
@ -191,19 +194,12 @@ inline FT build_function_asm(std::string_view name, F&& builder)
args[3] = x86::rcx;
#endif
X86Assembler compiler(&code);
x86::Assembler compiler(&code);
compiler.addEncodingOptions(EncodingOptions::kOptimizedAlign);
builder(std::ref(compiler), args);
ensure(compiler.getLastError() == 0);
FT result;
if (rt.add(&result, &code))
{
return nullptr;
}
jit_announce(result, code.getCodeSize(), name);
return result;
const auto result = rt._add(&code);
jit_announce(result, code.codeSize(), name);
return reinterpret_cast<FT>(uptr(result));
}
#ifdef __APPLE__
@ -253,10 +249,9 @@ public:
inline_runtime rt(m_data, Size);
CodeHolder code;
code.init(rt.getCodeInfo());
code._globalHints = asmjit::CodeEmitter::kHintOptimizedAlign;
code.init(rt.environment());
std::array<X86Gp, 4> args;
std::array<x86::Gp, 4> args;
#ifdef _WIN32
args[0] = x86::rcx;
args[1] = x86::rdx;
@ -269,19 +264,10 @@ public:
args[3] = x86::rcx;
#endif
X86Assembler compiler(&code);
x86::Assembler compiler(&code);
compiler.addEncodingOptions(EncodingOptions::kOptimizedAlign);
builder(std::ref(compiler), args);
FT result;
if (compiler.getLastError() || rt.add(&result, &code))
{
ensure(false);
}
else
{
jit_announce(result, code.getCodeSize(), name);
}
jit_announce(rt._add(&code), code.codeSize(), name);
}
operator FT() const noexcept

View File

@ -2190,7 +2190,7 @@ thread_base::native_entry thread_base::finalize(u64 _self) noexcept
thread_base::native_entry thread_base::make_trampoline(u64(*entry)(thread_base* _base))
{
return build_function_asm<native_entry>("thread_base_trampoline", [&](asmjit::X86Assembler& c, auto& args)
return build_function_asm<native_entry>("thread_base_trampoline", [&](asmjit::x86::Assembler& c, auto& args)
{
using namespace asmjit;
@ -2203,7 +2203,7 @@ thread_base::native_entry thread_base::make_trampoline(u64(*entry)(thread_base*
// Call finalize, return if zero
c.mov(args[0], x86::rax);
c.call(imm_ptr<native_entry(*)(u64)>(finalize));
c.call(imm_ptr(static_cast<native_entry(*)(u64)>(&finalize)));
c.test(x86::rax, x86::rax);
c.jz(_ret);

View File

@ -1910,14 +1910,14 @@ std::vector<ppu_function_t>& ppu_function_manager::access(bool ghc)
static std::vector<ppu_function_t> list_ghc
{
build_function_asm<ppu_function_t>("ppu_unregistered", [](asmjit::X86Assembler& c, auto& args)
build_function_asm<ppu_function_t>("ppu_unregistered", [](asmjit::x86::Assembler& c, auto& args)
{
using namespace asmjit;
c.mov(args[0], x86::rbp);
c.jmp(imm_ptr(list[0]));
}),
build_function_asm<ppu_function_t>("ppu_return", [](asmjit::X86Assembler& c, auto& args)
build_function_asm<ppu_function_t>("ppu_return", [](asmjit::x86::Assembler& c, auto& args)
{
using namespace asmjit;
@ -1937,7 +1937,7 @@ u32 ppu_function_manager::add_function(ppu_function_t function)
list.push_back(function);
// Generate trampoline
list2.push_back(build_function_asm<ppu_function_t>("ppu_trampolinea", [&](asmjit::X86Assembler& c, auto& args)
list2.push_back(build_function_asm<ppu_function_t>("ppu_trampolinea", [&](asmjit::x86::Assembler& c, auto& args)
{
using namespace asmjit;

View File

@ -147,7 +147,7 @@ static bool ppu_break(ppu_thread& ppu, ppu_opcode_t op);
extern void do_cell_atomic_128_store(u32 addr, const void* to_write);
const auto ppu_gateway = built_function<void(*)(ppu_thread*)>("ppu_gateway", [](asmjit::X86Assembler& c, auto& args)
const auto ppu_gateway = built_function<void(*)(ppu_thread*)>("ppu_gateway", [](asmjit::x86::Assembler& c, auto& args)
{
// Gateway for PPU, converts from native to GHC calling convention, also saves RSP value for escape
using namespace asmjit;
@ -248,7 +248,7 @@ const auto ppu_gateway = built_function<void(*)(ppu_thread*)>("ppu_gateway", [](
c.ret();
});
const extern auto ppu_escape = build_function_asm<void(*)(ppu_thread*)>("ppu_escape", [](asmjit::X86Assembler& c, auto& args)
const extern auto ppu_escape = build_function_asm<void(*)(ppu_thread*)>("ppu_escape", [](asmjit::x86::Assembler& c, auto& args)
{
using namespace asmjit;
@ -262,7 +262,7 @@ const extern auto ppu_escape = build_function_asm<void(*)(ppu_thread*)>("ppu_esc
void ppu_recompiler_fallback(ppu_thread& ppu);
const auto ppu_recompiler_fallback_ghc = build_function_asm<void(*)(ppu_thread& ppu)>("ppu_trampolineb", [](asmjit::X86Assembler& c, auto& args)
const auto ppu_recompiler_fallback_ghc = build_function_asm<void(*)(ppu_thread& ppu)>("ppu_trampolineb", [](asmjit::x86::Assembler& c, auto& args)
{
using namespace asmjit;
@ -1817,7 +1817,7 @@ extern u64 ppu_ldarx(ppu_thread& ppu, u32 addr)
return ppu_load_acquire_reservation<u64>(ppu, addr);
}
const auto ppu_stcx_accurate_tx = built_function<u64(*)(u32 raddr, u64 rtime, const void* _old, u64 _new)>("ppu_stcx_accurate_tx", [](asmjit::X86Assembler& c, auto& args)
const auto ppu_stcx_accurate_tx = built_function<u64(*)(u32 raddr, u64 rtime, const void* _old, u64 _new)>("ppu_stcx_accurate_tx", [](asmjit::x86::Assembler& c, auto& args)
{
using namespace asmjit;
@ -1859,10 +1859,10 @@ const auto ppu_stcx_accurate_tx = built_function<u64(*)(u32 raddr, u64 rtime, co
// Prepare data
if (s_tsx_avx)
{
c.vmovups(x86::ymm0, x86::yword_ptr(args[2], 0));
c.vmovups(x86::ymm1, x86::yword_ptr(args[2], 32));
c.vmovups(x86::ymm2, x86::yword_ptr(args[2], 64));
c.vmovups(x86::ymm3, x86::yword_ptr(args[2], 96));
c.vmovups(x86::ymm0, x86::ymmword_ptr(args[2], 0));
c.vmovups(x86::ymm1, x86::ymmword_ptr(args[2], 32));
c.vmovups(x86::ymm2, x86::ymmword_ptr(args[2], 64));
c.vmovups(x86::ymm3, x86::ymmword_ptr(args[2], 96));
}
else
{
@ -1897,10 +1897,10 @@ const auto ppu_stcx_accurate_tx = built_function<u64(*)(u32 raddr, u64 rtime, co
if (s_tsx_avx)
{
c.vxorps(x86::ymm0, x86::ymm0, x86::yword_ptr(x86::rbp, 0));
c.vxorps(x86::ymm1, x86::ymm1, x86::yword_ptr(x86::rbp, 32));
c.vxorps(x86::ymm2, x86::ymm2, x86::yword_ptr(x86::rbp, 64));
c.vxorps(x86::ymm3, x86::ymm3, x86::yword_ptr(x86::rbp, 96));
c.vxorps(x86::ymm0, x86::ymm0, x86::ymmword_ptr(x86::rbp, 0));
c.vxorps(x86::ymm1, x86::ymm1, x86::ymmword_ptr(x86::rbp, 32));
c.vxorps(x86::ymm2, x86::ymm2, x86::ymmword_ptr(x86::rbp, 64));
c.vxorps(x86::ymm3, x86::ymm3, x86::ymmword_ptr(x86::rbp, 96));
c.vorps(x86::ymm0, x86::ymm0, x86::ymm1);
c.vorps(x86::ymm1, x86::ymm2, x86::ymm3);
c.vorps(x86::ymm0, x86::ymm1, x86::ymm0);
@ -1943,10 +1943,10 @@ const auto ppu_stcx_accurate_tx = built_function<u64(*)(u32 raddr, u64 rtime, co
// Load old data to store back in rdata
if (s_tsx_avx)
{
c.vmovaps(x86::ymm0, x86::yword_ptr(x86::rbp, 0));
c.vmovaps(x86::ymm1, x86::yword_ptr(x86::rbp, 32));
c.vmovaps(x86::ymm2, x86::yword_ptr(x86::rbp, 64));
c.vmovaps(x86::ymm3, x86::yword_ptr(x86::rbp, 96));
c.vmovaps(x86::ymm0, x86::ymmword_ptr(x86::rbp, 0));
c.vmovaps(x86::ymm1, x86::ymmword_ptr(x86::rbp, 32));
c.vmovaps(x86::ymm2, x86::ymmword_ptr(x86::rbp, 64));
c.vmovaps(x86::ymm3, x86::ymmword_ptr(x86::rbp, 96));
}
else
{
@ -1974,10 +1974,10 @@ const auto ppu_stcx_accurate_tx = built_function<u64(*)(u32 raddr, u64 rtime, co
// Store previous data back to rdata
if (s_tsx_avx)
{
c.vmovaps(x86::yword_ptr(args[2], 0), x86::ymm0);
c.vmovaps(x86::yword_ptr(args[2], 32), x86::ymm1);
c.vmovaps(x86::yword_ptr(args[2], 64), x86::ymm2);
c.vmovaps(x86::yword_ptr(args[2], 96), x86::ymm3);
c.vmovaps(x86::ymmword_ptr(args[2], 0), x86::ymm0);
c.vmovaps(x86::ymmword_ptr(args[2], 32), x86::ymm1);
c.vmovaps(x86::ymmword_ptr(args[2], 64), x86::ymm2);
c.vmovaps(x86::ymmword_ptr(args[2], 96), x86::ymm3);
}
else
{

View File

@ -96,15 +96,15 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
using namespace asmjit;
StringLogger logger;
logger.addOptions(Logger::kOptionBinaryForm);
logger.addFlags(FormatFlags::kMachineCode);
std::string log;
CodeHolder code;
code.init(m_asmrt.getCodeInfo());
code._globalHints = asmjit::CodeEmitter::kHintOptimizedAlign;
code.init(m_asmrt.environment());
X86Assembler compiler(&code);
x86::Assembler compiler(&code);
compiler.addEncodingOptions(EncodingOptions::kOptimizedAlign);
this->c = &compiler;
if (g_cfg.core.spu_debug && !add_loc->logged.exchange(1))
@ -137,7 +137,7 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
this->qw1 = &x86::rcx;
#endif
const std::array<const X86Xmm*, 16> vec_vars
const std::array<const x86::Xmm*, 16> vec_vars
{
&x86::xmm0,
&x86::xmm1,
@ -333,20 +333,20 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
});
c->setExtraReg(x86::k7);
c->z().vmovdqa32(x86::zmm0, x86::zword_ptr(*qw1, j - ls_off));
c->z().vmovdqa32(x86::zmm0, x86::zmmword_ptr(*qw1, j - ls_off));
}
else
{
c->vmovdqa32(x86::zmm0, x86::zword_ptr(*qw1, j - ls_off));
c->vmovdqa32(x86::zmm0, x86::zmmword_ptr(*qw1, j - ls_off));
}
if (first)
{
c->vpcmpud(x86::k1, x86::zmm0, x86::zword_ptr(x86::rax, code_off), 4);
c->vpcmpud(x86::k1, x86::zmm0, x86::zmmword_ptr(x86::rax, code_off), 4);
}
else
{
c->vpcmpud(x86::k3, x86::zmm0, x86::zword_ptr(x86::rax, code_off), 4);
c->vpcmpud(x86::k3, x86::zmm0, x86::zmmword_ptr(x86::rax, code_off), 4);
c->korw(x86::k1, x86::k3, x86::k1);
}
@ -378,15 +378,15 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
if (cmask == 0xff)
{
c->vmovdqa(x86::ymm0, x86::yword_ptr(*ls, starta));
c->vmovdqa(x86::ymm0, x86::ymmword_ptr(*ls, starta));
}
else
{
c->vpxor(x86::ymm0, x86::ymm0, x86::ymm0);
c->vpblendd(x86::ymm0, x86::ymm0, x86::yword_ptr(*ls, starta), cmask);
c->vpblendd(x86::ymm0, x86::ymm0, x86::ymmword_ptr(*ls, starta), cmask);
}
c->vpxor(x86::ymm0, x86::ymm0, x86::yword_ptr(label_code));
c->vpxor(x86::ymm0, x86::ymm0, x86::ymmword_ptr(label_code));
c->vptest(x86::ymm0, x86::ymm0);
c->jnz(label_diff);
@ -401,9 +401,9 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
const u32 cmask1 = get_code_mask(starta + 32, enda);
c->vpxor(x86::ymm0, x86::ymm0, x86::ymm0);
c->vpblendd(x86::ymm0, x86::ymm0, x86::yword_ptr(*ls, starta), cmask0);
c->vpblendd(x86::ymm0, x86::ymm0, x86::yword_ptr(*ls, starta + 32), cmask1);
c->vpxor(x86::ymm0, x86::ymm0, x86::yword_ptr(label_code));
c->vpblendd(x86::ymm0, x86::ymm0, x86::ymmword_ptr(*ls, starta), cmask0);
c->vpblendd(x86::ymm0, x86::ymm0, x86::ymmword_ptr(*ls, starta + 32), cmask1);
c->vpxor(x86::ymm0, x86::ymm0, x86::ymmword_ptr(label_code));
c->vptest(x86::ymm0, x86::ymm0);
c->jnz(label_diff);
@ -453,21 +453,21 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
xmm2z = true;
}
c->vpblendd(x86::ymm1, x86::ymm2, x86::yword_ptr(*qw1, j - ls_off), cmask);
c->vpblendd(x86::ymm1, x86::ymm2, x86::ymmword_ptr(*qw1, j - ls_off), cmask);
}
else
{
c->vmovdqa32(x86::ymm1, x86::yword_ptr(*qw1, j - ls_off));
c->vmovdqa32(x86::ymm1, x86::ymmword_ptr(*qw1, j - ls_off));
}
// Perform bitwise comparison and accumulate
if (first)
{
c->vpxor(x86::ymm0, x86::ymm1, x86::yword_ptr(x86::rax, code_off));
c->vpxor(x86::ymm0, x86::ymm1, x86::ymmword_ptr(x86::rax, code_off));
}
else
{
c->vpternlogd(x86::ymm0, x86::ymm1, x86::yword_ptr(x86::rax, code_off), 0xf6 /* orAxorBC */);
c->vpternlogd(x86::ymm0, x86::ymm1, x86::ymmword_ptr(x86::rax, code_off), 0xf6 /* orAxorBC */);
}
for (u32 i = j; i < j + 32; i += 4)
@ -500,15 +500,15 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
if (cmask == 0xff)
{
c->vmovaps(x86::ymm0, x86::yword_ptr(*ls, starta));
c->vmovaps(x86::ymm0, x86::ymmword_ptr(*ls, starta));
}
else
{
c->vxorps(x86::ymm0, x86::ymm0, x86::ymm0);
c->vblendps(x86::ymm0, x86::ymm0, x86::yword_ptr(*ls, starta), cmask);
c->vblendps(x86::ymm0, x86::ymm0, x86::ymmword_ptr(*ls, starta), cmask);
}
c->vxorps(x86::ymm0, x86::ymm0, x86::yword_ptr(label_code));
c->vxorps(x86::ymm0, x86::ymm0, x86::ymmword_ptr(label_code));
c->vptest(x86::ymm0, x86::ymm0);
c->jnz(label_diff);
@ -523,9 +523,9 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
const u32 cmask1 = get_code_mask(starta + 32, enda);
c->vxorps(x86::ymm0, x86::ymm0, x86::ymm0);
c->vblendps(x86::ymm0, x86::ymm0, x86::yword_ptr(*ls, starta), cmask0);
c->vblendps(x86::ymm0, x86::ymm0, x86::yword_ptr(*ls, starta + 32), cmask1);
c->vxorps(x86::ymm0, x86::ymm0, x86::yword_ptr(label_code));
c->vblendps(x86::ymm0, x86::ymm0, x86::ymmword_ptr(*ls, starta), cmask0);
c->vblendps(x86::ymm0, x86::ymm0, x86::ymmword_ptr(*ls, starta + 32), cmask1);
c->vxorps(x86::ymm0, x86::ymm0, x86::ymmword_ptr(label_code));
c->vptest(x86::ymm0, x86::ymm0);
c->jnz(label_diff);
@ -586,21 +586,21 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
xmm2z = true;
}
c->vblendps(reg1, x86::ymm2, x86::yword_ptr(*ls, j - ls_off), cmask);
c->vblendps(reg1, x86::ymm2, x86::ymmword_ptr(*ls, j - ls_off), cmask);
}
else
{
c->vmovaps(reg1, x86::yword_ptr(*ls, j - ls_off));
c->vmovaps(reg1, x86::ymmword_ptr(*ls, j - ls_off));
}
// Perform bitwise comparison and accumulate
if (!order++)
{
c->vxorps(reg0, reg1, x86::yword_ptr(x86::rax, code_off));
c->vxorps(reg0, reg1, x86::ymmword_ptr(x86::rax, code_off));
}
else
{
c->vxorps(reg1, reg1, x86::yword_ptr(x86::rax, code_off));
c->vxorps(reg1, reg1, x86::ymmword_ptr(x86::rax, code_off));
c->vorps(reg0, reg1, reg0);
}
@ -800,7 +800,7 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
{
if (m_preds.count(pos))
{
c->align(kAlignCode, 16);
c->align(AlignMode::kCode, 16);
}
c->bind(found->second);
@ -832,7 +832,7 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
}
// Simply return
c->align(kAlignCode, 16);
c->align(AlignMode::kCode, 16);
c->bind(label_stop);
c->add(x86::rsp, 0x28);
c->ret();
@ -840,7 +840,7 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
if (g_cfg.core.spu_verification)
{
// Dispatch
c->align(kAlignCode, 16);
c->align(AlignMode::kCode, 16);
c->bind(label_diff);
c->inc(SPU_OFF_64(block_failure));
c->add(x86::rsp, 0x28);
@ -855,7 +855,7 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
// Build instruction dispatch table
if (instr_table.isValid())
{
c->align(kAlignData, 8);
c->align(AlignMode::kData, 8);
c->bind(instr_table);
// Get actual instruction table bounds
@ -877,7 +877,7 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
}
}
c->align(kAlignData, words_align);
c->align(AlignMode::kData, words_align);
c->bind(label_code);
for (u32 d : words)
c->dd(d);
@ -893,20 +893,15 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
xmm_consts.clear();
// Compile and get function address
spu_function_t fn;
spu_function_t fn = reinterpret_cast<spu_function_t>(m_asmrt._add(&code));
if (auto err = m_asmrt.add(&fn, &code))
if (!fn)
{
if (err == asmjit::ErrorCode::kErrorNoVirtualMemory)
{
return nullptr;
}
spu_log.fatal("Failed to build a function");
}
else
{
jit_announce(fn, code.getCodeSize(), fmt::format("spu-b-%s", fmt::base57(be_t<u64>(m_hash_start))));
jit_announce(fn, code.codeSize(), fmt::format("spu-b-%s", fmt::base57(be_t<u64>(m_hash_start))));
}
// Install compiled function pointer
@ -927,7 +922,7 @@ spu_function_t spu_recompiler::compile(spu_program&& _func)
{
// Add ASMJIT logs
fmt::append(log, "Address: %p\n\n", fn);
log += logger.getString();
log.append(logger._content.data(), logger._content.size());
log += "\n\n\n";
// Append log file
@ -962,7 +957,7 @@ spu_recompiler::XmmLink spu_recompiler::XmmGet(s8 reg, XmmType type) // get xmm
return result;
}
inline asmjit::X86Mem spu_recompiler::XmmConst(const v128& data)
inline asmjit::x86::Mem spu_recompiler::XmmConst(const v128& data)
{
// Find existing const
auto& xmm_label = xmm_consts[std::make_pair(data._u64[0], data._u64[1])];
@ -973,7 +968,7 @@ inline asmjit::X86Mem spu_recompiler::XmmConst(const v128& data)
consts.emplace_back([=, this]
{
c->align(asmjit::kAlignData, 16);
c->align(asmjit::AlignMode::kData, 16);
c->bind(xmm_label);
c->dq(data._u64[0]);
c->dq(data._u64[1]);
@ -983,17 +978,17 @@ inline asmjit::X86Mem spu_recompiler::XmmConst(const v128& data)
return asmjit::x86::oword_ptr(xmm_label);
}
// Convenience overload for a __m128 value: wraps it with v128::fromF and
// forwards the result to another XmmConst overload (presumably the
// const v128& one), returning that overload's memory operand.
// NOTE(review): the two signature lines below are the old/new return type
// spellings from the diff (X86Mem vs x86::Mem); only one exists in the real file.
inline asmjit::X86Mem spu_recompiler::XmmConst(const __m128& data)
inline asmjit::x86::Mem spu_recompiler::XmmConst(const __m128& data)
{
return XmmConst(v128::fromF(data));
}
// Convenience overload for a __m128i value: wraps it with v128::fromV and
// forwards the result to another XmmConst overload (presumably the
// const v128& one), returning that overload's memory operand.
// NOTE(review): the two signature lines below are the old/new return type
// spellings from the diff (X86Mem vs x86::Mem); only one exists in the real file.
inline asmjit::X86Mem spu_recompiler::XmmConst(const __m128i& data)
inline asmjit::x86::Mem spu_recompiler::XmmConst(const __m128i& data)
{
return XmmConst(v128::fromV(data));
}
// Builds a qword-sized memory operand whose base is the register pointed to
// by pc0 and whose displacement is (addr - m_base).
// NOTE(review): the two signature lines below are the old/new return type
// spellings from the diff (X86Mem vs x86::Mem); only one exists in the real file.
inline asmjit::X86Mem spu_recompiler::get_pc(u32 addr)
inline asmjit::x86::Mem spu_recompiler::get_pc(u32 addr)
{
// Displacement is relative to m_base, so addr == m_base yields offset 0
return asmjit::x86::qword_ptr(*pc0, addr - m_base);
}
@ -1108,7 +1103,7 @@ void spu_recompiler::branch_indirect(spu_opcode_t op, bool jt, bool ret)
c->mov(SPU_OFF_32(pc), *addr);
c->mov(*arg0, *cpu);
c->add(x86::rsp, 0x28);
c->jmp(imm_ptr<void(*)(spu_thread*)>(_throw));
c->jmp(imm_ptr(+_throw));
// Save addr in srr0 and disable interrupts
c->bind(intr);
@ -1123,7 +1118,7 @@ void spu_recompiler::branch_indirect(spu_opcode_t op, bool jt, bool ret)
c->test(*addr, 0xff80007f);
c->cmovnz(*addr, rip->r32());
c->shr(*addr, 5);
c->align(kAlignCode, 16);
c->align(AlignMode::kCode, 16);
c->bind(no_intr);
}
@ -1210,7 +1205,7 @@ void spu_recompiler::branch_set_link(u32 target)
after.emplace_back([=, this, target = local->second]
{
// Clear return info after use
c->align(kAlignCode, 16);
c->align(AlignMode::kCode, 16);
c->bind(ret);
c->mov(qw1->r32(), SPU_OFF_32(gpr, 1, &v128::_u32, 3));
c->and_(qw1->r32(), 0x3fff0);
@ -1246,9 +1241,9 @@ void spu_recompiler::fall(spu_opcode_t op)
c->and_(*addr, 0x3fffc);
c->mov(SPU_OFF_32(pc), *addr);
c->mov(arg1->r32(), op.opcode);
c->mov(*qw0, asmjit::imm_ptr(asmjit::Internal::ptr_cast<void*>(g_spu_interpreter_fast.decode(op.opcode))));
c->mov(*qw0, asmjit::imm_ptr(g_spu_interpreter_fast.decode(op.opcode)));
c->mov(*arg0, *cpu);
c->call(asmjit::imm_ptr<void(*)(spu_thread*, u32, spu_inter_func_t)>(gate));
c->call(asmjit::imm_ptr(+gate));
}
void spu_recompiler::UNK(spu_opcode_t op)
@ -1266,7 +1261,7 @@ void spu_recompiler::UNK(spu_opcode_t op)
c->mov(arg1->r32(), op.opcode);
c->mov(*arg0, *cpu);
c->add(asmjit::x86::rsp, 0x28);
c->jmp(asmjit::imm_ptr<void(*)(spu_thread*, u32)>(gate));
c->jmp(asmjit::imm_ptr(+gate));
m_pos = -1;
}
@ -1295,7 +1290,7 @@ void spu_recompiler::STOP(spu_opcode_t op)
c->mov(arg1->r32(), op.opcode & 0x3fff);
c->mov(*arg0, *cpu);
c->call(imm_ptr(spu_stop));
c->align(kAlignCode, 16);
c->align(AlignMode::kCode, 16);
c->bind(ret);
c->add(SPU_OFF_32(pc), 4);
@ -1362,14 +1357,14 @@ void spu_recompiler::RDCH(spu_opcode_t op)
{
using namespace asmjit;
auto read_channel = [&](X86Mem channel_ptr, bool sync = true)
auto read_channel = [&](x86::Mem channel_ptr, bool sync = true)
{
Label wait = c->newLabel();
Label again = c->newLabel();
Label ret = c->newLabel();
c->mov(addr->r64(), channel_ptr);
c->xor_(qw0->r32(), qw0->r32());
c->align(kAlignCode, 16);
c->align(AlignMode::kCode, 16);
c->bind(again);
c->bt(addr->r64(), spu_channel::off_count);
c->jnc(wait);
@ -1380,7 +1375,7 @@ void spu_recompiler::RDCH(spu_opcode_t op)
c->lea(addr->r64(), get_pc(pos));
c->and_(*addr, 0x3fffc);
c->mov(SPU_OFF_32(pc), *addr);
c->mov(arg1->r32(), op.ra);
c->mov(arg1->r32(), +op.ra);
c->mov(*arg0, *cpu);
c->call(imm_ptr(spu_rdch));
c->jmp(ret);
@ -1482,13 +1477,12 @@ void spu_recompiler::RDCH(spu_opcode_t op)
*_res = v128::from32r(out);
};
using ftype = void (*)(spu_thread*, v128*);
c->lea(addr->r64(), get_pc(m_pos));
c->and_(*addr, 0x3fffc);
c->mov(SPU_OFF_32(pc), *addr);
c->lea(*arg1, SPU_OFF_128(gpr, op.rt));
c->mov(*arg0, *cpu);
c->call(g_cfg.core.spu_loop_detection ? asmjit::imm_ptr<ftype>(sub1) : asmjit::imm_ptr<ftype>(sub2));
c->call(asmjit::imm_ptr(g_cfg.core.spu_loop_detection ? +sub1 : +sub2));
return;
}
case SPU_RdEventMask:
@ -1523,7 +1517,7 @@ void spu_recompiler::RDCH(spu_opcode_t op)
c->lea(addr->r64(), get_pc(m_pos));
c->and_(*addr, 0x3fffc);
c->mov(SPU_OFF_32(pc), *addr);
c->mov(arg1->r32(), op.ra);
c->mov(arg1->r32(), +op.ra);
c->mov(*arg0, *cpu);
c->call(imm_ptr(spu_rdch));
c->movd(x86::xmm0, *addr);
@ -1540,7 +1534,7 @@ void spu_recompiler::RCHCNT(spu_opcode_t op)
{
using namespace asmjit;
auto ch_cnt = [&](X86Mem channel_ptr, bool inv = false)
auto ch_cnt = [&](x86::Mem channel_ptr, bool inv = false)
{
// Load channel count
const XmmLink& vr = XmmAlloc();
@ -1631,7 +1625,7 @@ void spu_recompiler::RCHCNT(spu_opcode_t op)
c->lea(addr->r64(), get_pc(m_pos));
c->and_(*addr, 0x3fffc);
c->mov(SPU_OFF_32(pc), *addr);
c->mov(arg1->r32(), op.ra);
c->mov(arg1->r32(), +op.ra);
c->mov(*arg0, *cpu);
c->call(imm_ptr(spu_rchcnt));
break;
@ -2320,7 +2314,7 @@ void spu_recompiler::WRCH(spu_opcode_t op)
Label ret = c->newLabel();
c->mov(qw0->r32(), SPU_OFF_32(gpr, op.rt, &v128::_u32, 3));
c->mov(addr->r64(), SPU_OFF_64(ch_out_mbox));
c->align(kAlignCode, 16);
c->align(AlignMode::kCode, 16);
c->bind(again);
c->mov(qw0->r32(), qw0->r32());
c->bt(addr->r64(), spu_channel::off_count);
@ -2332,7 +2326,7 @@ void spu_recompiler::WRCH(spu_opcode_t op)
c->lea(addr->r64(), get_pc(pos));
c->and_(*addr, 0x3fffc);
c->mov(SPU_OFF_32(pc), *addr);
c->mov(arg1->r32(), op.ra);
c->mov(arg1->r32(), +op.ra);
c->mov(*arg0, *cpu);
c->call(imm_ptr(spu_wrch));
c->jmp(ret);
@ -2359,7 +2353,7 @@ void spu_recompiler::WRCH(spu_opcode_t op)
c->lea(addr->r64(), get_pc(pos));
c->and_(*addr, 0x3fffc);
c->mov(SPU_OFF_32(pc), *addr);
c->lea(arg1->r32(), MFC_WrTagMask);
c->mov(arg1->r32(), MFC_WrTagMask);
c->mov(*arg0, *cpu);
c->call(imm_ptr(spu_wrch));
c->jmp(ret);
@ -2383,7 +2377,7 @@ void spu_recompiler::WRCH(spu_opcode_t op)
c->lea(addr->r64(), get_pc(pos));
c->and_(*addr, 0x3fffc);
c->mov(SPU_OFF_32(pc), *addr);
c->mov(arg1->r32(), op.ra);
c->mov(arg1->r32(), +op.ra);
c->mov(*arg0, *cpu);
c->call(imm_ptr(spu_wrch));
c->jmp(ret);
@ -2476,7 +2470,7 @@ void spu_recompiler::WRCH(spu_opcode_t op)
c->btr(SPU_OFF_32(ch_stall_mask), arg1->r32());
c->jnc(ret);
c->mov(*arg0, *cpu);
c->call(imm_ptr<void(*)(spu_thread*, u32)>(sub));
c->call(imm_ptr(+sub));
c->bind(ret);
return;
}
@ -2488,7 +2482,7 @@ void spu_recompiler::WRCH(spu_opcode_t op)
};
c->mov(*arg0, *cpu);
c->call(imm_ptr<void(*)(spu_thread*)>(sub));
c->call(imm_ptr(+sub));
c->mov(qw0->r32(), SPU_OFF_32(gpr, op.rt, &v128::_u32, 3));
c->mov(SPU_OFF_32(ch_dec_value), qw0->r32());
return;
@ -2515,7 +2509,7 @@ void spu_recompiler::WRCH(spu_opcode_t op)
c->lea(addr->r64(), get_pc(m_pos));
c->and_(*addr, 0x3fffc);
c->mov(SPU_OFF_32(pc), *addr);
c->mov(arg1->r32(), op.ra);
c->mov(arg1->r32(), +op.ra);
c->mov(qw0->r32(), SPU_OFF_32(gpr, op.rt, &v128::_u32, 3));
c->mov(*arg0, *cpu);
c->call(imm_ptr(spu_wrch));
@ -2529,7 +2523,7 @@ void spu_recompiler::BIZ(spu_opcode_t op)
after.emplace_back([=, this, jt = m_targets[m_pos].size() > 1]
{
c->align(asmjit::kAlignCode, 16);
c->align(asmjit::AlignMode::kCode, 16);
c->bind(branch_label);
c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3));
c->and_(*addr, 0x3fffc);
@ -2545,7 +2539,7 @@ void spu_recompiler::BINZ(spu_opcode_t op)
after.emplace_back([=, this, jt = m_targets[m_pos].size() > 1]
{
c->align(asmjit::kAlignCode, 16);
c->align(asmjit::AlignMode::kCode, 16);
c->bind(branch_label);
c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3));
c->and_(*addr, 0x3fffc);
@ -2561,7 +2555,7 @@ void spu_recompiler::BIHZ(spu_opcode_t op)
after.emplace_back([=, this, jt = m_targets[m_pos].size() > 1]
{
c->align(asmjit::kAlignCode, 16);
c->align(asmjit::AlignMode::kCode, 16);
c->bind(branch_label);
c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3));
c->and_(*addr, 0x3fffc);
@ -2577,7 +2571,7 @@ void spu_recompiler::BIHNZ(spu_opcode_t op)
after.emplace_back([=, this, jt = m_targets[m_pos].size() > 1]
{
c->align(asmjit::kAlignCode, 16);
c->align(asmjit::AlignMode::kCode, 16);
c->bind(branch_label);
c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3));
c->and_(*addr, 0x3fffc);
@ -2669,13 +2663,13 @@ void spu_recompiler::BISLED(spu_opcode_t op)
asmjit::Label branch_label = c->newLabel();
c->mov(*arg0, *cpu);
c->call(asmjit::imm_ptr<u32(*)(spu_thread*)>(get_events));
c->call(asmjit::imm_ptr(+get_events));
c->test(*addr, 1);
c->jne(branch_label);
after.emplace_back([=, this]()
{
c->align(asmjit::kAlignCode, 16);
c->align(asmjit::AlignMode::kCode, 16);
c->bind(branch_label);
c->and_(*addr, 0x3fffc);
branch_indirect(op, true, false);
@ -2895,7 +2889,7 @@ void spu_recompiler::CDX(spu_opcode_t op)
const XmmLink& vr = XmmAlloc();
c->movdqa(vr, XmmConst(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f)));
c->movdqa(SPU_OFF_128(gpr, op.rt), vr);
c->mov(*qw0, asmjit::imm_u(0x0001020304050607));
c->mov(*qw0, asmjit::Imm(0x0001020304050607ull));
c->mov(asmjit::x86::qword_ptr(*cpu, addr->r64(), 0, offset32(&spu_thread::gpr, op.rt)), *qw0);
}
@ -3028,7 +3022,7 @@ void spu_recompiler::CBD(spu_opcode_t op)
//}
c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3));
if (op.i7) c->add(*addr, op.i7);
if (op.i7) c->add(*addr, +op.i7);
c->not_(*addr);
c->and_(*addr, 0xf);
@ -3052,7 +3046,7 @@ void spu_recompiler::CHD(spu_opcode_t op)
//}
c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3));
if (op.i7) c->add(*addr, op.i7);
if (op.i7) c->add(*addr, +op.i7);
c->not_(*addr);
c->and_(*addr, 0xe);
@ -3076,7 +3070,7 @@ void spu_recompiler::CWD(spu_opcode_t op)
//}
c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3));
if (op.i7) c->add(*addr, op.i7);
if (op.i7) c->add(*addr, +op.i7);
c->not_(*addr);
c->and_(*addr, 0xc);
@ -3100,14 +3094,14 @@ void spu_recompiler::CDD(spu_opcode_t op)
//}
c->mov(*addr, SPU_OFF_32(gpr, op.ra, &v128::_u32, 3));
if (op.i7) c->add(*addr, op.i7);
if (op.i7) c->add(*addr, +op.i7);
c->not_(*addr);
c->and_(*addr, 0x8);
const XmmLink& vr = XmmAlloc();
c->movdqa(vr, XmmConst(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f)));
c->movdqa(SPU_OFF_128(gpr, op.rt), vr);
c->mov(*qw0, asmjit::imm_u(0x0001020304050607));
c->mov(*qw0, asmjit::Imm(0x0001020304050607ull));
c->mov(asmjit::x86::qword_ptr(*cpu, addr->r64(), 0, offset32(&spu_thread::gpr, op.rt)), *qw0);
}
@ -4048,7 +4042,7 @@ void spu_recompiler::BRZ(spu_opcode_t op)
after.emplace_back([=, this]()
{
c->align(asmjit::kAlignCode, 16);
c->align(asmjit::AlignMode::kCode, 16);
c->bind(branch_label);
branch_fixed(target);
});
@ -4088,7 +4082,7 @@ void spu_recompiler::BRNZ(spu_opcode_t op)
after.emplace_back([=, this]()
{
c->align(asmjit::kAlignCode, 16);
c->align(asmjit::AlignMode::kCode, 16);
c->bind(branch_label);
branch_fixed(target);
});
@ -4109,7 +4103,7 @@ void spu_recompiler::BRHZ(spu_opcode_t op)
after.emplace_back([=, this]()
{
c->align(asmjit::kAlignCode, 16);
c->align(asmjit::AlignMode::kCode, 16);
c->bind(branch_label);
branch_fixed(target);
});
@ -4130,7 +4124,7 @@ void spu_recompiler::BRHNZ(spu_opcode_t op)
after.emplace_back([=, this]()
{
c->align(asmjit::kAlignCode, 16);
c->align(asmjit::AlignMode::kCode, 16);
c->bind(branch_label);
branch_fixed(target);
});
@ -4459,7 +4453,7 @@ void spu_recompiler::CGTBI(spu_opcode_t op)
void spu_recompiler::HGTI(spu_opcode_t op)
{
c->cmp(SPU_OFF_32(gpr, op.ra, &v128::_s32, 3), op.si10);
c->cmp(SPU_OFF_32(gpr, op.ra, &v128::_s32, 3), +op.si10);
asmjit::Label label = c->newLabel();
asmjit::Label ret = c->newLabel();
@ -4503,7 +4497,7 @@ void spu_recompiler::CLGTBI(spu_opcode_t op)
void spu_recompiler::HLGTI(spu_opcode_t op)
{
c->cmp(SPU_OFF_32(gpr, op.ra, &v128::_u32, 3), op.si10);
c->cmp(SPU_OFF_32(gpr, op.ra, &v128::_u32, 3), +op.si10);
asmjit::Label label = c->newLabel();
asmjit::Label ret = c->newLabel();
@ -4565,7 +4559,7 @@ void spu_recompiler::CEQBI(spu_opcode_t op)
void spu_recompiler::HEQI(spu_opcode_t op)
{
c->cmp(SPU_OFF_32(gpr, op.ra, &v128::_u32, 3), op.si10);
c->cmp(SPU_OFF_32(gpr, op.ra, &v128::_u32, 3), +op.si10);
asmjit::Label label = c->newLabel();
asmjit::Label ret = c->newLabel();
@ -4636,12 +4630,12 @@ void spu_recompiler::SHUFB(spu_opcode_t op)
c->vpcmpub(asmjit::x86::k1, vc, XmmConst(_mm_set1_epi8(-0x40)), 5 /* GE */);
c->vpxor(vm, vc, XmmConst(_mm_set1_epi8(0xf)));
c->setExtraReg(asmjit::x86::k1);
c->z().vblendmb(vc, vc, XmmConst(_mm_set1_epi8(-1))); // {k1}
c->z().vpblendmb(vc, vc, XmmConst(_mm_set1_epi8(-1))); // {k1}
c->vpcmpub(asmjit::x86::k2, vm, XmmConst(_mm_set1_epi8(-0x20)), 5 /* GE */);
c->vptestmb(asmjit::x86::k1, vm, XmmConst(_mm_set1_epi8(0x10)));
c->vpshufb(vt, va, vm);
c->setExtraReg(asmjit::x86::k2);
c->z().vblendmb(va, va, XmmConst(_mm_set1_epi8(0x7f))); // {k2}
c->z().vpblendmb(va, va, XmmConst(_mm_set1_epi8(0x7f))); // {k2}
c->setExtraReg(asmjit::x86::k1);
c->vpshufb(vt, vb, vm); // {k1}
c->vpternlogd(vt, va, vc, 0xf6 /* orAxorBC */);

View File

@ -24,23 +24,23 @@ private:
u32 m_base;
// emitter:
asmjit::X86Assembler* c;
asmjit::x86::Assembler* c;
// arguments:
const asmjit::X86Gp* cpu;
const asmjit::X86Gp* ls;
const asmjit::X86Gp* rip;
const asmjit::X86Gp* pc0;
const asmjit::x86::Gp* cpu;
const asmjit::x86::Gp* ls;
const asmjit::x86::Gp* rip;
const asmjit::x86::Gp* pc0;
// Native args or temp variables:
const asmjit::X86Gp* arg0;
const asmjit::X86Gp* arg1;
const asmjit::X86Gp* qw0;
const asmjit::X86Gp* qw1;
const asmjit::x86::Gp* arg0;
const asmjit::x86::Gp* arg1;
const asmjit::x86::Gp* qw0;
const asmjit::x86::Gp* qw1;
// temporary:
const asmjit::X86Gp* addr;
std::array<const asmjit::X86Xmm*, 16> vec;
const asmjit::x86::Gp* addr;
std::array<const asmjit::x86::Xmm*, 16> vec;
// workload for the end of function:
std::vector<std::function<void()>> after;
@ -60,10 +60,10 @@ private:
class XmmLink
{
const asmjit::X86Xmm* m_var;
const asmjit::x86::Xmm* m_var;
public:
XmmLink(const asmjit::X86Xmm*& xmm_var)
XmmLink(const asmjit::x86::Xmm*& xmm_var)
: m_var(xmm_var)
{
xmm_var = nullptr;
@ -71,7 +71,7 @@ private:
XmmLink(XmmLink&&) = default; // MoveConstructible + delete copy constructor and copy/move operators
operator const asmjit::X86Xmm&() const
operator const asmjit::x86::Xmm&() const
{
return *m_var;
}
@ -87,11 +87,11 @@ private:
XmmLink XmmAlloc();
XmmLink XmmGet(s8 reg, XmmType type);
asmjit::X86Mem XmmConst(const v128& data);
asmjit::X86Mem XmmConst(const __m128& data);
asmjit::X86Mem XmmConst(const __m128i& data);
asmjit::x86::Mem XmmConst(const v128& data);
asmjit::x86::Mem XmmConst(const __m128& data);
asmjit::x86::Mem XmmConst(const __m128i& data);
asmjit::X86Mem get_pc(u32 addr);
asmjit::x86::Mem get_pc(u32 addr);
void branch_fixed(u32 target, bool absolute = false);
void branch_indirect(spu_opcode_t op, bool jt = false, bool ret = true);
void branch_set_link(u32 target);

View File

@ -43,7 +43,7 @@ namespace asmjit
static constexpr spu_opcode_t s_op{};
template <uint I, uint N>
static void build_spu_gpr_load(X86Assembler& c, X86Xmm x, const bf_t<u32, I, N>&, bool store = false)
static void build_spu_gpr_load(x86::Assembler& c, x86::Xmm x, const bf_t<u32, I, N>&, bool store = false)
{
static_assert(N == 7, "Invalid bitfield");
@ -87,7 +87,7 @@ namespace asmjit
}
// Thin wrapper around build_spu_gpr_load: passes the same assembler, XMM
// register and a default-constructed bf_t<u32, I, N>, with the `store` flag
// defaulting to true here (it defaults to false on build_spu_gpr_load).
// NOTE(review): the two signature lines below are the old/new parameter type
// spellings from the diff (X86Assembler/X86Xmm vs x86::Assembler/x86::Xmm).
template <uint I, uint N>
static void build_spu_gpr_store(X86Assembler& c, X86Xmm x, const bf_t<u32, I, N>&, bool store = true)
static void build_spu_gpr_store(x86::Assembler& c, x86::Xmm x, const bf_t<u32, I, N>&, bool store = true)
{
build_spu_gpr_load(c, x, bf_t<u32, I, N>{}, store);
}
@ -1733,7 +1733,7 @@ bool spu_interpreter::SHUFB(spu_thread& spu, spu_opcode_t op)
return true;
}
const spu_inter_func_t optimized_shufb = build_function_asm<spu_inter_func_t>("spu_shufb", [](asmjit::X86Assembler& c, auto& /*args*/)
const spu_inter_func_t optimized_shufb = build_function_asm<spu_inter_func_t>("spu_shufb", [](asmjit::x86::Assembler& c, auto& /*args*/)
{
using namespace asmjit;
@ -1793,7 +1793,7 @@ const spu_inter_func_t optimized_shufb = build_function_asm<spu_inter_func_t>("s
c.mov(x86::eax, 1);
c.ret();
c.align(kAlignData, 16);
c.align(AlignMode::kData, 16);
c.bind(xc0);
c.dq(0xc0c0c0c0c0c0c0c0);
c.dq(0xc0c0c0c0c0c0c0c0);

View File

@ -160,7 +160,7 @@ DECLARE(spu_runtime::tr_all) = []
return reinterpret_cast<spu_function_t>(trptr);
}();
DECLARE(spu_runtime::g_gateway) = built_function<spu_function_t>("spu_gateway", [](asmjit::X86Assembler& c, auto& args)
DECLARE(spu_runtime::g_gateway) = built_function<spu_function_t>("spu_gateway", [](asmjit::x86::Assembler& c, auto& args)
{
// Gateway for SPU dispatcher, converts from native to GHC calling convention, also saves RSP value for spu_escape
using namespace asmjit;
@ -249,7 +249,7 @@ DECLARE(spu_runtime::g_gateway) = built_function<spu_function_t>("spu_gateway",
c.ret();
});
DECLARE(spu_runtime::g_escape) = build_function_asm<void(*)(spu_thread*)>("spu_escape", [](asmjit::X86Assembler& c, auto& args)
DECLARE(spu_runtime::g_escape) = build_function_asm<void(*)(spu_thread*)>("spu_escape", [](asmjit::x86::Assembler& c, auto& args)
{
using namespace asmjit;
@ -261,7 +261,7 @@ DECLARE(spu_runtime::g_escape) = build_function_asm<void(*)(spu_thread*)>("spu_e
c.ret();
});
DECLARE(spu_runtime::g_tail_escape) = build_function_asm<void(*)(spu_thread*, spu_function_t, u8*)>("spu_tail_escape", [](asmjit::X86Assembler& c, auto& args)
DECLARE(spu_runtime::g_tail_escape) = build_function_asm<void(*)(spu_thread*, spu_function_t, u8*)>("spu_tail_escape", [](asmjit::x86::Assembler& c, auto& args)
{
using namespace asmjit;

View File

@ -405,7 +405,7 @@ std::array<u32, 2> op_branch_targets(u32 pc, spu_opcode_t op)
return res;
}
const auto spu_putllc_tx = built_function<u64(*)(u32 raddr, u64 rtime, void* _old, const void* _new)>("spu_putllc_tx", [](asmjit::X86Assembler& c, auto& args)
const auto spu_putllc_tx = built_function<u64(*)(u32 raddr, u64 rtime, void* _old, const void* _new)>("spu_putllc_tx", [](asmjit::x86::Assembler& c, auto& args)
{
using namespace asmjit;
@ -455,14 +455,14 @@ const auto spu_putllc_tx = built_function<u64(*)(u32 raddr, u64 rtime, void* _ol
// Prepare data
if (s_tsx_avx)
{
c.vmovups(x86::ymm0, x86::yword_ptr(args[2], 0));
c.vmovups(x86::ymm1, x86::yword_ptr(args[2], 32));
c.vmovups(x86::ymm2, x86::yword_ptr(args[2], 64));
c.vmovups(x86::ymm3, x86::yword_ptr(args[2], 96));
c.vmovups(x86::ymm4, x86::yword_ptr(args[3], 0));
c.vmovups(x86::ymm5, x86::yword_ptr(args[3], 32));
c.vmovups(x86::ymm6, x86::yword_ptr(args[3], 64));
c.vmovups(x86::ymm7, x86::yword_ptr(args[3], 96));
c.vmovups(x86::ymm0, x86::ymmword_ptr(args[2], 0));
c.vmovups(x86::ymm1, x86::ymmword_ptr(args[2], 32));
c.vmovups(x86::ymm2, x86::ymmword_ptr(args[2], 64));
c.vmovups(x86::ymm3, x86::ymmword_ptr(args[2], 96));
c.vmovups(x86::ymm4, x86::ymmword_ptr(args[3], 0));
c.vmovups(x86::ymm5, x86::ymmword_ptr(args[3], 32));
c.vmovups(x86::ymm6, x86::ymmword_ptr(args[3], 64));
c.vmovups(x86::ymm7, x86::ymmword_ptr(args[3], 96));
}
else
{
@ -506,10 +506,10 @@ const auto spu_putllc_tx = built_function<u64(*)(u32 raddr, u64 rtime, void* _ol
if (s_tsx_avx)
{
c.vxorps(x86::ymm0, x86::ymm0, x86::yword_ptr(args[1], 0));
c.vxorps(x86::ymm1, x86::ymm1, x86::yword_ptr(args[1], 32));
c.vxorps(x86::ymm2, x86::ymm2, x86::yword_ptr(args[1], 64));
c.vxorps(x86::ymm3, x86::ymm3, x86::yword_ptr(args[1], 96));
c.vxorps(x86::ymm0, x86::ymm0, x86::ymmword_ptr(args[1], 0));
c.vxorps(x86::ymm1, x86::ymm1, x86::ymmword_ptr(args[1], 32));
c.vxorps(x86::ymm2, x86::ymm2, x86::ymmword_ptr(args[1], 64));
c.vxorps(x86::ymm3, x86::ymm3, x86::ymmword_ptr(args[1], 96));
c.vorps(x86::ymm0, x86::ymm0, x86::ymm1);
c.vorps(x86::ymm1, x86::ymm2, x86::ymm3);
c.vorps(x86::ymm0, x86::ymm1, x86::ymm0);
@ -539,10 +539,10 @@ const auto spu_putllc_tx = built_function<u64(*)(u32 raddr, u64 rtime, void* _ol
if (s_tsx_avx)
{
c.vmovaps(x86::yword_ptr(args[1], 0), x86::ymm4);
c.vmovaps(x86::yword_ptr(args[1], 32), x86::ymm5);
c.vmovaps(x86::yword_ptr(args[1], 64), x86::ymm6);
c.vmovaps(x86::yword_ptr(args[1], 96), x86::ymm7);
c.vmovaps(x86::ymmword_ptr(args[1], 0), x86::ymm4);
c.vmovaps(x86::ymmword_ptr(args[1], 32), x86::ymm5);
c.vmovaps(x86::ymmword_ptr(args[1], 64), x86::ymm6);
c.vmovaps(x86::ymmword_ptr(args[1], 96), x86::ymm7);
}
else
{
@ -569,10 +569,10 @@ const auto spu_putllc_tx = built_function<u64(*)(u32 raddr, u64 rtime, void* _ol
// Load previous data to store back to rdata
if (s_tsx_avx)
{
c.vmovaps(x86::ymm0, x86::yword_ptr(args[1], 0));
c.vmovaps(x86::ymm1, x86::yword_ptr(args[1], 32));
c.vmovaps(x86::ymm2, x86::yword_ptr(args[1], 64));
c.vmovaps(x86::ymm3, x86::yword_ptr(args[1], 96));
c.vmovaps(x86::ymm0, x86::ymmword_ptr(args[1], 0));
c.vmovaps(x86::ymm1, x86::ymmword_ptr(args[1], 32));
c.vmovaps(x86::ymm2, x86::ymmword_ptr(args[1], 64));
c.vmovaps(x86::ymm3, x86::ymmword_ptr(args[1], 96));
}
else
{
@ -601,10 +601,10 @@ const auto spu_putllc_tx = built_function<u64(*)(u32 raddr, u64 rtime, void* _ol
// Store previous data back to rdata
if (s_tsx_avx)
{
c.vmovaps(x86::yword_ptr(args[2], 0), x86::ymm0);
c.vmovaps(x86::yword_ptr(args[2], 32), x86::ymm1);
c.vmovaps(x86::yword_ptr(args[2], 64), x86::ymm2);
c.vmovaps(x86::yword_ptr(args[2], 96), x86::ymm3);
c.vmovaps(x86::ymmword_ptr(args[2], 0), x86::ymm0);
c.vmovaps(x86::ymmword_ptr(args[2], 32), x86::ymm1);
c.vmovaps(x86::ymmword_ptr(args[2], 64), x86::ymm2);
c.vmovaps(x86::ymmword_ptr(args[2], 96), x86::ymm3);
}
else
{
@ -663,7 +663,7 @@ const auto spu_putllc_tx = built_function<u64(*)(u32 raddr, u64 rtime, void* _ol
c.ret();
});
const auto spu_putlluc_tx = built_function<u64(*)(u32 raddr, const void* rdata, u64* _stx, u64* _ftx)>("spu_putlluc_tx", [](asmjit::X86Assembler& c, auto& args)
const auto spu_putlluc_tx = built_function<u64(*)(u32 raddr, const void* rdata, u64* _stx, u64* _ftx)>("spu_putlluc_tx", [](asmjit::x86::Assembler& c, auto& args)
{
using namespace asmjit;
@ -694,10 +694,10 @@ const auto spu_putlluc_tx = built_function<u64(*)(u32 raddr, const void* rdata,
// Prepare data
if (s_tsx_avx)
{
c.vmovups(x86::ymm0, x86::yword_ptr(args[1], 0));
c.vmovups(x86::ymm1, x86::yword_ptr(args[1], 32));
c.vmovups(x86::ymm2, x86::yword_ptr(args[1], 64));
c.vmovups(x86::ymm3, x86::yword_ptr(args[1], 96));
c.vmovups(x86::ymm0, x86::ymmword_ptr(args[1], 0));
c.vmovups(x86::ymm1, x86::ymmword_ptr(args[1], 32));
c.vmovups(x86::ymm2, x86::ymmword_ptr(args[1], 64));
c.vmovups(x86::ymm3, x86::ymmword_ptr(args[1], 96));
}
else
{
@ -733,10 +733,10 @@ const auto spu_putlluc_tx = built_function<u64(*)(u32 raddr, const void* rdata,
if (s_tsx_avx)
{
c.vmovaps(x86::yword_ptr(x86::r11, 0), x86::ymm0);
c.vmovaps(x86::yword_ptr(x86::r11, 32), x86::ymm1);
c.vmovaps(x86::yword_ptr(x86::r11, 64), x86::ymm2);
c.vmovaps(x86::yword_ptr(x86::r11, 96), x86::ymm3);
c.vmovaps(x86::ymmword_ptr(x86::r11, 0), x86::ymm0);
c.vmovaps(x86::ymmword_ptr(x86::r11, 32), x86::ymm1);
c.vmovaps(x86::ymmword_ptr(x86::r11, 64), x86::ymm2);
c.vmovaps(x86::ymmword_ptr(x86::r11, 96), x86::ymm3);
}
else
{
@ -789,7 +789,7 @@ const auto spu_putlluc_tx = built_function<u64(*)(u32 raddr, const void* rdata,
c.ret();
});
const auto spu_getllar_tx = built_function<u64(*)(u32 raddr, void* rdata, cpu_thread* _cpu, u64 rtime)>("spu_getllar_tx", [](asmjit::X86Assembler& c, auto& args)
const auto spu_getllar_tx = built_function<u64(*)(u32 raddr, void* rdata, cpu_thread* _cpu, u64 rtime)>("spu_getllar_tx", [](asmjit::x86::Assembler& c, auto& args)
{
using namespace asmjit;
@ -847,10 +847,10 @@ const auto spu_getllar_tx = built_function<u64(*)(u32 raddr, void* rdata, cpu_th
// Just read data to registers
if (s_tsx_avx)
{
c.vmovups(x86::ymm0, x86::yword_ptr(x86::rbp, 0));
c.vmovups(x86::ymm1, x86::yword_ptr(x86::rbp, 32));
c.vmovups(x86::ymm2, x86::yword_ptr(x86::rbp, 64));
c.vmovups(x86::ymm3, x86::yword_ptr(x86::rbp, 96));
c.vmovups(x86::ymm0, x86::ymmword_ptr(x86::rbp, 0));
c.vmovups(x86::ymm1, x86::ymmword_ptr(x86::rbp, 32));
c.vmovups(x86::ymm2, x86::ymmword_ptr(x86::rbp, 64));
c.vmovups(x86::ymm3, x86::ymmword_ptr(x86::rbp, 96));
}
else
{
@ -872,10 +872,10 @@ const auto spu_getllar_tx = built_function<u64(*)(u32 raddr, void* rdata, cpu_th
// Store data
if (s_tsx_avx)
{
c.vmovaps(x86::yword_ptr(args[1], 0), x86::ymm0);
c.vmovaps(x86::yword_ptr(args[1], 32), x86::ymm1);
c.vmovaps(x86::yword_ptr(args[1], 64), x86::ymm2);
c.vmovaps(x86::yword_ptr(args[1], 96), x86::ymm3);
c.vmovaps(x86::ymmword_ptr(args[1], 0), x86::ymm0);
c.vmovaps(x86::ymmword_ptr(args[1], 32), x86::ymm1);
c.vmovaps(x86::ymmword_ptr(args[1], 64), x86::ymm2);
c.vmovaps(x86::ymmword_ptr(args[1], 96), x86::ymm3);
}
else
{

View File

@ -144,7 +144,7 @@ namespace
}
template <bool Compare, int Size, typename RT>
void build_copy_data_swap_u32_avx3(asmjit::X86Assembler& c, std::array<asmjit::X86Gp, 4>& args, const RT& rmask, const RT& rload, const RT& rtest)
void build_copy_data_swap_u32_avx3(asmjit::x86::Assembler& c, std::array<asmjit::x86::Gp, 4>& args, const RT& rmask, const RT& rload, const RT& rtest)
{
using namespace asmjit;
@ -173,26 +173,26 @@ namespace
c.and_(args[0], -Size * 4);
c.add(args[2].r32(), args[3].r32());
c.k(x86::k1).z().vmovdqu32(rload, X86Mem(args[1], 0, Size * 4u));
c.k(x86::k1).z().vmovdqu32(rload, x86::Mem(args[1], 0, Size * 4u));
c.vpshufb(rload, rload, rmask);
if (Compare)
c.k(x86::k1).z().vpxord(rtest, rload, X86Mem(args[0], 0, Size * 4u));
c.k(x86::k1).vmovdqa32(X86Mem(args[0], 0, Size * 4u), rload);
c.k(x86::k1).z().vpxord(rtest, rload, x86::Mem(args[0], 0, Size * 4u));
c.k(x86::k1).vmovdqa32(x86::Mem(args[0], 0, Size * 4u), rload);
c.lea(args[0], x86::qword_ptr(args[0], Size * 4));
c.lea(args[1], x86::qword_ptr(args[1], Size * 4));
c.sub(args[2].r32(), Size);
c.or_(x86::eax, -1);
c.align(kAlignCode, 16);
c.align(AlignMode::kCode, 16);
c.bind(loop);
c.cmp(args[2].r32(), Size);
c.jbe(tail);
c.vmovdqu32(rload, X86Mem(args[1], 0, Size * 4u));
c.vmovdqu32(rload, x86::Mem(args[1], 0, Size * 4u));
c.vpshufb(rload, rload, rmask);
if (Compare)
c.vpternlogd(rtest, rload, X86Mem(args[0], 0, Size * 4u), 0xf6); // orAxorBC
c.vmovdqa32(X86Mem(args[0], 0, Size * 4u), rload);
c.vpternlogd(rtest, rload, x86::Mem(args[0], 0, Size * 4u), 0xf6); // orAxorBC
c.vmovdqa32(x86::Mem(args[0], 0, Size * 4u), rload);
c.lea(args[0], x86::qword_ptr(args[0], Size * 4));
c.lea(args[1], x86::qword_ptr(args[1], Size * 4));
c.sub(args[2].r32(), Size);
@ -202,11 +202,11 @@ namespace
c.shlx(x86::eax, x86::eax, args[2].r32());
c.not_(x86::eax);
c.kmovw(x86::k1, x86::eax);
c.k(x86::k1).z().vmovdqu32(rload, X86Mem(args[1], 0, Size * 4u));
c.k(x86::k1).z().vmovdqu32(rload, x86::Mem(args[1], 0, Size * 4u));
c.vpshufb(rload, rload, rmask);
if (Compare)
c.k(x86::k1).vpternlogd(rtest, rload, X86Mem(args[0], 0, Size * 4u), 0xf6);
c.k(x86::k1).vmovdqu32(X86Mem(args[0], 0, Size * 4u), rload);
c.k(x86::k1).vpternlogd(rtest, rload, x86::Mem(args[0], 0, Size * 4u), 0xf6);
c.k(x86::k1).vmovdqu32(x86::Mem(args[0], 0, Size * 4u), rload);
if (Compare)
{
@ -230,7 +230,7 @@ namespace
}
template <bool Compare>
void build_copy_data_swap_u32(asmjit::X86Assembler& c, std::array<asmjit::X86Gp, 4>& args)
void build_copy_data_swap_u32(asmjit::x86::Assembler& c, std::array<asmjit::x86::Gp, 4>& args)
{
using namespace asmjit;