Compilation fixes

This commit is contained in:
Nekotekina 2014-04-25 16:48:27 +04:00
parent 15b0b6830e
commit 555d053e32
4 changed files with 49 additions and 8 deletions

View File

@ -22,6 +22,7 @@
<ClCompile Include="asmjit\src\asmjit\base\assembler.cpp" /> <ClCompile Include="asmjit\src\asmjit\base\assembler.cpp" />
<ClCompile Include="asmjit\src\asmjit\base\codegen.cpp" /> <ClCompile Include="asmjit\src\asmjit\base\codegen.cpp" />
<ClCompile Include="asmjit\src\asmjit\base\compiler.cpp" /> <ClCompile Include="asmjit\src\asmjit\base\compiler.cpp" />
<ClCompile Include="asmjit\src\asmjit\base\constpool.cpp" />
<ClCompile Include="asmjit\src\asmjit\base\context.cpp" /> <ClCompile Include="asmjit\src\asmjit\base\context.cpp" />
<ClCompile Include="asmjit\src\asmjit\base\cpuinfo.cpp" /> <ClCompile Include="asmjit\src\asmjit\base\cpuinfo.cpp" />
<ClCompile Include="asmjit\src\asmjit\base\cputicks.cpp" /> <ClCompile Include="asmjit\src\asmjit\base\cputicks.cpp" />
@ -44,6 +45,9 @@
<ClCompile Include="asmjit\src\asmjit\x86\x86defs.cpp" /> <ClCompile Include="asmjit\src\asmjit\x86\x86defs.cpp" />
<ClCompile Include="asmjit\src\asmjit\x86\x86func.cpp" /> <ClCompile Include="asmjit\src\asmjit\x86\x86func.cpp" />
</ItemGroup> </ItemGroup>
<ItemGroup>
<ClInclude Include="asmjit\src\asmjit\base\constpool.h" />
</ItemGroup>
<PropertyGroup Label="Globals"> <PropertyGroup Label="Globals">
<ProjectGuid>{AC40FF01-426E-4838-A317-66354CEFAE88}</ProjectGuid> <ProjectGuid>{AC40FF01-426E-4838-A317-66354CEFAE88}</ProjectGuid>
<RootNamespace>asmjit</RootNamespace> <RootNamespace>asmjit</RootNamespace>

View File

@ -25,5 +25,9 @@
<ClCompile Include="asmjit\src\asmjit\base\string.cpp" /> <ClCompile Include="asmjit\src\asmjit\base\string.cpp" />
<ClCompile Include="asmjit\src\asmjit\base\vmem.cpp" /> <ClCompile Include="asmjit\src\asmjit\base\vmem.cpp" />
<ClCompile Include="asmjit\src\asmjit\base\zone.cpp" /> <ClCompile Include="asmjit\src\asmjit\base\zone.cpp" />
<ClCompile Include="asmjit\src\asmjit\base\constpool.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="asmjit\src\asmjit\base\constpool.h" />
</ItemGroup> </ItemGroup>
</Project> </Project>

View File

@ -16,7 +16,7 @@ using namespace asmjit::host;
struct g_imm_table_struct struct g_imm_table_struct
{ {
u16 cntb_table[65536]; //u16 cntb_table[65536];
__m128i fsmb_table[65536]; __m128i fsmb_table[65536];
__m128i fsmh_table[256]; __m128i fsmh_table[256];
@ -28,7 +28,7 @@ struct g_imm_table_struct
g_imm_table_struct() g_imm_table_struct()
{ {
static_assert(offsetof(g_imm_table_struct, cntb_table) == 0, "offsetof(cntb_table) != 0"); /*static_assert(offsetof(g_imm_table_struct, cntb_table) == 0, "offsetof(cntb_table) != 0");
for (u32 i = 0; i < sizeof(cntb_table) / sizeof(cntb_table[0]); i++) for (u32 i = 0; i < sizeof(cntb_table) / sizeof(cntb_table[0]); i++)
{ {
u32 cnt_low = 0, cnt_high = 0; u32 cnt_low = 0, cnt_high = 0;
@ -38,7 +38,7 @@ struct g_imm_table_struct
cnt_high += (i >> (j + 8)) & 1; cnt_high += (i >> (j + 8)) & 1;
} }
cntb_table[i] = (cnt_high << 8) | cnt_low; cntb_table[i] = (cnt_high << 8) | cnt_low;
} }*/
for (u32 i = 0; i < sizeof(fsm_table) / sizeof(fsm_table[0]); i++) for (u32 i = 0; i < sizeof(fsm_table) / sizeof(fsm_table[0]); i++)
{ {
for (u32 j = 0; j < 4; j++) fsm_table[i].m128i_u32[j] = (i & (1 << j)) ? ~0 : 0; for (u32 j = 0; j < 4; j++) fsm_table[i].m128i_u32[j] = (i & (1 << j)) ? ~0 : 0;
@ -660,6 +660,7 @@ private:
} }
void ROTMA(u32 rt, u32 ra, u32 rb) void ROTMA(u32 rt, u32 ra, u32 rb)
{ {
#ifdef _M_X64
XmmInvalidate(rt); XmmInvalidate(rt);
for (u32 i = 0; i < 4; i++) for (u32 i = 0; i < 4; i++)
{ {
@ -670,6 +671,14 @@ private:
c.mov(cpu_dword(GPR[rt]._u32[i]), qw0->r32()); c.mov(cpu_dword(GPR[rt]._u32[i]), qw0->r32());
} }
LOG_OPCODE(); LOG_OPCODE();
#else
WRAPPER_BEGIN(rt, ra, rb, zz);
CPU.GPR[rt]._i32[0] = ((0 - CPU.GPR[rb]._u32[0]) & 0x3f) < 32 ? CPU.GPR[ra]._i32[0] >> ((0 - CPU.GPR[rb]._u32[0]) & 0x3f) : CPU.GPR[ra]._i32[0] >> 31;
CPU.GPR[rt]._i32[1] = ((0 - CPU.GPR[rb]._u32[1]) & 0x3f) < 32 ? CPU.GPR[ra]._i32[1] >> ((0 - CPU.GPR[rb]._u32[1]) & 0x3f) : CPU.GPR[ra]._i32[1] >> 31;
CPU.GPR[rt]._i32[2] = ((0 - CPU.GPR[rb]._u32[2]) & 0x3f) < 32 ? CPU.GPR[ra]._i32[2] >> ((0 - CPU.GPR[rb]._u32[2]) & 0x3f) : CPU.GPR[ra]._i32[2] >> 31;
CPU.GPR[rt]._i32[3] = ((0 - CPU.GPR[rb]._u32[3]) & 0x3f) < 32 ? CPU.GPR[ra]._i32[3] >> ((0 - CPU.GPR[rb]._u32[3]) & 0x3f) : CPU.GPR[ra]._i32[3] >> 31;
WRAPPER_END(rt, ra, rb, 0);
#endif
} }
void SHL(u32 rt, u32 ra, u32 rb) void SHL(u32 rt, u32 ra, u32 rb)
{ {
@ -1922,12 +1931,19 @@ private:
} }
void XSWD(u32 rt, u32 ra) void XSWD(u32 rt, u32 ra)
{ {
#ifdef _M_X64
c.movsxd(*qw0, cpu_dword(GPR[ra]._i32[0])); c.movsxd(*qw0, cpu_dword(GPR[ra]._i32[0]));
c.movsxd(*qw1, cpu_dword(GPR[ra]._i32[2])); c.movsxd(*qw1, cpu_dword(GPR[ra]._i32[2]));
c.mov(cpu_qword(GPR[rt]._i64[0]), *qw0); c.mov(cpu_qword(GPR[rt]._i64[0]), *qw0);
c.mov(cpu_qword(GPR[rt]._i64[1]), *qw1); c.mov(cpu_qword(GPR[rt]._i64[1]), *qw1);
XmmInvalidate(rt); XmmInvalidate(rt);
LOG_OPCODE(); LOG_OPCODE();
#else
WRAPPER_BEGIN(rt, ra, yy, zz);
CPU.GPR[rt]._i64[0] = (s64)CPU.GPR[ra]._i32[0];
CPU.GPR[rt]._i64[1] = (s64)CPU.GPR[ra]._i32[2];
WRAPPER_END(rt, ra, 0, 0);
#endif
} }
void XSHW(u32 rt, u32 ra) void XSHW(u32 rt, u32 ra)
{ {
@ -1939,13 +1955,27 @@ private:
} }
void CNTB(u32 rt, u32 ra) void CNTB(u32 rt, u32 ra)
{ {
XmmInvalidate(rt); /*XmmInvalidate(rt);
for (u32 i = 0; i < 8; i++) for (u32 i = 0; i < 8; i++)
{ {
c.movzx(*addr, cpu_word(GPR[ra]._u16[i])); c.movzx(*addr, cpu_word(GPR[ra]._u16[i]));
c.movzx(*addr, word_ptr(*g_imm_var, *addr, 1, offsetof(g_imm_table_struct, cntb_table[0]))); c.movzx(*addr, word_ptr(*g_imm_var, *addr, 1, offsetof(g_imm_table_struct, cntb_table[0])));
c.mov(cpu_word(GPR[rt]._u16[i]), addr->r16()); c.mov(cpu_word(GPR[rt]._u16[i]), addr->r16());
} }*/
const XmmLink& va = XmmGet(ra, rt);
const XmmLink& v1 = XmmCopy(va);
const XmmLink& vm = XmmAlloc();
c.psrlw(v1.get(), 4);
c.pand(va.get(), XmmConst(_mm_set1_epi8(0xf)));
c.pand(v1.get(), XmmConst(_mm_set1_epi8(0xf)));
c.movdqa(vm.get(), XmmConst(_mm_set_epi8(4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0)));
c.pshufb(vm.get(), va.get());
c.movdqa(va.get(), XmmConst(_mm_set_epi8(4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0)));
c.pshufb(va.get(), v1.get());
c.paddb(va.get(), vm.get());
XmmFinalize(va, rt);
XmmFinalize(v1);
XmmFinalize(vm);
LOG_OPCODE(); LOG_OPCODE();
} }
void XSBH(u32 rt, u32 ra) void XSBH(u32 rt, u32 ra)

View File

@ -299,10 +299,12 @@ int cellAudioInit()
// convert the data from float to u16 with clipping: // convert the data from float to u16 with clipping:
if (!first_mix) if (!first_mix)
{ {
/*for (u32 i = 0; i < (sizeof(buffer) / sizeof(float)); i++) #ifndef _M_X64
for (u32 i = 0; i < (sizeof(buf2ch) / sizeof(float)); i++)
{ {
oal_buffer[oal_pos][oal_buffer_offset + i] = (s16)(min<float>(max<float>(buffer[i] * 0x8000, -0x8000), 0x7fff)); oal_buffer[oal_pos][oal_buffer_offset + i] = (s16)(min<float>(max<float>(buf2ch[i] * 0x8000, -0x8000), 0x7fff));
}*/ }
#else
// 2x MULPS // 2x MULPS
// 2x MAXPS (optional) // 2x MAXPS (optional)
// 2x MINPS (optional) // 2x MINPS (optional)
@ -315,6 +317,7 @@ int cellAudioInit()
_mm_cvtps_epi32(_mm_mul_ps((__m128&)(buf2ch[i]), float2u16)), _mm_cvtps_epi32(_mm_mul_ps((__m128&)(buf2ch[i]), float2u16)),
_mm_cvtps_epi32(_mm_mul_ps((__m128&)(buf2ch[i + 4]), float2u16))); _mm_cvtps_epi32(_mm_mul_ps((__m128&)(buf2ch[i + 4]), float2u16)));
} }
#endif
} }
const u64 stamp1 = get_system_time(); const u64 stamp1 = get_system_time();