From 156d3629e69b0a01adcdd9982337eaf62499e413 Mon Sep 17 00:00:00 2001 From: twinaphex Date: Tue, 28 Apr 2020 16:26:26 +0200 Subject: [PATCH] libretro-common Update --- libretro-common/libco/amd64.c | 93 ++++++++++++++++++----------------- 1 file changed, 49 insertions(+), 44 deletions(-) diff --git a/libretro-common/libco/amd64.c b/libretro-common/libco/amd64.c index c2d6ee0c88..124c319761 100644 --- a/libretro-common/libco/amd64.c +++ b/libretro-common/libco/amd64.c @@ -25,51 +25,56 @@ static void (*co_swap)(cothread_t, cothread_t) = 0; #ifdef _WIN32 /* ABI: Win64 */ + /* On Windows the handle is allocated by malloc, which is guaranteed to + return memory with at least 16-byte alignment. Hence we don't need to align + it in order to use movaps. */ static unsigned char co_swap_function[] = { - 0x48, 0x89, 0x22, /* mov [rdx],rsp */ - 0x48, 0x8b, 0x21, /* mov rsp,[rcx] */ - 0x58, /* pop rax */ - 0x48, 0x89, 0x6a, 0x08, /* mov [rdx+0x8],rbp */ - 0x48, 0x89, 0x72, 0x10, /* mov [rdx+0x10],rsi */ - 0x48, 0x89, 0x7a, 0x18, /* mov [rdx+0x18],rdi */ - 0x48, 0x89, 0x5a, 0x20, /* mov [rdx+0x20],rbx */ - 0x4c, 0x89, 0x62, 0x28, /* mov [rdx+0x28],r12 */ - 0x4c, 0x89, 0x6a, 0x30, /* mov [rdx+0x30],r13 */ - 0x4c, 0x89, 0x72, 0x38, /* mov [rdx+0x38],r14 */ - 0x4c, 0x89, 0x7a, 0x40, /* mov [rdx+0x40],r15 */ - 0x48, 0x81, 0xc2, 0x80, 0x00, 0x00, 0x00, /* add rdx,0x80 */ - 0x48, 0x83, 0xe2, 0xf0, /* and rdx,-0x10 */ - 0x0f, 0x29, 0x32, /* movaps [rdx],xmm6 */ - 0x0f, 0x29, 0x7a, 0x10, /* movaps [rdx+0x10],xmm7 */ - 0x44, 0x0f, 0x29, 0x42, 0x20, /* movaps [rdx+0x20],xmm8 */ - 0x44, 0x0f, 0x29, 0x4a, 0x30, /* movaps [rdx+0x30],xmm9 */ - 0x44, 0x0f, 0x29, 0x52, 0x40, /* movaps [rdx+0x40],xmm10 */ - 0x44, 0x0f, 0x29, 0x5a, 0x50, /* movaps [rdx+0x50],xmm11 */ - 0x44, 0x0f, 0x29, 0x62, 0x60, /* movaps [rdx+0x60],xmm12 */ - 0x44, 0x0f, 0x29, 0x6a, 0x70, /* movaps [rdx+0x70],xmm13 */ - 0x44, 0x0f, 0x29, 0xb2, 0x80, 0x00, 0x00, 0x00, /* movaps [rdx+0x80],xmm14 */ - 0x44, 0x0f, 0x29, 0xba, 0x90, 0x00, 
0x00, 0x00, /* movaps [rdx+0x90],xmm15 */ - 0x48, 0x8b, 0x69, 0x08, /* mov rbp,[rcx+0x8] */ - 0x48, 0x8b, 0x71, 0x10, /* mov rsi,[rcx+0x10] */ - 0x48, 0x8b, 0x79, 0x18, /* mov rdi,[rcx+0x18] */ - 0x48, 0x8b, 0x59, 0x20, /* mov rbx,[rcx+0x20] */ - 0x4c, 0x8b, 0x61, 0x28, /* mov r12,[rcx+0x28] */ - 0x4c, 0x8b, 0x69, 0x30, /* mov r13,[rcx+0x30] */ - 0x4c, 0x8b, 0x71, 0x38, /* mov r14,[rcx+0x38] */ - 0x4c, 0x8b, 0x79, 0x40, /* mov r15,[rcx+0x40] */ - 0x48, 0x81, 0xc1, 0x80, 0x00, 0x00, 0x00, /* add rcx,0x80 */ - 0x48, 0x83, 0xe1, 0xf0, /* and rcx,-0x10 */ - 0x0f, 0x29, 0x31, /* movaps [rcx],xmm6 */ - 0x0f, 0x29, 0x79, 0x10, /* movaps [rcx+0x10],xmm7 */ - 0x44, 0x0f, 0x29, 0x41, 0x20, /* movaps [rcx+0x20],xmm8 */ - 0x44, 0x0f, 0x29, 0x49, 0x30, /* movaps [rcx+0x30],xmm9 */ - 0x44, 0x0f, 0x29, 0x51, 0x40, /* movaps [rcx+0x40],xmm10 */ - 0x44, 0x0f, 0x29, 0x59, 0x50, /* movaps [rcx+0x50],xmm11 */ - 0x44, 0x0f, 0x29, 0x61, 0x60, /* movaps [rcx+0x60],xmm12 */ - 0x44, 0x0f, 0x29, 0x69, 0x70, /* movaps [rcx+0x70],xmm13 */ - 0x44, 0x0f, 0x29, 0xb1, 0x80, 0x00, 0x00, 0x00, /* movaps [rcx+0x80],xmm14 */ - 0x44, 0x0f, 0x29, 0xb9, 0x90, 0x00, 0x00, 0x00, /* movaps [rcx+0x90],xmm15 */ - 0xff, 0xe0, /* jmp rax */ + 0x48, 0x89, 0x22, /* mov [rdx],rsp */ + 0x48, 0x8b, 0x21, /* mov rsp,[rcx] */ + 0x58, /* pop rax */ + 0x48, 0x89, 0x6a, 0x08, /* mov [rdx+ 8],rbp */ + 0x48, 0x89, 0x72, 0x10, /* mov [rdx+16],rsi */ + 0x48, 0x89, 0x7a, 0x18, /* mov [rdx+24],rdi */ + 0x48, 0x89, 0x5a, 0x20, /* mov [rdx+32],rbx */ + 0x4c, 0x89, 0x62, 0x28, /* mov [rdx+40],r12 */ + 0x4c, 0x89, 0x6a, 0x30, /* mov [rdx+48],r13 */ + 0x4c, 0x89, 0x72, 0x38, /* mov [rdx+56],r14 */ + 0x4c, 0x89, 0x7a, 0x40, /* mov [rdx+64],r15 */ + #if !defined(LIBCO_NO_SSE) + 0x0f, 0x29, 0x72, 0x50, /* movaps [rdx+ 80],xmm6 */ + 0x0f, 0x29, 0x7a, 0x60, /* movaps [rdx+ 96],xmm7 */ + 0x44, 0x0f, 0x29, 0x42, 0x70, /* movaps [rdx+112],xmm8 */ + 0x48, 0x83, 0xc2, 0x70, /* add rdx,112 */ + 0x44, 0x0f, 0x29, 0x4a, 0x10, /* movaps [rdx+ 
16],xmm9 */ + 0x44, 0x0f, 0x29, 0x52, 0x20, /* movaps [rdx+ 32],xmm10 */ + 0x44, 0x0f, 0x29, 0x5a, 0x30, /* movaps [rdx+ 48],xmm11 */ + 0x44, 0x0f, 0x29, 0x62, 0x40, /* movaps [rdx+ 64],xmm12 */ + 0x44, 0x0f, 0x29, 0x6a, 0x50, /* movaps [rdx+ 80],xmm13 */ + 0x44, 0x0f, 0x29, 0x72, 0x60, /* movaps [rdx+ 96],xmm14 */ + 0x44, 0x0f, 0x29, 0x7a, 0x70, /* movaps [rdx+112],xmm15 */ + #endif + 0x48, 0x8b, 0x69, 0x08, /* mov rbp,[rcx+ 8] */ + 0x48, 0x8b, 0x71, 0x10, /* mov rsi,[rcx+16] */ + 0x48, 0x8b, 0x79, 0x18, /* mov rdi,[rcx+24] */ + 0x48, 0x8b, 0x59, 0x20, /* mov rbx,[rcx+32] */ + 0x4c, 0x8b, 0x61, 0x28, /* mov r12,[rcx+40] */ + 0x4c, 0x8b, 0x69, 0x30, /* mov r13,[rcx+48] */ + 0x4c, 0x8b, 0x71, 0x38, /* mov r14,[rcx+56] */ + 0x4c, 0x8b, 0x79, 0x40, /* mov r15,[rcx+64] */ + #if !defined(LIBCO_NO_SSE) + 0x0f, 0x28, 0x71, 0x50, /* movaps xmm6, [rcx+ 80] */ + 0x0f, 0x28, 0x79, 0x60, /* movaps xmm7, [rcx+ 96] */ + 0x44, 0x0f, 0x28, 0x41, 0x70, /* movaps xmm8, [rcx+112] */ + 0x48, 0x83, 0xc1, 0x70, /* add rcx,112 */ + 0x44, 0x0f, 0x28, 0x49, 0x10, /* movaps xmm9, [rcx+ 16] */ + 0x44, 0x0f, 0x28, 0x51, 0x20, /* movaps xmm10,[rcx+ 32] */ + 0x44, 0x0f, 0x28, 0x59, 0x30, /* movaps xmm11,[rcx+ 48] */ + 0x44, 0x0f, 0x28, 0x61, 0x40, /* movaps xmm12,[rcx+ 64] */ + 0x44, 0x0f, 0x28, 0x69, 0x50, /* movaps xmm13,[rcx+ 80] */ + 0x44, 0x0f, 0x28, 0x71, 0x60, /* movaps xmm14,[rcx+ 96] */ + 0x44, 0x0f, 0x28, 0x79, 0x70, /* movaps xmm15,[rcx+112] */ + #endif + 0xff, 0xe0, /* jmp rax */ }; #include