From d03b2774030d96e491f182de82fc33dea650a735 Mon Sep 17 00:00:00 2001 From: Pokechu22 Date: Sun, 5 Jun 2022 19:33:28 -0700 Subject: [PATCH] DSP LLE Recompiler: Fix LOOP and BLOOP when the counter is between 0x8001 and 0xFFFF --- Source/Core/Core/DSP/Jit/x64/DSPJitBranch.cpp | 4 +- Source/DSPSpy/tests/loop_counter_test.ds | 64 +++++++++++++++++++ 2 files changed, 66 insertions(+), 2 deletions(-) create mode 100644 Source/DSPSpy/tests/loop_counter_test.ds diff --git a/Source/Core/Core/DSP/Jit/x64/DSPJitBranch.cpp b/Source/Core/Core/DSP/Jit/x64/DSPJitBranch.cpp index 79e2da9a91..7ba437484b 100644 --- a/Source/Core/Core/DSP/Jit/x64/DSPJitBranch.cpp +++ b/Source/Core/Core/DSP/Jit/x64/DSPJitBranch.cpp @@ -322,14 +322,14 @@ void DSPEmitter::HandleLoop() MOVZX(32, 16, ECX, M_SDSP_r_st(3)); TEST(32, R(RCX), R(RCX)); - FixupBranch rLoopCntG = J_CC(CC_LE, true); + FixupBranch rLoopCntG = J_CC(CC_E, true); CMP(16, R(RAX), Imm16(m_compile_pc - 1)); FixupBranch rLoopAddrG = J_CC(CC_NE, true); SUB(16, M_SDSP_r_st(3), Imm16(1)); CMP(16, M_SDSP_r_st(3), Imm16(0)); - FixupBranch loadStack = J_CC(CC_LE, true); + FixupBranch loadStack = J_CC(CC_E, true); MOVZX(32, 16, ECX, M_SDSP_r_st(0)); MOV(16, M_SDSP_pc(), R(RCX)); FixupBranch loopUpdated = J(true); diff --git a/Source/DSPSpy/tests/loop_counter_test.ds b/Source/DSPSpy/tests/loop_counter_test.ds new file mode 100644 index 0000000000..51f06c0e09 --- /dev/null +++ b/Source/DSPSpy/tests/loop_counter_test.ds @@ -0,0 +1,64 @@ +; This test checks how LOOP and BLOOP handle their arguments by running loops with the count +; ranging from 0 to 0xffff. The current counter is shown via mail at the top of the screen. +; This test gets slower as the counter gets larger (it runs in O(n^2)). 
+incdir "tests" +include "dsp_base.inc" + +test_main: + CLR $acc0 + LRI $ar0, #0 + LRI $ix0, #0 + LRI $ar1, #0 + LRI $ix1, #0 + +main_loop: + CLR $acc1 + ; Incrementing $acc1 $ac0.l times sets $acc1 to 1 * $ac0.l, which is just $ac0.l + LOOP $ac0.l + INC $acc1 + ; We are now done looping. Check that the results match what we want... + CMP + JZ check_bloop + ; Did not match. + IAR $ar0 + LRI $ix0, #1 + CALL send_back + +check_bloop: + CLR $acc1 + ; Same deal as above. Here we only have one instruction that is repeated via BLOOP. + BLOOP $ac0.l, last_bloop_ins + ; TODO: This NOP is needed for things to behave properly; if the last_bloop_ins label + ; is immediately after the BLOOP instruction things break on real hardware. + ; There's no reason to do this normally though since the LOOP instruction does the same thing + ; without needing to provide a label. But it's worth checking eventually (along with how these + ; instructions behave when a 2-word long instruction is at the end). + NOP +last_bloop_ins: + INC $acc1 + ; We are now done looping. Check that the results match what we want... + CMP + JZ advance_main_loop + ; Did not match. + IAR $ar1 + LRI $ix1, #1 + CALL send_back + +advance_main_loop: + ; Report progress as mail + SI @DMBH, #0 + SR @DMBL, $ac0.l + SI @DIRQ, #0x0001 + + ; Move on to the next value. + ; CMPIS (and CMPI) check the middle of the accumulator, so CMPIS $ac0.m, #1 + ; checks if the full accumulator is 0x10000 - which is our end point. + INC $acc0 + CMPIS $ac0.m, #1 + JNZ main_loop + + ; Done with the test. $ar0, $ix0, $ar1, and $ix1 should all be 0. + CALL send_back + + ; We're done, DO NOT DELETE THIS LINE + JMP end_of_test