From f9e36bfa67793bf743ff1a515b2ca03905ecbd02 Mon Sep 17 00:00:00 2001
From: Pokechu22 <Pokechu022@gmail.com>
Date: Sun, 15 Aug 2021 15:31:23 -0700
Subject: [PATCH] DSPLLE: Split SRS into SRS and SRSH

Hardware testing indicated that SRS uses a different list of registers than LRS. Specifically: acS.h can be stored with SRSH but cannot be loaded with LRS; SRS does not support the AX registers; and the 2 remaining encodings (0x2Axx and 0x2Bxx) do nothing.
---
 Source/Core/Core/DSP/DSPTables.cpp            |   5 +-
 Source/Core/Core/DSP/DSPTables.h              |   7 +-
 .../Core/DSP/Interpreter/DSPIntLoadStore.cpp  |  22 ++-
 .../Core/DSP/Interpreter/DSPIntTables.cpp     |   5 +-
 .../Core/DSP/Interpreter/DSPInterpreter.h     |   1 +
 Source/Core/Core/DSP/Jit/x64/DSPEmitter.h     |   1 +
 .../Core/Core/DSP/Jit/x64/DSPJitLoadStore.cpp |  27 +++-
 Source/Core/Core/DSP/Jit/x64/DSPJitTables.cpp |   5 +-
 Source/DSPSpy/tests/srs_test.ds               | 131 ++++++++++++++++++
 9 files changed, 186 insertions(+), 18 deletions(-)
 create mode 100644 Source/DSPSpy/tests/srs_test.ds

diff --git a/Source/Core/Core/DSP/DSPTables.cpp b/Source/Core/Core/DSP/DSPTables.cpp
index 133fe570ed..05baa7a89a 100644
--- a/Source/Core/Core/DSP/DSPTables.cpp
+++ b/Source/Core/Core/DSP/DSPTables.cpp
@@ -18,7 +18,7 @@
 namespace DSP
 {
 // clang-format off
-const std::array<DSPOPCTemplate, 214> s_opcodes =
+const std::array<DSPOPCTemplate, 215> s_opcodes =
 {{
   //              # of parameters----+   {type, size, loc, lshift, mask}                                                               branch        reads PC       // instruction approximation
   // name      opcode  mask  size-V  V   param 1                       param 2                       param 3                    extendable    uncond.       updates SR
@@ -192,7 +192,8 @@ const std::array<DSPOPCTemplate, 214> s_opcodes =
 
   //2
   {"LRS",      0x2000, 0xf800,    1, 2, {{P_REG18, 1, 0, 8, 0x0700},   {P_MEM, 1, 0, 0, 0x00ff}},                               false, false, false, false, false}, // $(D+24) = MEM[($cr[0-7] << 8) | I]
-  {"SRS",      0x2800, 0xf800,    1, 2, {{P_MEM,   1, 0, 0, 0x00ff},   {P_REG18, 1, 0, 8, 0x0700}},                             false, false, false, false, false}, // MEM[($cr[0-7] << 8) | I] = $(S+24)
+  {"SRSH",     0x2800, 0xfe00,    1, 2, {{P_MEM,   1, 0, 0, 0x00ff},   {P_ACCH, 1, 0, 8, 0x0100}},                              false, false, false, false, false}, // MEM[($cr[0-7] << 8) | I] = $acS.h
+  {"SRS",      0x2c00, 0xfc00,    1, 2, {{P_MEM,   1, 0, 0, 0x00ff},   {P_REG1C, 1, 0, 8, 0x0300}},                             false, false, false, false, false}, // MEM[($cr[0-7] << 8) | I] = $(S+28)
 
   // opcodes that can be extended
 
diff --git a/Source/Core/Core/DSP/DSPTables.h b/Source/Core/Core/DSP/DSPTables.h
index 89c224a339..2dead9094e 100644
--- a/Source/Core/Core/DSP/DSPTables.h
+++ b/Source/Core/Core/DSP/DSPTables.h
@@ -44,16 +44,13 @@ enum partype_t
   P_ACCM = P_REG | 0x1e00,   // used for mid part of accum
   // The following are not in gcdsptool
   P_ACCM_D = P_REG | 0x1e80,
-  P_ACC = P_REG | 0x2000,  // used for full accum.
+  P_ACC = P_REG | 0x2000,   // used for full accum.
+  P_ACCH = P_REG | 0x1000,  // used for high part of accum
   P_ACC_D = P_REG | 0x2080,
   P_AX = P_REG | 0x2200,
   P_REGS_MASK = 0x03f80,  // gcdsptool's value = 0x01f80
   P_REF = P_REG | 0x4000,
   P_PRG = P_REF | P_REG,
-
-  // The following seem like junk:
-  // P_REG10     = P_REG | 0x1000,
-  // P_AX_D      = P_REG | 0x2280,
 };
 
 struct param2_t
diff --git a/Source/Core/Core/DSP/Interpreter/DSPIntLoadStore.cpp b/Source/Core/Core/DSP/Interpreter/DSPIntLoadStore.cpp
index 744dffb70b..df6fb4c322 100644
--- a/Source/Core/Core/DSP/Interpreter/DSPIntLoadStore.cpp
+++ b/Source/Core/Core/DSP/Interpreter/DSPIntLoadStore.cpp
@@ -8,15 +8,29 @@
 
 namespace DSP::Interpreter
 {
-// SRS @M, $(0x18+S)
-// 0010 1sss mmmm mmmm
-// Move value from register $(0x18+S) to data memory pointed by address
+// SRSH @M, $acS.h
+// 0010 100s mmmm mmmm
+// Move value from register $acS.h to data memory pointed by address
+// CR[0-7] | M. That is, the upper 8 bits of the address are the
+// bottom 8 bits from CR, and the lower 8 bits are from the 8-bit immediate.
+void Interpreter::srsh(const UDSPInstruction opc)
+{
+  auto& state = m_dsp_core.DSPState();
+  const auto reg = static_cast<u8>(((opc >> 8) & 0x1) + DSP_REG_ACH0);
+  const auto addr = static_cast<u16>((state.r.cr << 8) | (opc & 0xFF));
+
+  state.WriteDMEM(addr, OpReadRegister(reg));
+}
+
+// SRS @M, $(0x1C+S)
+// 0010 11ss mmmm mmmm
+// Move value from register $(0x1C+S) to data memory pointed by address
 // CR[0-7] | M. That is, the upper 8 bits of the address are the
 // bottom 8 bits from CR, and the lower 8 bits are from the 8-bit immediate.
 void Interpreter::srs(const UDSPInstruction opc)
 {
   auto& state = m_dsp_core.DSPState();
-  const auto reg = static_cast<u8>(((opc >> 8) & 0x7) + 0x18);
+  const auto reg = static_cast<u8>(((opc >> 8) & 0x3) + DSP_REG_ACL0);
   const auto addr = static_cast<u16>((state.r.cr << 8) | (opc & 0xFF));
 
   if (reg >= DSP_REG_ACM0)
diff --git a/Source/Core/Core/DSP/Interpreter/DSPIntTables.cpp b/Source/Core/Core/DSP/Interpreter/DSPIntTables.cpp
index eaca44d5a4..c51dc74ab7 100644
--- a/Source/Core/Core/DSP/Interpreter/DSPIntTables.cpp
+++ b/Source/Core/Core/DSP/Interpreter/DSPIntTables.cpp
@@ -19,7 +19,7 @@ struct InterpreterOpInfo
 };
 
 // clang-format off
-constexpr std::array<InterpreterOpInfo, 124> s_opcodes
+constexpr std::array<InterpreterOpInfo, 125> s_opcodes
 {{
   {0x0000, 0xfffc, &Interpreter::nop},
 
@@ -101,7 +101,8 @@ constexpr std::array<InterpreterOpInfo, 124> s_opcodes
 
   // 2
   {0x2000, 0xf800, &Interpreter::lrs},
-  {0x2800, 0xf800, &Interpreter::srs},
+  {0x2800, 0xfe00, &Interpreter::srsh},
+  {0x2c00, 0xfc00, &Interpreter::srs},
 
   // opcodes that can be extended
 
diff --git a/Source/Core/Core/DSP/Interpreter/DSPInterpreter.h b/Source/Core/Core/DSP/Interpreter/DSPInterpreter.h
index b7b82100a8..119c509f2a 100644
--- a/Source/Core/Core/DSP/Interpreter/DSPInterpreter.h
+++ b/Source/Core/Core/DSP/Interpreter/DSPInterpreter.h
@@ -149,6 +149,7 @@ public:
   void srri(UDSPInstruction opc);
   void srrn(UDSPInstruction opc);
   void srs(UDSPInstruction opc);
+  void srsh(UDSPInstruction opc);
   void sub(UDSPInstruction opc);
   void subarn(UDSPInstruction opc);
   void subax(UDSPInstruction opc);
diff --git a/Source/Core/Core/DSP/Jit/x64/DSPEmitter.h b/Source/Core/Core/DSP/Jit/x64/DSPEmitter.h
index 145fa149db..12dcfba29d 100644
--- a/Source/Core/Core/DSP/Jit/x64/DSPEmitter.h
+++ b/Source/Core/Core/DSP/Jit/x64/DSPEmitter.h
@@ -88,6 +88,7 @@ public:
   void bloopi(UDSPInstruction opc);
 
   // Load/Store
+  void srsh(UDSPInstruction opc);
   void srs(UDSPInstruction opc);
   void lrs(UDSPInstruction opc);
   void lr(UDSPInstruction opc);
diff --git a/Source/Core/Core/DSP/Jit/x64/DSPJitLoadStore.cpp b/Source/Core/Core/DSP/Jit/x64/DSPJitLoadStore.cpp
index 7c60d6d1d2..282bc16c3b 100644
--- a/Source/Core/Core/DSP/Jit/x64/DSPJitLoadStore.cpp
+++ b/Source/Core/Core/DSP/Jit/x64/DSPJitLoadStore.cpp
@@ -12,14 +12,35 @@ using namespace Gen;
 
 namespace DSP::JIT::x64
 {
-// SRS @M, $(0x18+S)
+// SRSH @M, $acS.h
-// 0010 1sss mmmm mmmm
+// 0010 100s mmmm mmmm
-// Move value from register $(0x18+S) to data memory pointed by address
+// Move value from register $acS.h to data memory pointed by address
+// CR[0-7] | M. That is, the upper 8 bits of the address are the
+// bottom 8 bits from CR, and the lower 8 bits are from the 8-bit immediate.
+void DSPEmitter::srsh(const UDSPInstruction opc)
+{
+  u8 reg = ((opc >> 8) & 0x1) + DSP_REG_ACH0;
+  // u16 addr = (g_dsp.r.cr << 8) | (opc & 0xFF);
+
+  X64Reg tmp1 = m_gpr.GetFreeXReg();
+
+  dsp_op_read_reg(reg, tmp1, RegisterExtension::Zero);
+  dsp_op_read_reg(DSP_REG_CR, RAX, RegisterExtension::Zero);
+  SHL(16, R(EAX), Imm8(8));
+  OR(16, R(EAX), Imm16(opc & 0xFF));
+  dmem_write(tmp1);
+
+  m_gpr.PutXReg(tmp1);
+}
+
+// SRS @M, $(0x1C+S)
+// 0010 11ss mmmm mmmm
+// Move value from register $(0x1C+S) to data memory pointed by address
 // CR[0-7] | M. That is, the upper 8 bits of the address are the
 // bottom 8 bits from CR, and the lower 8 bits are from the 8-bit immediate.
 void DSPEmitter::srs(const UDSPInstruction opc)
 {
-  u8 reg = ((opc >> 8) & 0x7) + 0x18;
+  u8 reg = ((opc >> 8) & 0x3) + DSP_REG_ACL0;
   // u16 addr = (g_dsp.r.cr << 8) | (opc & 0xFF);
 
   X64Reg tmp1 = m_gpr.GetFreeXReg();
diff --git a/Source/Core/Core/DSP/Jit/x64/DSPJitTables.cpp b/Source/Core/Core/DSP/Jit/x64/DSPJitTables.cpp
index b47cc7bd8d..664cbd95d8 100644
--- a/Source/Core/Core/DSP/Jit/x64/DSPJitTables.cpp
+++ b/Source/Core/Core/DSP/Jit/x64/DSPJitTables.cpp
@@ -19,7 +19,7 @@ struct JITOpInfo
 };
 
 // clang-format off
-const std::array<JITOpInfo, 124> s_opcodes =
+const std::array<JITOpInfo, 125> s_opcodes =
 {{
   {0x0000, 0xfffc, &DSPEmitter::nop},
 
@@ -101,7 +101,8 @@ const std::array<JITOpInfo, 124> s_opcodes =
 
   // 2
   {0x2000, 0xf800, &DSPEmitter::lrs},
-  {0x2800, 0xf800, &DSPEmitter::srs},
+  {0x2800, 0xfe00, &DSPEmitter::srsh},
+  {0x2c00, 0xfc00, &DSPEmitter::srs},
 
   // opcodes that can be extended
 
diff --git a/Source/DSPSpy/tests/srs_test.ds b/Source/DSPSpy/tests/srs_test.ds
new file mode 100644
index 0000000000..38febf7f1a
--- /dev/null
+++ b/Source/DSPSpy/tests/srs_test.ds
@@ -0,0 +1,131 @@
+incdir "tests"
+include "dsp_base.inc"
+
+test_main:
+; Test registers used by LRS and SRS
+	LRI $CR, #0x0000
+	CALL clear_regs
+	CALL store_mem_sr
+
+	; Write with SR, read with LR
+	LRI $AR0, #0xA00A
+	CALL create_pattern
+	CALL store_mem_sr
+	CALL send_back
+	CALL clear_regs
+	CALL read_mem_lr
+	CALL send_back
+
+	; Write with SR, read with LRS
+	LRI $AR0, #0xB00B
+	CALL create_pattern
+	CALL store_mem_sr
+	CALL send_back
+	CALL clear_regs
+	CALL read_mem_lrs
+	CALL send_back
+
+	; Write with SRS, read with LR
+	LRI $AR0, #0xC00C
+	CALL create_pattern
+	CALL store_mem_srs
+	CALL send_back
+	CALL clear_regs
+	CALL read_mem_lr
+	CALL send_back
+
+	; Write with SRS, read with LRS
+	LRI $AR0, #0xD00D
+	CALL create_pattern
+	CALL store_mem_srs
+	CALL send_back
+	CALL clear_regs
+	CALL read_mem_lrs
+	CALL send_back
+
+; We're done, DO NOT DELETE THIS LINE
+	JMP end_of_test
+
+create_pattern:
+	LRI $IX0, #0x0110
+	MRR $AX0.L, $AR0
+	ADDARN $AR0, $IX0
+	MRR $AX1.L, $AR0
+	ADDARN $AR0, $IX0
+	MRR $AX0.H, $AR0
+	ADDARN $AR0, $IX0
+	MRR $AX1.H, $AR0
+	ADDARN $AR0, $IX0
+	MRR $AC0.L, $AR0
+	ADDARN $AR0, $IX0
+	MRR $AC1.L, $AR0
+	ADDARN $AR0, $IX0
+	MRR $AC0.M, $AR0
+	ADDARN $AR0, $IX0
+	MRR $AC1.M, $AR0
+	ADDARN $AR0, $IX0
+	; AC0.H and AC1.H have odd results since they're 8-bit sign-extended, but that's fine.
+	MRR $AC0.H, $AR0
+	ADDARN $AR0, $IX0
+	MRR $AC1.H, $AR0
+	RET
+
+clear_regs:
+	LRI $AX0.L, #0x0000
+	LRI $AX1.L, #0x0000
+	LRI $AX0.H, #0x0000
+	LRI $AX1.H, #0x0000
+	LRI $AC0.L, #0x0000
+	LRI $AC1.L, #0x0000
+	LRI $AC0.M, #0x0000
+	LRI $AC1.M, #0x0000
+	LRI $AC0.H, #0x0000
+	LRI $AC1.H, #0x0000
+	RET
+
+read_mem_lr:
+	LR $AX0.L, @0x0000
+	LR $AX1.L, @0x0001
+	LR $AX0.H, @0x0002
+	LR $AX1.H, @0x0003
+	LR $AC0.L, @0x0004
+	LR $AC1.L, @0x0005
+	LR $AC0.M, @0x0006
+	LR $AC1.M, @0x0007
+	RET
+
+read_mem_lrs:
+	LRS $AX0.L, @0x00
+	LRS $AX1.L, @0x01
+	LRS $AX0.H, @0x02
+	LRS $AX1.H, @0x03
+	LRS $AC0.L, @0x04
+	LRS $AC1.L, @0x05
+	LRS $AC0.M, @0x06
+	LRS $AC1.M, @0x07
+	RET
+
+store_mem_sr:
+	SR @0x0000, $AX0.L
+	SR @0x0001, $AX1.L
+	SR @0x0002, $AX0.H
+	SR @0x0003, $AX1.H
+	SR @0x0004, $AC0.L
+	SR @0x0005, $AC1.L
+	SR @0x0006, $AC0.M
+	SR @0x0007, $AC1.M
+	RET
+
+store_mem_srs:
+	; For future compatibility these have been changed to cw.
+	; The way the instructions were originally encoded is commented,
+	; but this does not match their behavior.
+	cw 0x2800 ; SRS @0x00, $AX0.L - actually SRSH @0x00, $AC0.H
+	cw 0x2901 ; SRS @0x01, $AX1.L - actually SRSH @0x01, $AC1.H
+	cw 0x2A02 ; SRS @0x02, $AX0.H - actually unknown, no store performed
+	cw 0x2B03 ; SRS @0x03, $AX1.H - actually unknown, no store performed
+	cw 0x2C04 ; SRS @0x04, $AC0.L
+	cw 0x2D05 ; SRS @0x05, $AC1.L
+	cw 0x2E06 ; SRS @0x06, $AC0.M
+	cw 0x2F07 ; SRS @0x07, $AC1.M
+	RET