diff --git a/Source/Core/Core/Src/HW/DSP.cpp b/Source/Core/Core/Src/HW/DSP.cpp index 53e8008dea..f3b894f0fd 100644 --- a/Source/Core/Core/Src/HW/DSP.cpp +++ b/Source/Core/Core/Src/HW/DSP.cpp @@ -575,25 +575,25 @@ void GenerateDSPInterruptFromPlugin(DSPInterruptType type, bool _bSet) // This happens at 4 khz, since 32 bytes at 4khz = 4 bytes at 32 khz (16bit stereo pcm) void UpdateAudioDMA() { - if (g_audioDMA.AudioDMAControl.Enable && g_audioDMA.BlocksLeft) + if (g_audioDMA.BlocksLeft) { // Read audio at g_audioDMA.ReadAddress in RAM and push onto an // external audio fifo in the emulator, to be mixed with the disc // streaming output. If that audio queue fills up, we delay the // emulator. + dsp_plugin->DSP_SendAIBuffer(g_audioDMA.ReadAddress, 8); - // AyuanX: let's do it in a bundle to speed up - if (g_audioDMA.BlocksLeft == g_audioDMA.AudioDMAControl.NumBlocks) - dsp_plugin->DSP_SendAIBuffer(g_audioDMA.SourceAddress, g_audioDMA.AudioDMAControl.NumBlocks * 8); - - //g_audioDMA.ReadAddress += 32; + g_audioDMA.ReadAddress += 32; g_audioDMA.BlocksLeft--; if (g_audioDMA.BlocksLeft == 0) { GenerateDSPInterrupt(DSP::INT_AID); - //g_audioDMA.ReadAddress = g_audioDMA.SourceAddress; - g_audioDMA.BlocksLeft = g_audioDMA.AudioDMAControl.NumBlocks; + if (g_audioDMA.AudioDMAControl.Enable) + { + g_audioDMA.BlocksLeft = g_audioDMA.AudioDMAControl.NumBlocks; + g_audioDMA.ReadAddress = g_audioDMA.SourceAddress; + } //DEBUG_LOG(DSPLLE, "ADMA read addresses: %08x", g_audioDMA.ReadAddress); } } diff --git a/Source/Core/DSPCore/Src/DSPCore.cpp b/Source/Core/DSPCore/Src/DSPCore.cpp index e9c4d5f464..5d2b5edae1 100644 --- a/Source/Core/DSPCore/Src/DSPCore.cpp +++ b/Source/Core/DSPCore/Src/DSPCore.cpp @@ -187,6 +187,7 @@ void DSPCore_CheckExternalInterrupt() void DSPCore_CheckExceptions() { + // Early out to skip the loop in the common case. if (g_dsp.exceptions == 0) return; @@ -215,14 +216,19 @@ void DSPCore_CheckExceptions() } } -// Delegate to JIT or interpreter as appropriate. 
+// Delegate to JIT or interpreter as appropriate. // Handle state changes and stepping. int DSPCore_RunCycles(int cycles) { - if(jit) { - jit->RunBlock(cycles); + static int spare_cycles = 0; + if (jit) + { + // DSPCore_CheckExceptions(); + // DSPCore_CheckExternalInterrupt(); + spare_cycles = jit->RunForCycles(cycles + spare_cycles); return 0; } + while (cycles > 0) { reswitch: switch (core_state) diff --git a/Source/Core/DSPCore/Src/DSPEmitter.cpp b/Source/Core/DSPCore/Src/DSPEmitter.cpp index 361eae5344..5bb3b6f6dd 100644 --- a/Source/Core/DSPCore/Src/DSPEmitter.cpp +++ b/Source/Core/DSPCore/Src/DSPEmitter.cpp @@ -215,53 +215,52 @@ const u8 *DSPEmitter::Compile(int start_addr) { void STACKALIGN DSPEmitter::CompileDispatcher() { + /* // TODO + enterDispatcher = GetCodePtr(); + AlignCode16(); + ABI_PushAllCalleeSavedRegsAndAdjustStack(); + + const u8 *outer_loop = GetCodePtr(); + + + //Landing pad for drec space + ABI_PopAllCalleeSavedRegsAndAdjustStack(); + RET();*/ } // Don't use the % operator in the inner loop. It's slow. -void STACKALIGN DSPEmitter::RunBlock(int cycles) +int STACKALIGN DSPEmitter::RunForCycles(int cycles) { - // How does this variable work? - static int idleskip = 0; + const int idle_cycles = 1000; -#define BURST_LENGTH 512 // Must be a power of two - u16 block_cycles = BURST_LENGTH + 1; - - // Trigger an external interrupt at the start of the cycle while (!(g_dsp.cr & CR_HALT)) { - if (block_cycles > BURST_LENGTH) - { - block_cycles = 0; - } - + DSPCore_CheckExternalInterrupt(); + DSPCore_CheckExceptions(); // Compile the block if needed - if (!blocks[g_dsp.pc]) + u16 block_addr = g_dsp.pc; + if (!blocks[block_addr]) { - blockSize[g_dsp.pc] = 0; CompileCurrent(); } - + int block_size = blockSize[block_addr]; // Execute the block if we have enough cycles - if (cycles > blockSize[g_dsp.pc]) + if (cycles > block_size) { - u16 start_addr = g_dsp.pc; - - // 5%. Not sure where the rationale originally came from. 
- if (((idleskip & 127) > 121) && - (DSPAnalyzer::code_flags[g_dsp.pc] & DSPAnalyzer::CODE_IDLE_SKIP)) { - block_cycles = 0; + blocks[block_addr](); + if (DSPAnalyzer::code_flags[block_addr] & DSPAnalyzer::CODE_IDLE_SKIP) { + if (cycles > idle_cycles) + cycles -= idle_cycles; + else + cycles = 0; } else { - blocks[g_dsp.pc](); + cycles -= block_size; } - idleskip++; - if ((idleskip & (BURST_LENGTH - 1)) == 0) - idleskip = 0; - block_cycles += blockSize[start_addr]; - cycles -= blockSize[start_addr]; } else { break; } } + return cycles; } diff --git a/Source/Core/DSPCore/Src/DSPEmitter.h b/Source/Core/DSPCore/Src/DSPEmitter.h index 8995144e4d..e09e072bae 100644 --- a/Source/Core/DSPCore/Src/DSPEmitter.h +++ b/Source/Core/DSPCore/Src/DSPEmitter.h @@ -44,7 +44,7 @@ public: const u8 *Compile(int start_addr); - void STACKALIGN RunBlock(int cycles); + int STACKALIGN RunForCycles(int cycles); // Register helpers void setCompileSR(u16 bit); @@ -101,11 +101,19 @@ private: u16 *blockSize; u16 compileSR; + // CALL this to start the dispatcher + u8 *enterDispatcher; + + // JMP here when a block should be dispatched. Make sure you're in a block + // or at the same stack level already. u8 *dispatcher; // The index of the last stored ext value (compile time). int storeIndex; + // Counts down. 
+ // int cycles; + DISALLOW_COPY_AND_ASSIGN(DSPEmitter); void ToMask(Gen::X64Reg value_reg = Gen::EDI, Gen::X64Reg temp_reg = Gen::ESI); diff --git a/Source/Core/DSPCore/Src/DSPInterpreter.cpp b/Source/Core/DSPCore/Src/DSPInterpreter.cpp index 6be0557896..f2b1c8f181 100644 --- a/Source/Core/DSPCore/Src/DSPInterpreter.cpp +++ b/Source/Core/DSPCore/Src/DSPInterpreter.cpp @@ -104,8 +104,8 @@ void Run() gdsp_running = true; while (!(g_dsp.cr & CR_HALT) && gdsp_running) { - if(jit) - jit->RunBlock(1); + if (jit) + jit->RunForCycles(1); else { // Automatically let the other threads work if we're idle skipping if(DSPAnalyzer::code_flags[g_dsp.pc] & DSPAnalyzer::CODE_IDLE_SKIP) @@ -180,6 +180,7 @@ int RunCyclesDebug(int cycles) cycles--; if (cycles < 0) return 0; + // We don't bother directly supporting pause - if the main emu pauses, // it just won't call this function anymore. } diff --git a/Source/Plugins/Plugin_DSP_LLE/Src/main.cpp b/Source/Plugins/Plugin_DSP_LLE/Src/main.cpp index 717138c0f3..63cc969a47 100644 --- a/Source/Plugins/Plugin_DSP_LLE/Src/main.cpp +++ b/Source/Plugins/Plugin_DSP_LLE/Src/main.cpp @@ -17,6 +17,7 @@ #include "Common.h" // Common +#include "Atomic.h" #include "CommonTypes.h" #include "LogManager.h" #include "Thread.h" @@ -49,7 +50,7 @@ SoundStream *soundStream = NULL; bool g_InitMixer = false; bool bIsRunning = false; -u32 cycle_count = 0; +volatile u32 cycle_count = 0; // Standard crap to make wxWidgets happy #ifdef _WIN32 @@ -219,17 +220,19 @@ THREAD_RETURN dsp_thread(void* lpParameter) { while (bIsRunning) { - u32 cycles = 0; - - if (jit) - { - cycles = cycle_count; - DSPCore_RunCycles(cycles); + int cycles = (int)cycle_count; + if (cycles > 0) { + if (jit) + { + cycles -= DSPCore_RunCycles(cycles); + } + else { + cycles -= DSPInterpreter::RunCycles(cycles); + } + Common::AtomicAdd(cycle_count, -cycles); } - else - DSPInterpreter::Run(); - cycle_count -= cycles; + // yield? 
} return 0; } @@ -374,7 +377,8 @@ void DSP_WriteMailboxLow(bool _CPUMailbox, u16 _uLowMail) void DSP_Update(int cycles) { - int cyclesRatio = cycles / (jit?20:6); + int dsp_cycles = cycles / 6; //(jit?20:6); + // Sound stream update job has been handled by AudioDMA routine, which is more efficient /* // This gets called VERY OFTEN. The soundstream update might be expensive so only do it 200 times per second or something. @@ -398,11 +402,14 @@ void DSP_Update(int cycles) if (!g_dspInitialize.bOnThread) { // ~1/6th as many cycles as the period PPC-side. - DSPCore_RunCycles(cyclesRatio);; + DSPCore_RunCycles(dsp_cycles); } else { - cycle_count += (cyclesRatio); + // Wait for dsp thread to catch up reasonably. Note: this logic should be thought through. + while (cycle_count > dsp_cycles) + ; + Common::AtomicAdd(cycle_count, dsp_cycles); } }