From 31f7020b2dc269c58628ee736d6ef0a8907111b2 Mon Sep 17 00:00:00 2001 From: hrydgard Date: Sat, 13 Jun 2009 22:08:01 +0000 Subject: [PATCH] Add mini unit testing framework to Dolphin itself - use it to find bugs and verify the portable powerpc fp number classifier. also random cleanup. git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@3432 8ced0084-cf51-0410-be5f-012b33b47a6e --- .../Core/Core/Src/HW/SI_DeviceGCController.h | 1 + .../Interpreter/Interpreter_FloatingPoint.cpp | 42 +- Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp | 701 +++++++++--------- .../Core/Src/PowerPC/Jit64/Jit_Branch.cpp | 478 ++++++------ .../Core/Src/PowerPC/Jit64/Jit_Integer.cpp | 2 + .../Core/Core/Src/PowerPC/Jit64IL/JitAsm.cpp | 28 +- Source/Core/Core/Src/PowerPC/PPCTables.cpp | 4 +- Source/Core/Core/Src/PowerPC/PowerPC.cpp | 22 +- Source/Core/Core/Src/PowerPC/PowerPC.h | 2 + Source/Core/DolphinWX/DolphinWX.vcproj | 4 +- Source/Core/InputCommon/Src/EventHandler.cpp | 6 +- Source/Dolphin.sln | 21 + Source/Plugins/Plugin_VideoOGL/Src/GLUtil.h | 2 +- .../Plugin_VideoOGL/Src/OnScreenDisplay.cpp | 5 + Source/Plugins/Plugin_VideoOGL/Src/Render.cpp | 9 + .../Plugin_VideoOGL/Src/rasterfont.cpp | 34 +- Source/UnitTests/UnitTests.cpp | 89 +++ Source/UnitTests/UnitTests.vcproj | 329 ++++++++ 18 files changed, 1159 insertions(+), 620 deletions(-) create mode 100644 Source/UnitTests/UnitTests.cpp create mode 100644 Source/UnitTests/UnitTests.vcproj diff --git a/Source/Core/Core/Src/HW/SI_DeviceGCController.h b/Source/Core/Core/Src/HW/SI_DeviceGCController.h index 2b2bdf5008..e4fed7839f 100644 --- a/Source/Core/Core/Src/HW/SI_DeviceGCController.h +++ b/Source/Core/Core/Src/HW/SI_DeviceGCController.h @@ -19,6 +19,7 @@ #define _SI_DEVICEGCCONTROLLER_H #include "../PluginManager.h" +#include "SI_Device.h" ////////////////////////////////////////////////////////////////////////// // standard gamecube controller diff --git a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp index 0f169ac878..15b020bdb4 100644 --- a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp +++ b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp @@ -405,25 +405,45 @@ void faddsx(UGeckoInstruction _inst) void fdivx(UGeckoInstruction _inst) { - rPS0(_inst.FD) = rPS0(_inst.FA) / rPS0(_inst.FB); + double a = rPS0(_inst.FA); + double b = rPS0(_inst.FB); + if (a == 0.0f && b == 0.0f) + rPS0(_inst.FD) = rPS1(_inst.FD) = 0.0; // NAN? + else + rPS0(_inst.FD) = rPS1(_inst.FD) = a / b; if (fabs(rPS0(_inst.FB)) == 0.0) { + if (!FPSCR.ZX) + FPSCR.FX = 1; FPSCR.ZX = 1; + FPSCR.XX = 1; } if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); } void fdivsx(UGeckoInstruction _inst) { - rPS0(_inst.FD) = rPS1(_inst.FD) = static_cast(rPS0(_inst.FA) / rPS0(_inst.FB)); - if (fabs(rPS0(_inst.FB)) == 0.0) { + float a = rPS0(_inst.FA); + float b = rPS0(_inst.FB); + if (a != a || b != b) + rPS0(_inst.FD) = rPS1(_inst.FD) = 0.0; // NAN? + else + rPS0(_inst.FD) = rPS1(_inst.FD) = a / b; + if (b == 0.0) { + if (!FPSCR.ZX) + FPSCR.FX = 1; FPSCR.ZX = 1; + FPSCR.XX = 1; } if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); } void fresx(UGeckoInstruction _inst) { - rPS0(_inst.FD) = rPS1(_inst.FD) = static_cast(1.0f / rPS0(_inst.FB)); + double b = rPS0(_inst.FB); + rPS0(_inst.FD) = rPS1(_inst.FD) = 1.0 / b; if (fabs(rPS0(_inst.FB)) == 0.0) { + if (!FPSCR.ZX) + FPSCR.FX = 1; FPSCR.ZX = 1; + FPSCR.XX = 1; } if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); } @@ -480,16 +500,24 @@ void fsubsx(UGeckoInstruction _inst) if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); } - void frsqrtex(UGeckoInstruction _inst) { - rPS0(_inst.FD) = 1.0f / (sqrt(rPS0(_inst.FB))); + double b = rPS0(_inst.FB); + if (b <= 0.0) + rPS0(_inst.FD) = 0.0; + else + rPS0(_inst.FD) = 1.0f / (sqrt(b)); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); } void fsqrtx(UGeckoInstruction _inst) { - rPS0(_inst.FD) = sqrt(rPS0(_inst.FB)); + double b = rPS0(_inst.FB); + if (b < 0.0) + { + FPSCR.VXSQRT = 1; + } + rPS0(_inst.FD) = sqrt(b); if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD)); } diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp index e1f70835c3..549f83b32b 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp @@ -161,6 +161,23 @@ ps_adds1 */ +//#define NAN_CHECK + +static void CheckForNans() +{ + static bool lastNan[32]; + for (int i = 0; i < 32; i++) { + double v = rPS0(i); + if (v != v) { + if (!lastNan[i]) { + lastNan[i] = true; + PanicAlert("PC = %08x Got NAN in R%i", PC, i); + } + } else { + lastNan[i] = false; + } + } +} Jit64 jit; @@ -171,376 +188,382 @@ namespace CPUCompare extern u32 m_BlockStart; } - void Jit(u32 em_address) - { - jit.Jit(em_address); - } +void Jit(u32 em_address) +{ + jit.Jit(em_address); +} - void Jit64::Init() - { - asm_routines.compareEnabled = ::Core::g_CoreStartupParameter.bRunCompareClient; +void Jit64::Init() +{ + asm_routines.compareEnabled = ::Core::g_CoreStartupParameter.bRunCompareClient; - jo.optimizeStack = true; - /* This will enable block linking in JitBlockCache::FinalizeBlock(), it gives faster execution but may not - be as stable as the alternative (to not link the blocks). However, I have not heard about any good examples - where this cause problems, so I'm enabling this by default, since I seem to get perhaps as much as 20% more - fps with this option enabled. If you suspect that this option cause problems you can also disable it from the - debugging window. */ - jo.enableBlocklink = true; + jo.optimizeStack = true; + /* This will enable block linking in JitBlockCache::FinalizeBlock(), it gives faster execution but may not + be as stable as the alternative (to not link the blocks). However, I have not heard about any good examples + where this cause problems, so I'm enabling this by default, since I seem to get perhaps as much as 20% more + fps with this option enabled. If you suspect that this option cause problems you can also disable it from the + debugging window. */ + jo.enableBlocklink = true; #ifdef _M_X64 - jo.enableFastMem = Core::GetStartupParameter().bUseFastMem; + jo.enableFastMem = Core::GetStartupParameter().bUseFastMem; #else - jo.enableFastMem = false; + jo.enableFastMem = false; #endif - jo.assumeFPLoadFromMem = true; - jo.fpAccurateFlags = true; - jo.optimizeGatherPipe = true; - jo.fastInterrupts = false; - jo.accurateSinglePrecision = true; + jo.assumeFPLoadFromMem = true; + jo.fpAccurateFlags = true; + jo.optimizeGatherPipe = true; + jo.fastInterrupts = false; + jo.accurateSinglePrecision = true; - gpr.SetEmitter(this); - fpr.SetEmitter(this); + gpr.SetEmitter(this); + fpr.SetEmitter(this); - // Custom settings - if (Core::g_CoreStartupParameter.bJITUnlimitedCache) - CODE_SIZE = 1024*1024*8*8; - if (Core::g_CoreStartupParameter.bJITBlockLinking) - { jo.enableBlocklink = false; SuccessAlert("Your game was started without JIT Block Linking"); } + // Custom settings + if (Core::g_CoreStartupParameter.bJITUnlimitedCache) + CODE_SIZE = 1024*1024*8*8; + if (Core::g_CoreStartupParameter.bJITBlockLinking) + { jo.enableBlocklink = false; SuccessAlert("Your game was started without JIT Block Linking"); } - trampolines.Init(); - AllocCodeSpace(CODE_SIZE); + trampolines.Init(); + AllocCodeSpace(CODE_SIZE); - blocks.Init(); - asm_routines.Init(); - } + blocks.Init(); + asm_routines.Init(); +} - void Jit64::ClearCache() +void Jit64::ClearCache() +{ + blocks.Clear(); + trampolines.ClearCodeSpace(); + ClearCodeSpace(); +} + + +void Jit64::Shutdown() +{ + FreeCodeSpace(); + + blocks.Shutdown(); + trampolines.Shutdown(); + asm_routines.Shutdown(); +} + +// This is only called by Default() in this file. It will execute an instruction with the interpreter functions. +void Jit64::WriteCallInterpreter(UGeckoInstruction inst) +{ + + + gpr.Flush(FLUSH_ALL); + fpr.Flush(FLUSH_ALL); + if (js.isLastInstruction) { - blocks.Clear(); - trampolines.ClearCodeSpace(); - ClearCodeSpace(); + MOV(32, M(&PC), Imm32(js.compilerPC)); + MOV(32, M(&NPC), Imm32(js.compilerPC + 4)); } + Interpreter::_interpreterInstruction instr = GetInterpreterOp(inst); + ABI_CallFunctionC((void*)instr, inst.hex); - - void Jit64::Shutdown() + if (js.isLastInstruction && SConfig::GetInstance().m_EnableRE0Fix ) { - FreeCodeSpace(); - - blocks.Shutdown(); - trampolines.Shutdown(); - asm_routines.Shutdown(); - } - - // This is only called by Default() in this file. It will execute an instruction with the interpreter functions. - void Jit64::WriteCallInterpreter(UGeckoInstruction inst) - { - - - gpr.Flush(FLUSH_ALL); - fpr.Flush(FLUSH_ALL); - if (js.isLastInstruction) - { - MOV(32, M(&PC), Imm32(js.compilerPC)); - MOV(32, M(&NPC), Imm32(js.compilerPC + 4)); - } - Interpreter::_interpreterInstruction instr = GetInterpreterOp(inst); - ABI_CallFunctionC((void*)instr, inst.hex); - - if (js.isLastInstruction && SConfig::GetInstance().m_EnableRE0Fix ) - { - - SConfig::GetInstance().LoadSettingsHLE();//Make sure the settings are up to date - MOV(32, R(EAX), M(&NPC)); - WriteRfiExitDestInEAX(); - } - } - - void Jit64::unknown_instruction(UGeckoInstruction inst) - { - // CCPU::Break(); - PanicAlert("unknown_instruction %08x - Fix me ;)", inst.hex); - } - - void Jit64::Default(UGeckoInstruction _inst) - { - WriteCallInterpreter(_inst.hex); - } - - void Jit64::HLEFunction(UGeckoInstruction _inst) - { - gpr.Flush(FLUSH_ALL); - fpr.Flush(FLUSH_ALL); - ABI_CallFunctionCC((void*)&HLE::Execute, js.compilerPC, _inst.hex); - MOV(32, R(EAX), M(&NPC)); - WriteExitDestInEAX(0); - } - - void Jit64::DoNothing(UGeckoInstruction _inst) - { - // Yup, just don't do anything. - } - - void Jit64::NotifyBreakpoint(u32 em_address, bool set) - { - int block_num = blocks.GetBlockNumberFromStartAddress(em_address); - if (block_num >= 0) - { - blocks.DestroyBlock(block_num, false); - } - } - - static const bool ImHereDebug = false; - static const bool ImHereLog = false; - static std::map been_here; - - void ImHere() - { - static FILE *f = 0; - if (ImHereLog) { - if (!f) - { -#ifdef _M_X64 - f = fopen("log64.txt", "w"); -#else - f = fopen("log32.txt", "w"); -#endif - } - fprintf(f, "%08x\n", PC); - } - if (been_here.find(PC) != been_here.end()) { - been_here.find(PC)->second++; - if ((been_here.find(PC)->second) & 1023) - return; - } - DEBUG_LOG(DYNA_REC, "I'm here - PC = %08x , LR = %08x", PC, LR); - //printf("I'm here - PC = %08x , LR = %08x", PC, LR); - been_here[PC] = 1; - } - - void Jit64::Cleanup() - { - if (jo.optimizeGatherPipe && js.fifoBytesThisBlock > 0) - ABI_CallFunction((void *)&GPFifo::CheckGatherPipe); - } - - void Jit64::WriteExit(u32 destination, int exit_num) - { - Cleanup(); - SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount)); - - //If nobody has taken care of this yet (this can be removed when all branches are done) - JitBlock *b = js.curBlock; - b->exitAddress[exit_num] = destination; - b->exitPtrs[exit_num] = GetWritableCodePtr(); - // Link opportunity! - int block = blocks.GetBlockNumberFromStartAddress(destination); - if (block >= 0 && jo.enableBlocklink) - { - // It exists! Joy of joy! - JMP(blocks.GetBlock(block)->checkedEntry, true); - b->linkStatus[exit_num] = true; - } - else - { - MOV(32, M(&PC), Imm32(destination)); - JMP(asm_routines.dispatcher, true); - } + SConfig::GetInstance().LoadSettingsHLE();//Make sure the settings are up to date + MOV(32, R(EAX), M(&NPC)); + WriteRfiExitDestInEAX(); } +} - void Jit64::WriteExitDestInEAX(int exit_num) +void Jit64::unknown_instruction(UGeckoInstruction inst) +{ + // CCPU::Break(); + PanicAlert("unknown_instruction %08x - Fix me ;)", inst.hex); +} + +void Jit64::Default(UGeckoInstruction _inst) +{ + WriteCallInterpreter(_inst.hex); +} + +void Jit64::HLEFunction(UGeckoInstruction _inst) +{ + gpr.Flush(FLUSH_ALL); + fpr.Flush(FLUSH_ALL); + ABI_CallFunctionCC((void*)&HLE::Execute, js.compilerPC, _inst.hex); + MOV(32, R(EAX), M(&NPC)); + WriteExitDestInEAX(0); +} + +void Jit64::DoNothing(UGeckoInstruction _inst) +{ + // Yup, just don't do anything. +} + +void Jit64::NotifyBreakpoint(u32 em_address, bool set) +{ + int block_num = blocks.GetBlockNumberFromStartAddress(em_address); + if (block_num >= 0) { - MOV(32, M(&PC), R(EAX)); - Cleanup(); - SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount)); + blocks.DestroyBlock(block_num, false); + } +} + +static const bool ImHereDebug = false; +static const bool ImHereLog = false; +static std::map been_here; + +void ImHere() +{ + static FILE *f = 0; + if (ImHereLog) { + if (!f) + { +#ifdef _M_X64 + f = fopen("log64.txt", "w"); +#else + f = fopen("log32.txt", "w"); +#endif + } + fprintf(f, "%08x\n", PC); + } + if (been_here.find(PC) != been_here.end()) { + been_here.find(PC)->second++; + if ((been_here.find(PC)->second) & 1023) + return; + } + DEBUG_LOG(DYNA_REC, "I'm here - PC = %08x , LR = %08x", PC, LR); + //printf("I'm here - PC = %08x , LR = %08x", PC, LR); + been_here[PC] = 1; +} + +void Jit64::Cleanup() +{ + if (jo.optimizeGatherPipe && js.fifoBytesThisBlock > 0) + ABI_CallFunction((void *)&GPFifo::CheckGatherPipe); + if (GetAsyncKeyState(VK_LSHIFT)) + ABI_CallFunction(thunks.ProtectFunction((void *)&CheckForNans, 0)); +} + +void Jit64::WriteExit(u32 destination, int exit_num) +{ + Cleanup(); + SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount)); + + //If nobody has taken care of this yet (this can be removed when all branches are done) + JitBlock *b = js.curBlock; + b->exitAddress[exit_num] = destination; + b->exitPtrs[exit_num] = GetWritableCodePtr(); + + // Link opportunity! + int block = blocks.GetBlockNumberFromStartAddress(destination); + if (block >= 0 && jo.enableBlocklink) + { + // It exists! Joy of joy! + JMP(blocks.GetBlock(block)->checkedEntry, true); + b->linkStatus[exit_num] = true; + } + else + { + MOV(32, M(&PC), Imm32(destination)); JMP(asm_routines.dispatcher, true); } +} - void Jit64::WriteRfiExitDestInEAX() - { - MOV(32, M(&PC), R(EAX)); - Cleanup(); - SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount)); - JMP(asm_routines.testExceptions, true); - } +void Jit64::WriteExitDestInEAX(int exit_num) +{ + MOV(32, M(&PC), R(EAX)); + Cleanup(); + SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount)); + JMP(asm_routines.dispatcher, true); +} - void Jit64::WriteExceptionExit(u32 exception) +void Jit64::WriteRfiExitDestInEAX() +{ + MOV(32, M(&PC), R(EAX)); + Cleanup(); + SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount)); + JMP(asm_routines.testExceptions, true); +} + +void Jit64::WriteExceptionExit(u32 exception) +{ + Cleanup(); + OR(32, M(&PowerPC::ppcState.Exceptions), Imm32(exception)); + MOV(32, M(&PC), Imm32(js.compilerPC + 4)); + JMP(asm_routines.testExceptions, true); +} + +void STACKALIGN Jit64::Run() +{ + CompiledCode pExecAddr = (CompiledCode)asm_routines.enterCode; + pExecAddr(); + //Will return when PowerPC::state changes +} + +void Jit64::SingleStep() +{ + // NOT USED, NOT TESTED, PROBABLY NOT WORKING YET + // PanicAlert("Single"); + /* + JitBlock temp_block; + PPCAnalyst::CodeBuffer temp_codebuffer(1); // Only room for one instruction! Single step! + const u8 *code = DoJit(PowerPC::ppcState.pc, &temp_codebuffer, &temp_block); + CompiledCode pExecAddr = (CompiledCode)code; + pExecAddr();*/ +} + +void STACKALIGN Jit64::Jit(u32 em_address) +{ + if (GetSpaceLeft() < 0x10000 || blocks.IsFull()) { - Cleanup(); - OR(32, M(&PowerPC::ppcState.Exceptions), Imm32(exception)); - MOV(32, M(&PC), Imm32(js.compilerPC + 4)); - JMP(asm_routines.testExceptions, true); + WARN_LOG(DYNA_REC, "JIT cache full - clearing.") + if (Core::g_CoreStartupParameter.bJITUnlimitedCache) + { + ERROR_LOG(DYNA_REC, "What? JIT cache still full - clearing."); + PanicAlert("What? JIT cache still full - clearing."); + } + ClearCache(); } + int block_num = blocks.AllocateBlock(em_address); + JitBlock *b = blocks.GetBlock(block_num); + blocks.FinalizeBlock(block_num, jo.enableBlocklink, DoJit(em_address, &code_buffer, b)); +} + + +const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buffer, JitBlock *b) +{ + if (em_address == 0) + PanicAlert("ERROR : Trying to compile at 0. LR=%08x", LR); + + int size; + js.isLastInstruction = false; + js.blockStart = em_address; + js.fifoBytesThisBlock = 0; + js.curBlock = b; + js.blockSetsQuantizers = false; + js.block_flags = 0; + js.cancel = false; + + //Analyze the block, collect all instructions it is made of (including inlining, + //if that is enabled), reorder instructions for optimal performance, and join joinable instructions. + PPCAnalyst::Flatten(em_address, &size, &js.st, &js.gpa, &js.fpa, code_buffer); + PPCAnalyst::CodeOp *ops = code_buffer->codebuffer; + + const u8 *start = AlignCode4(); //TODO: Test if this or AlignCode16 make a difference from GetCodePtr + b->checkedEntry = start; + b->runCount = 0; + + // Downcount flag check. The last block decremented downcounter, and the flag should still be available. + FixupBranch skip = J_CC(CC_NBE); + MOV(32, M(&PC), Imm32(js.blockStart)); + JMP(asm_routines.doTiming, true); // downcount hit zero - go doTiming. + SetJumpTarget(skip); + + const u8 *normalEntry = GetCodePtr(); + + if (ImHereDebug) + ABI_CallFunction((void *)&ImHere); //Used to get a trace of the last few blocks before a crash, sometimes VERY useful - void STACKALIGN Jit64::Run() + if (js.fpa.any) { - CompiledCode pExecAddr = (CompiledCode)asm_routines.enterCode; - pExecAddr(); - //Will return when PowerPC::state changes - } - - void Jit64::SingleStep() - { - // NOT USED, NOT TESTED, PROBABLY NOT WORKING YET - // PanicAlert("Single"); - /* - JitBlock temp_block; - PPCAnalyst::CodeBuffer temp_codebuffer(1); // Only room for one instruction! Single step! - const u8 *code = DoJit(PowerPC::ppcState.pc, &temp_codebuffer, &temp_block); - CompiledCode pExecAddr = (CompiledCode)code; - pExecAddr();*/ - } - - void STACKALIGN Jit64::Jit(u32 em_address) - { - if (GetSpaceLeft() < 0x10000 || blocks.IsFull()) - { - WARN_LOG(DYNA_REC, "JIT cache full - clearing.") - if (Core::g_CoreStartupParameter.bJITUnlimitedCache) - { - ERROR_LOG(DYNA_REC, "What? JIT cache still full - clearing."); - PanicAlert("What? JIT cache still full - clearing."); - } - ClearCache(); - } - int block_num = blocks.AllocateBlock(em_address); - JitBlock *b = blocks.GetBlock(block_num); - blocks.FinalizeBlock(block_num, jo.enableBlocklink, DoJit(em_address, &code_buffer, b)); - } - - const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buffer, JitBlock *b) - { - if (em_address == 0) - PanicAlert("ERROR : Trying to compile at 0. LR=%08x", LR); - - int size; - js.isLastInstruction = false; - js.blockStart = em_address; - js.fifoBytesThisBlock = 0; - js.curBlock = b; - js.blockSetsQuantizers = false; - js.block_flags = 0; - js.cancel = false; - - //Analyze the block, collect all instructions it is made of (including inlining, - //if that is enabled), reorder instructions for optimal performance, and join joinable instructions. - PPCAnalyst::Flatten(em_address, &size, &js.st, &js.gpa, &js.fpa, code_buffer); - PPCAnalyst::CodeOp *ops = code_buffer->codebuffer; - - const u8 *start = AlignCode4(); //TODO: Test if this or AlignCode16 make a difference from GetCodePtr - b->checkedEntry = start; - b->runCount = 0; - - // Downcount flag check. The last block decremented downcounter, and the flag should still be available. - FixupBranch skip = J_CC(CC_NBE); + //This block uses FPU - needs to add FP exception bailout + TEST(32, M(&PowerPC::ppcState.msr), Imm32(1 << 13)); //Test FP enabled bit + FixupBranch b1 = J_CC(CC_NZ); MOV(32, M(&PC), Imm32(js.blockStart)); - JMP(asm_routines.doTiming, true); // downcount hit zero - go doTiming. - SetJumpTarget(skip); + JMP(asm_routines.fpException, true); + SetJumpTarget(b1); + } - const u8 *normalEntry = GetCodePtr(); - - if (ImHereDebug) - ABI_CallFunction((void *)&ImHere); //Used to get a trace of the last few blocks before a crash, sometimes VERY useful - - if (js.fpa.any) - { - //This block uses FPU - needs to add FP exception bailout - TEST(32, M(&PowerPC::ppcState.msr), Imm32(1 << 13)); //Test FP enabled bit - FixupBranch b1 = J_CC(CC_NZ); - MOV(32, M(&PC), Imm32(js.blockStart)); - JMP(asm_routines.fpException, true); - SetJumpTarget(b1); - } + if (false && jo.fastInterrupts) + { + // This does NOT yet work. + TEST(32, M(&PowerPC::ppcState.Exceptions), Imm32(0xFFFFFFFF)); + FixupBranch b1 = J_CC(CC_Z); + MOV(32, M(&PC), Imm32(js.blockStart)); + JMP(asm_routines.testExceptions, true); + SetJumpTarget(b1); + } - if (false && jo.fastInterrupts) - { - // This does NOT yet work. - TEST(32, M(&PowerPC::ppcState.Exceptions), Imm32(0xFFFFFFFF)); - FixupBranch b1 = J_CC(CC_Z); - MOV(32, M(&PC), Imm32(js.blockStart)); - JMP(asm_routines.testExceptions, true); - SetJumpTarget(b1); - } - - // Conditionally add profiling code. - if (Profiler::g_ProfileBlocks) { - ADD(32, M(&b->runCount), Imm8(1)); + // Conditionally add profiling code. + if (Profiler::g_ProfileBlocks) { + ADD(32, M(&b->runCount), Imm8(1)); #ifdef _WIN32 - b->ticCounter.QuadPart = 0; - b->ticStart.QuadPart = 0; - b->ticStop.QuadPart = 0; + b->ticCounter.QuadPart = 0; + b->ticStart.QuadPart = 0; + b->ticStop.QuadPart = 0; #else //TODO #endif - // get start tic - PROFILER_QUERY_PERFORMACE_COUNTER(&b->ticStart); - } -#if defined(_DEBUG) || defined(DEBUGFAST) - // should help logged stacktraces become more accurate - MOV(32, M(&PC), Imm32(js.blockStart)); -#endif - - //Start up the register allocators - //They use the information in gpa/fpa to preload commonly used registers. - gpr.Start(js.gpa); - fpr.Start(js.fpa); - - js.downcountAmount = js.st.numCycles + PatchEngine::GetSpeedhackCycles(em_address); - js.blockSize = size; - // Translate instructions - for (int i = 0; i < (int)size; i++) - { - // gpr.Flush(FLUSH_ALL); - // if (PPCTables::UsesFPU(_inst)) - // fpr.Flush(FLUSH_ALL); - js.compilerPC = ops[i].address; - js.op = &ops[i]; - js.instructionNumber = i; - if (i == (int)size - 1) - { - // WARNING - cmp->branch merging will screw this up. - js.isLastInstruction = true; - js.next_inst = 0; - if (Profiler::g_ProfileBlocks) { - // CAUTION!!! push on stack regs you use, do your stuff, then pop - PROFILER_VPUSH; - // get end tic - PROFILER_QUERY_PERFORMACE_COUNTER(&b->ticStop); - // tic counter += (end tic - start tic) - PROFILER_ADD_DIFF_LARGE_INTEGER(&b->ticCounter, &b->ticStop, &b->ticStart); - PROFILER_VPOP; - } - } - else - { - // help peephole optimizations - js.next_inst = ops[i + 1].inst; - js.next_compilerPC = ops[i + 1].address; - } - - if (jo.optimizeGatherPipe && js.fifoBytesThisBlock >= 32) - { - js.fifoBytesThisBlock -= 32; - ABI_CallFunction(thunks.ProtectFunction((void *)&GPFifo::CheckGatherPipe, 0)); - } - - // If starting from the breakpointed instruction, we don't break. - if (em_address != ops[i].address && BreakPoints::IsAddressBreakPoint(ops[i].address)) - { - - } - - if (!ops[i].skip) - PPCTables::CompileInstruction(ops[i].inst); - - gpr.SanityCheck(); - fpr.SanityCheck(); - if (js.cancel) - break; - } - - b->flags = js.block_flags; - b->codeSize = (u32)(GetCodePtr() - normalEntry); - b->originalSize = size; - return normalEntry; + // get start tic + PROFILER_QUERY_PERFORMACE_COUNTER(&b->ticStart); } +//#if defined(_DEBUG) || defined(DEBUGFAST) + // should help logged stacktraces become more accurate + MOV(32, M(&PC), Imm32(js.blockStart)); +//#endif + + +// if (em_address == 0x801e4188) +// INT3(); + //Start up the register allocators + //They use the information in gpa/fpa to preload commonly used registers. + gpr.Start(js.gpa); + fpr.Start(js.fpa); + + js.downcountAmount = js.st.numCycles + PatchEngine::GetSpeedhackCycles(em_address); + js.blockSize = size; + // Translate instructions + for (int i = 0; i < (int)size; i++) + { + // gpr.Flush(FLUSH_ALL); + // if (PPCTables::UsesFPU(_inst)) + // fpr.Flush(FLUSH_ALL); + js.compilerPC = ops[i].address; + js.op = &ops[i]; + js.instructionNumber = i; + if (i == (int)size - 1) + { + // WARNING - cmp->branch merging will screw this up. + js.isLastInstruction = true; + js.next_inst = 0; + if (Profiler::g_ProfileBlocks) { + // CAUTION!!! push on stack regs you use, do your stuff, then pop + PROFILER_VPUSH; + // get end tic + PROFILER_QUERY_PERFORMACE_COUNTER(&b->ticStop); + // tic counter += (end tic - start tic) + PROFILER_ADD_DIFF_LARGE_INTEGER(&b->ticCounter, &b->ticStop, &b->ticStart); + PROFILER_VPOP; + } + } + else + { + // help peephole optimizations + js.next_inst = ops[i + 1].inst; + js.next_compilerPC = ops[i + 1].address; + } + + if (jo.optimizeGatherPipe && js.fifoBytesThisBlock >= 32) + { + js.fifoBytesThisBlock -= 32; + ABI_CallFunction(thunks.ProtectFunction((void *)&GPFifo::CheckGatherPipe, 0)); + } + + // If starting from the breakpointed instruction, we don't break. + if (em_address != ops[i].address && BreakPoints::IsAddressBreakPoint(ops[i].address)) + { + + } + + if (!ops[i].skip) + PPCTables::CompileInstruction(ops[i].inst); + + gpr.SanityCheck(); + fpr.SanityCheck(); + if (js.cancel) + break; + } + + b->flags = js.block_flags; + b->codeSize = (u32)(GetCodePtr() - normalEntry); + b->originalSize = size; + return normalEntry; +} diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_Branch.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Branch.cpp index 94c57d3a4b..331cf26996 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_Branch.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Branch.cpp @@ -40,250 +40,250 @@ using namespace Gen; - void Jit64::sc(UGeckoInstruction inst) +void Jit64::sc(UGeckoInstruction inst) +{ + if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITBranchOff) + {Default(inst); return;} // turn off from debugger + + INSTRUCTION_START; + + gpr.Flush(FLUSH_ALL); + fpr.Flush(FLUSH_ALL); + WriteExceptionExit(EXCEPTION_SYSCALL); +} + +void Jit64::rfi(UGeckoInstruction inst) +{ + if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITBranchOff) + {Default(inst); return;} // turn off from debugger + + INSTRUCTION_START; + + gpr.Flush(FLUSH_ALL); + fpr.Flush(FLUSH_ALL); + // See Interpreter rfi for details + const u32 mask = 0x87C0FFFF; + // MSR = (MSR & ~mask) | (SRR1 & mask); + MOV(32, R(EAX), M(&MSR)); + MOV(32, R(ECX), M(&SRR1)); + AND(32, R(EAX), Imm32(~mask)); + AND(32, R(ECX), Imm32(mask)); + OR(32, R(EAX), R(ECX)); + // MSR &= 0xFFFDFFFF; //TODO: VERIFY + AND(32, R(EAX), Imm32(0xFFFDFFFF)); + MOV(32, M(&MSR), R(EAX)); + // NPC = SRR0; + MOV(32, R(EAX), M(&SRR0)); + WriteRfiExitDestInEAX(); +} + +void Jit64::bx(UGeckoInstruction inst) +{ + if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITBranchOff) + {Default(inst); return;} // turn off from debugger + + INSTRUCTION_START; + + if (inst.LK) + MOV(32, M(&LR), Imm32(js.compilerPC + 4)); + gpr.Flush(FLUSH_ALL); + fpr.Flush(FLUSH_ALL); + + if (js.isLastInstruction) { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITBranchOff) - {Default(inst); return;} // turn off from debugger - - INSTRUCTION_START; - - gpr.Flush(FLUSH_ALL); - fpr.Flush(FLUSH_ALL); - WriteExceptionExit(EXCEPTION_SYSCALL); - } - - void Jit64::rfi(UGeckoInstruction inst) - { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITBranchOff) - {Default(inst); return;} // turn off from debugger - - INSTRUCTION_START; - - gpr.Flush(FLUSH_ALL); - fpr.Flush(FLUSH_ALL); - // See Interpreter rfi for details - const u32 mask = 0x87C0FFFF; - // MSR = (MSR & ~mask) | (SRR1 & mask); - MOV(32, R(EAX), M(&MSR)); - MOV(32, R(ECX), M(&SRR1)); - AND(32, R(EAX), Imm32(~mask)); - AND(32, R(ECX), Imm32(mask)); - OR(32, R(EAX), R(ECX)); - // MSR &= 0xFFFDFFFF; //TODO: VERIFY - AND(32, R(EAX), Imm32(0xFFFDFFFF)); - MOV(32, M(&MSR), R(EAX)); - // NPC = SRR0; - MOV(32, R(EAX), M(&SRR0)); - WriteRfiExitDestInEAX(); - } - - void Jit64::bx(UGeckoInstruction inst) - { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITBranchOff) - {Default(inst); return;} // turn off from debugger - - INSTRUCTION_START; - - if (inst.LK) - MOV(32, M(&LR), Imm32(js.compilerPC + 4)); - gpr.Flush(FLUSH_ALL); - fpr.Flush(FLUSH_ALL); - - if (js.isLastInstruction) - { - u32 destination; - if (inst.AA) - destination = SignExt26(inst.LI << 2); - else - destination = js.compilerPC + SignExt26(inst.LI << 2); -#ifdef ACID_TEST - if (inst.LK) - AND(32, M(&CR), Imm32(~(0xFF000000))); -#endif - if (destination == js.compilerPC) - { - //PanicAlert("Idle loop detected at %08x", destination); - // CALL(ProtectFunction(&CoreTiming::Idle, 0)); - // JMP(Asm::testExceptions, true); - // make idle loops go faster - js.downcountAmount += 8; - } - WriteExit(destination, 0); - } - else { - // TODO: investigate the good old method of merging blocks here. - PanicAlert("bx not last instruction of block"); // this should not happen - } - } - - // TODO - optimize to hell and beyond - // TODO - make nice easy to optimize special cases for the most common - // variants of this instruction. - void Jit64::bcx(UGeckoInstruction inst) - { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITBranchOff) - {Default(inst); return;} // turn off from debugger - - INSTRUCTION_START; - - // USES_CR - _assert_msg_(DYNA_REC, js.isLastInstruction, "bcx not last instruction of block"); - - gpr.Flush(FLUSH_ALL); - fpr.Flush(FLUSH_ALL); - - CCFlags branch = CC_Z; - - //const bool only_counter_check = (inst.BO & 16) ? true : false; - //const bool only_condition_check = (inst.BO & 4) ? true : false; - //if (only_condition_check && only_counter_check) - // PanicAlert("Bizarre bcx encountered. Likely bad or corrupt code."); - bool doFullTest = (inst.BO & 16) == 0 && (inst.BO & 4) == 0; - bool ctrDecremented = false; - - if ((inst.BO & 16) == 0) // Test a CR bit - { - TEST(8, M(&PowerPC::ppcState.cr_fast[inst.BI >> 2]), Imm8(8 >> (inst.BI & 3))); - if (inst.BO & 8) // Conditional branch - branch = CC_NZ; - else - branch = CC_Z; - - if (doFullTest) - SETcc(branch, R(EAX)); - } - else - { - if (doFullTest) - MOV(32, R(EAX), Imm32(1)); - } - - if ((inst.BO & 4) == 0) // Decrement and test CTR - { - // Decrement CTR - SUB(32, M(&CTR), Imm8(1)); - ctrDecremented = true; - // Test whether to branch if CTR is zero or not - if (inst.BO & 2) - branch = CC_Z; - else - branch = CC_NZ; - - if (doFullTest) - SETcc(branch, R(ECX)); - } - else - { - if (doFullTest) - MOV(32, R(ECX), Imm32(1)); - } - - if (doFullTest) - { - TEST(32, R(EAX), R(ECX)); - branch = CC_Z; - } - else - { - if (branch == CC_Z) - branch = CC_NZ; - else - branch = CC_Z; - } - - if (!ctrDecremented && (inst.BO & BO_DONT_DECREMENT_FLAG) == 0) - { - SUB(32, M(&CTR), Imm8(1)); - } - FixupBranch skip; - if (inst.BO != 20) - { - skip = J_CC(branch); - } u32 destination; - if (inst.LK) - MOV(32, M(&LR), Imm32(js.compilerPC + 4)); - if(inst.AA) - destination = SignExt16(inst.BD << 2); + if (inst.AA) + destination = SignExt26(inst.LI << 2); else - destination = js.compilerPC + SignExt16(inst.BD << 2); - WriteExit(destination, 0); - if (inst.BO != 20) - { - SetJumpTarget(skip); - WriteExit(js.compilerPC + 4, 1); - } - } - - void Jit64::bcctrx(UGeckoInstruction inst) - { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITBranchOff) - {Default(inst); return;} // turn off from debugger - - INSTRUCTION_START; - - gpr.Flush(FLUSH_ALL); - fpr.Flush(FLUSH_ALL); - - // bool fastway = true; - - if ((inst.BO & 16) == 0) - { - PanicAlert("Bizarro bcctrx %08x, not supported.", inst.hex); - _assert_msg_(DYNA_REC, 0, "Bizarro bcctrx"); - /* - fastway = false; - MOV(32, M(&PC), Imm32(js.compilerPC+4)); - MOV(32, R(EAX), M(&CR)); - XOR(32, R(ECX), R(ECX)); - AND(32, R(EAX), Imm32(0x80000000 >> inst.BI)); - - CCFlags branch; - if(inst.BO & 8) - branch = CC_NZ; - else - branch = CC_Z; - */ - // TODO(ector): Why is this commented out? - //SETcc(branch, R(ECX)); - // check for EBX - //TEST(32, R(ECX), R(ECX)); - //linkEnd = J_CC(branch); - } - // NPC = CTR & 0xfffffffc; - MOV(32, R(EAX), M(&CTR)); - if (inst.LK) - MOV(32, M(&LR), Imm32(js.compilerPC + 4)); // LR = PC + 4; - AND(32, R(EAX), Imm32(0xFFFFFFFC)); - WriteExitDestInEAX(0); - } - - - void Jit64::bclrx(UGeckoInstruction inst) - { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITBranchOff) - {Default(inst); return;} // turn off from debugger - - INSTRUCTION_START; - - gpr.Flush(FLUSH_ALL); - fpr.Flush(FLUSH_ALL); - //Special case BLR - if (inst.hex == 0x4e800020) - { - //CDynaRegCache::Flush(); - // This below line can be used to prove that blr "eats flags" in practice. - // This observation will let us do a lot of fun observations. + destination = js.compilerPC + SignExt26(inst.LI << 2); #ifdef ACID_TEST + if (inst.LK) AND(32, M(&CR), Imm32(~(0xFF000000))); #endif - MOV(32, R(EAX), M(&LR)); - MOV(32, M(&PC), R(EAX)); - WriteExitDestInEAX(0); - return; + if (destination == js.compilerPC) + { + //PanicAlert("Idle loop detected at %08x", destination); + // CALL(ProtectFunction(&CoreTiming::Idle, 0)); + // JMP(Asm::testExceptions, true); + // make idle loops go faster + js.downcountAmount += 8; } - // Call interpreter - Default(inst); - MOV(32, R(EAX), M(&NPC)); - WriteExitDestInEAX(0); + WriteExit(destination, 0); } + else { + // TODO: investigate the good old method of merging blocks here. + PanicAlert("bx not last instruction of block"); // this should not happen + } +} + +// TODO - optimize to hell and beyond +// TODO - make nice easy to optimize special cases for the most common +// variants of this instruction. +void Jit64::bcx(UGeckoInstruction inst) +{ + if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITBranchOff) + {Default(inst); return;} // turn off from debugger + + INSTRUCTION_START; + + // USES_CR + _assert_msg_(DYNA_REC, js.isLastInstruction, "bcx not last instruction of block"); + + gpr.Flush(FLUSH_ALL); + fpr.Flush(FLUSH_ALL); + + CCFlags branch = CC_Z; + + //const bool only_counter_check = (inst.BO & 16) ? true : false; + //const bool only_condition_check = (inst.BO & 4) ? true : false; + //if (only_condition_check && only_counter_check) + // PanicAlert("Bizarre bcx encountered. Likely bad or corrupt code."); + bool doFullTest = (inst.BO & 16) == 0 && (inst.BO & 4) == 0; + bool ctrDecremented = false; + + if ((inst.BO & 16) == 0) // Test a CR bit + { + TEST(8, M(&PowerPC::ppcState.cr_fast[inst.BI >> 2]), Imm8(8 >> (inst.BI & 3))); + if (inst.BO & 8) // Conditional branch + branch = CC_NZ; + else + branch = CC_Z; + + if (doFullTest) + SETcc(branch, R(EAX)); + } + else + { + if (doFullTest) + MOV(32, R(EAX), Imm32(1)); + } + + if ((inst.BO & 4) == 0) // Decrement and test CTR + { + // Decrement CTR + SUB(32, M(&CTR), Imm8(1)); + ctrDecremented = true; + // Test whether to branch if CTR is zero or not + if (inst.BO & 2) + branch = CC_Z; + else + branch = CC_NZ; + + if (doFullTest) + SETcc(branch, R(ECX)); + } + else + { + if (doFullTest) + MOV(32, R(ECX), Imm32(1)); + } + + if (doFullTest) + { + TEST(32, R(EAX), R(ECX)); + branch = CC_Z; + } + else + { + if (branch == CC_Z) + branch = CC_NZ; + else + branch = CC_Z; + } + + if (!ctrDecremented && (inst.BO & BO_DONT_DECREMENT_FLAG) == 0) + { + SUB(32, M(&CTR), Imm8(1)); + } + FixupBranch skip; + if (inst.BO != 20) + { + skip = J_CC(branch); + } + u32 destination; + if (inst.LK) + MOV(32, M(&LR), Imm32(js.compilerPC + 4)); + if(inst.AA) + destination = SignExt16(inst.BD << 2); + else + destination = js.compilerPC + SignExt16(inst.BD << 2); + WriteExit(destination, 0); + if (inst.BO != 20) + { + SetJumpTarget(skip); + WriteExit(js.compilerPC + 4, 1); + } +} + +void Jit64::bcctrx(UGeckoInstruction inst) +{ + if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITBranchOff) + {Default(inst); return;} // turn off from debugger + + INSTRUCTION_START; + + gpr.Flush(FLUSH_ALL); + fpr.Flush(FLUSH_ALL); + + // bool fastway = true; + + if ((inst.BO & 16) == 0) + { + PanicAlert("Bizarro bcctrx %08x, not supported.", inst.hex); + _assert_msg_(DYNA_REC, 0, "Bizarro bcctrx"); + /* + fastway = false; + MOV(32, M(&PC), Imm32(js.compilerPC+4)); + MOV(32, R(EAX), M(&CR)); + XOR(32, R(ECX), R(ECX)); + AND(32, R(EAX), Imm32(0x80000000 >> inst.BI)); + + CCFlags branch; + if(inst.BO & 8) + branch = CC_NZ; + else + branch = CC_Z; + */ + // TODO(ector): Why is this commented out? + //SETcc(branch, R(ECX)); + // check for EBX + //TEST(32, R(ECX), R(ECX)); + //linkEnd = J_CC(branch); + } + // NPC = CTR & 0xfffffffc; + MOV(32, R(EAX), M(&CTR)); + if (inst.LK) + MOV(32, M(&LR), Imm32(js.compilerPC + 4)); // LR = PC + 4; + AND(32, R(EAX), Imm32(0xFFFFFFFC)); + WriteExitDestInEAX(0); +} + + +void Jit64::bclrx(UGeckoInstruction inst) +{ + if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITBranchOff) + {Default(inst); return;} // turn off from debugger + + INSTRUCTION_START; + + gpr.Flush(FLUSH_ALL); + fpr.Flush(FLUSH_ALL); + //Special case BLR + if (inst.hex == 0x4e800020) + { + //CDynaRegCache::Flush(); + // This below line can be used to prove that blr "eats flags" in practice. + // This observation will let us do a lot of fun observations. +#ifdef ACID_TEST + AND(32, M(&CR), Imm32(~(0xFF000000))); +#endif + MOV(32, R(EAX), M(&LR)); + MOV(32, M(&PC), R(EAX)); + WriteExitDestInEAX(0); + return; + } + // Call interpreter + Default(inst); + MOV(32, R(EAX), M(&NPC)); + WriteExitDestInEAX(0); +} diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Integer.cpp index 472c8603c9..78b3fabe7a 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Integer.cpp @@ -196,6 +196,8 @@ MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x8)); // _x86Reg < 0 SetJumpTarget(continue1); SetJumpTarget(continue2); + // TODO: If we ever care about SO, borrow a trick from + // http://maws.mameworld.info/maws/mamesrc/src/emu/cpu/powerpc/drc_ops.c : bt, adc } else { int test_bit = 8 >> (js.next_inst.BI & 3); bool condition = (js.next_inst.BO & 8) ? false : true; diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/JitAsm.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/JitAsm.cpp index 2aef0da9b5..0423bdf91f 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/JitAsm.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/JitAsm.cpp @@ -215,25 +215,39 @@ const float m_65535 = 65535.0f; #define QUANTIZE_OVERFLOW_SAFE + // according to Intel Docs CVTPS2DQ writes 0x80000000 if the source floating point value is out of int32 range // while it's OK for large negatives, it isn't for positives // I don't know whether the overflow actually happens in any games // but it potentially can cause problems, so we need some clamping +// TODO(ector): Improve 64-bit version +static void WriteDual32(u64 value, u32 address) +{ + Memory::Write_U32((u32)(value >> 32), address); + Memory::Write_U32((u32)value, address + 4); +} + void AsmRoutineManager::GenQuantizedStores() { const u8* storePairedIllegal = AlignCode4(); UD2(); const u8* storePairedFloat = AlignCode4(); + // IN: value = XMM0, two singles in bottom. PPC address = ECX. #ifdef _M_X64 - MOVQ_xmm(R(RAX), XMM0); - ROL(64, R(RAX), Imm8(32)); + // INT3(); + MOVQ_xmm(M(&psTemp[0]), XMM0); + MOV(64, R(RAX), M(&psTemp[0])); + //INT3(); + //MOVQ_xmm(R(RAX), XMM0); + //INT3(); + ROL(64, R(RAX), Imm8(32)); // Swap the two - the big BSWAP will unswap. TEST(32, R(ECX), Imm32(0x0C000000)); FixupBranch argh = J_CC(CC_NZ); BSWAP(64, RAX); - MOV(64, MComplex(RBX, RCX, 1, 0), R(RAX)); + MOV(64, MComplex(RBX, RCX, SCALE_1, 0), R(RAX)); FixupBranch arg2 = J(); SetJumpTarget(argh); - ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U64, 2), RAX, RCX); + ABI_CallFunctionRR(thunks.ProtectFunction((void *)&WriteDual32, 2), RAX, RCX); SetJumpTarget(arg2); #else MOVQ_xmm(M(&psTemp[0]), XMM0); @@ -258,11 +272,12 @@ void AsmRoutineManager::GenQuantizedStores() { RET(); const u8* storePairedU8 = AlignCode4(); + INT3(); SHR(32, R(EAX), Imm8(6)); MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS)); PUNPCKLDQ(XMM1, R(XMM1)); MULPS(XMM0, R(XMM1)); -#ifdef QUANTIZE_OVERFLOW_SAFE +#ifdef QUANTIZE_OVERFLOW_SAFE MOVSS(XMM1, M((void *)&m_65535)); PUNPCKLDQ(XMM1, R(XMM1)); MINPS(XMM0, R(XMM1)); @@ -280,6 +295,7 @@ void AsmRoutineManager::GenQuantizedStores() { RET(); const u8* storePairedS8 = AlignCode4(); + INT3(); SHR(32, R(EAX), Imm8(6)); MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS)); PUNPCKLDQ(XMM1, R(XMM1)); @@ -302,6 +318,7 @@ void AsmRoutineManager::GenQuantizedStores() { RET(); const u8* storePairedU16 = AlignCode4(); + INT3(); SHR(32, R(EAX), Imm8(6)); MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS)); PUNPCKLDQ(XMM1, R(XMM1)); @@ -333,6 +350,7 @@ void AsmRoutineManager::GenQuantizedStores() { RET(); const u8* storePairedS16 = AlignCode4(); + INT3(); SHR(32, R(EAX), Imm8(6)); MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS)); PUNPCKLDQ(XMM1, R(XMM1)); diff --git a/Source/Core/Core/Src/PowerPC/PPCTables.cpp b/Source/Core/Core/Src/PowerPC/PPCTables.cpp index 547eacf9c7..76690f783f 100644 --- a/Source/Core/Core/Src/PowerPC/PPCTables.cpp +++ b/Source/Core/Core/Src/PowerPC/PPCTables.cpp @@ -444,7 +444,7 @@ static GekkoOPTemplate table31_2[] = static GekkoOPTemplate table59[] = { - {18, Interpreter::fdivsx, &Jit64::fp_arith_s, {"fdivsx", OPTYPE_FPU, FL_RC_BIT_F, 16}}, + {18, Interpreter::fdivsx, &Jit64::Default, /*TODO*/ {"fdivsx", OPTYPE_FPU, FL_RC_BIT_F, 16}}, {20, Interpreter::fsubsx, &Jit64::fp_arith_s, {"fsubsx", OPTYPE_FPU, FL_RC_BIT_F}}, {21, Interpreter::faddsx, &Jit64::fp_arith_s, {"faddsx", OPTYPE_FPU, FL_RC_BIT_F}}, // {22, Interpreter::fsqrtsx, &Jit64::Default, {"fsqrtsx", OPTYPE_FPU, FL_RC_BIT_F}}, // Not implemented on gekko @@ -687,7 +687,7 @@ void InitTables() } #define OPLOG -#define OP_TO_LOG "mcrfs" +#define OP_TO_LOG "mffs" #ifdef OPLOG namespace { diff --git a/Source/Core/Core/Src/PowerPC/PowerPC.cpp b/Source/Core/Core/Src/PowerPC/PowerPC.cpp index 6e1de73617..d780925896 100644 --- a/Source/Core/Core/Src/PowerPC/PowerPC.cpp +++ b/Source/Core/Core/Src/PowerPC/PowerPC.cpp @@ -357,7 +357,7 @@ void OnIdleIL() int PPCFPClass(double dvalue) { -#ifdef _WIN32 + /* // win32-only reference implementation, to compare to: switch (_fpclass(dvalue)) { case _FPCLASS_SNAN: @@ -371,9 +371,9 @@ int PPCFPClass(double dvalue) case _FPCLASS_PN: return 0x4; case _FPCLASS_PINF: return 0x5; default: return 0x4; - } -#else - // TODO: Make sure the below is equivalent to the above - then switch win32 implementation to it. + }*/ + + // TODO: Optimize the below to be as fast as possible. union { double d; u64 i; @@ -395,7 +395,7 @@ int PPCFPClass(double dvalue) return 0x9; } else { // OK let's dissect this thing. - int sign = (int)(value.i & 0x8000000000000000ULL) ? 1 : 0; + int sign = value.i >> 63; int exp = (int)((value.i >> 52) & 0x7FF); if (exp >= 1 && exp <= 2046) { // Nice normalized number. @@ -419,12 +419,22 @@ int PPCFPClass(double dvalue) } return 0x4; -#endif } } // namespace + +// FPSCR update functions + void UpdateFPRF(double dvalue) { FPSCR.FPRF = PowerPC::PPCFPClass(dvalue); } + +void UpdateFEX() { + FPSCR.FEX = (FPSCR.XX & FPSCR.XE) | + (FPSCR.ZX & FPSCR.ZE) | + (FPSCR.UX & FPSCR.UE) | + (FPSCR.OX & FPSCR.OE) | + (FPSCR.VX & FPSCR.VE); +} diff --git a/Source/Core/Core/Src/PowerPC/PowerPC.h b/Source/Core/Core/Src/PowerPC/PowerPC.h index 1540c72aa4..d3e23dc8d6 100644 --- a/Source/Core/Core/Src/PowerPC/PowerPC.h +++ b/Source/Core/Core/Src/PowerPC/PowerPC.h @@ -91,6 +91,8 @@ volatile CPUState *GetStatePtr(); // this oddity is here instead of an extern d void CompactCR(); void ExpandCR(); +int PPCFPClass(double dvalue); + void OnIdle(u32 _uThreadAddr); void OnIdleIL(); diff --git a/Source/Core/DolphinWX/DolphinWX.vcproj b/Source/Core/DolphinWX/DolphinWX.vcproj index e8b33eaea6..8e5a483412 100644 --- a/Source/Core/DolphinWX/DolphinWX.vcproj +++ b/Source/Core/DolphinWX/DolphinWX.vcproj @@ -96,7 +96,7 @@ AdditionalLibraryDirectories="..\..\..\Externals\wxWidgets\lib;..\..\..\Externals\LZO\win32\$(ConfigurationName)" IgnoreAllDefaultLibraries="false" IgnoreDefaultLibraryNames="msvcrt" - GenerateDebugInformation="false" + GenerateDebugInformation="true" ProgramDatabaseFile="$(PlatformName)\$(ConfigurationName)\$(TargetName).pdb" GenerateMapFile="false" MapFileName="$(TargetDir)linkermap.map" @@ -768,7 +768,7 @@ AdditionalLibraryDirectories="..\..\..\Externals\wxWidgets\lib;..\..\..\Externals\LZO\win32\$(ConfigurationName)" IgnoreAllDefaultLibraries="false" IgnoreDefaultLibraryNames="msvcrt" - GenerateDebugInformation="false" + GenerateDebugInformation="true" ProgramDatabaseFile="$(PlatformName)\$(ConfigurationName)\$(TargetName).pdb" GenerateMapFile="false" MapFileName="$(TargetDir)linkermap.map" diff --git a/Source/Core/InputCommon/Src/EventHandler.cpp b/Source/Core/InputCommon/Src/EventHandler.cpp index 7a0cde5f9c..e01dd4a38a 100644 --- a/Source/Core/InputCommon/Src/EventHandler.cpp +++ b/Source/Core/InputCommon/Src/EventHandler.cpp @@ -20,7 +20,7 @@ EventHandler::~EventHandler() { } EventHandler *EventHandler::GetInstance() { - fprintf(stderr, "handler instance %p\n", m_Instance); + // fprintf(stderr, "handler instance %p\n", m_Instance); if (! m_Instance) m_Instance = new EventHandler(); @@ -30,13 +30,13 @@ EventHandler *EventHandler::GetInstance() { void EventHandler::Destroy() { if (m_Instance) delete m_Instance; - fprintf(stderr, "deleting instance %p\n", m_Instance); + // fprintf(stderr, "deleting instance %p\n", m_Instance); m_Instance = 0; } bool EventHandler::RegisterEventListener(listenFuncPtr func, Keys key) { if (key.inputType == KeyboardInput) { - fprintf(stderr, "Registering %d:%d %p %p \n", key.keyCode, key.mods, func, this); + // fprintf(stderr, "Registering %d:%d %p %p \n", key.keyCode, key.mods, func, this); if (key.keyCode == sf::Key::Count || key.mods >= NUMMODS || key.keyCode >= NUMKEYS) return false; diff --git a/Source/Dolphin.sln b/Source/Dolphin.sln index 54341b0cff..27c49b9e8e 100644 --- a/Source/Dolphin.sln +++ b/Source/Dolphin.sln @@ -3,7 +3,9 @@ Microsoft Visual Studio Solution File, Format Version 10.00 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Core", "Core\Core\Core.vcproj", "{F0B874CB-4476-4199-9315-8343D05AE684}" ProjectSection(ProjectDependencies) = postProject {C7E5D50A-2916-464B-86A7-E10B3CC88ADA} = {C7E5D50A-2916-464B-86A7-E10B3CC88ADA} + {33546D62-7F34-4EA6-A88E-D538B36E16BF} = {33546D62-7F34-4EA6-A88E-D538B36E16BF} {11F55366-12EC-4C44-A8CB-1D4E315D61ED} = {11F55366-12EC-4C44-A8CB-1D4E315D61ED} + {3E03C179-8251-46E4-81F4-466F114BAC63} = {3E03C179-8251-46E4-81F4-466F114BAC63} {0E231FB1-F3C9-4724-ACCB-DE8BCB3C089E} = {0E231FB1-F3C9-4724-ACCB-DE8BCB3C089E} {29C2ABC1-ADA5-42CD-A5FC-96022D52A510} = {29C2ABC1-ADA5-42CD-A5FC-96022D52A510} {1C8436C9-DBAF-42BE-83BC-CF3EC9175ABE} = {1C8436C9-DBAF-42BE-83BC-CF3EC9175ABE} @@ -185,6 +187,12 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "SOIL", "..\Externals\SOIL\S EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "SFML_Network", "..\Externals\SFML\build\vc2008\sfml-network.vcproj", "{823DDC98-42D5-4A38-88CF-9DC06C788AE4}" EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "UnitTests", "UnitTests\UnitTests.vcproj", "{40C636FA-B5BF-4D67-ABC8-376B524A7551}" + ProjectSection(ProjectDependencies) = postProject + {F0B874CB-4476-4199-9315-8343D05AE684} = {F0B874CB-4476-4199-9315-8343D05AE684} + {C573CAF7-EE6A-458E-8049-16C0BF34C2E9} = {C573CAF7-EE6A-458E-8049-16C0BF34C2E9} + EndProjectSection +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Win32 = Debug|Win32 @@ -623,6 +631,19 @@ Global {823DDC98-42D5-4A38-88CF-9DC06C788AE4}.Release|Win32.Build.0 = Release|Win32 {823DDC98-42D5-4A38-88CF-9DC06C788AE4}.Release|x64.ActiveCfg = Release|x64 {823DDC98-42D5-4A38-88CF-9DC06C788AE4}.Release|x64.Build.0 = Release|x64 + {40C636FA-B5BF-4D67-ABC8-376B524A7551}.Debug|Win32.ActiveCfg = Debug|Win32 + {40C636FA-B5BF-4D67-ABC8-376B524A7551}.Debug|Win32.Build.0 = Debug|Win32 + {40C636FA-B5BF-4D67-ABC8-376B524A7551}.Debug|x64.ActiveCfg = Debug|Win32 + {40C636FA-B5BF-4D67-ABC8-376B524A7551}.DebugFast|Win32.ActiveCfg = Debug|Win32 + {40C636FA-B5BF-4D67-ABC8-376B524A7551}.DebugFast|Win32.Build.0 = Debug|Win32 + {40C636FA-B5BF-4D67-ABC8-376B524A7551}.DebugFast|x64.ActiveCfg = Debug|Win32 + {40C636FA-B5BF-4D67-ABC8-376B524A7551}.Release_JITIL|Win32.ActiveCfg = Release|Win32 + {40C636FA-B5BF-4D67-ABC8-376B524A7551}.Release_JITIL|Win32.Build.0 = Release|Win32 + {40C636FA-B5BF-4D67-ABC8-376B524A7551}.Release_JITIL|x64.ActiveCfg = Release|Win32 + {40C636FA-B5BF-4D67-ABC8-376B524A7551}.Release|Win32.ActiveCfg = Release|Win32 + {40C636FA-B5BF-4D67-ABC8-376B524A7551}.Release|Win32.Build.0 = Release|Win32 + {40C636FA-B5BF-4D67-ABC8-376B524A7551}.Release|x64.ActiveCfg = Release|x64 + {40C636FA-B5BF-4D67-ABC8-376B524A7551}.Release|x64.Build.0 = Release|x64 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/Source/Plugins/Plugin_VideoOGL/Src/GLUtil.h b/Source/Plugins/Plugin_VideoOGL/Src/GLUtil.h index 1c21da8699..d75638d9ec 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/GLUtil.h +++ b/Source/Plugins/Plugin_VideoOGL/Src/GLUtil.h @@ -144,7 +144,7 @@ bool OpenGL_ReportFBOError(const char *function, const char *file, int line); #if defined(_DEBUG) || defined(DEBUGFAST) #define GL_REPORT_ERRORD() OpenGL_ReportGLError(__FUNCTION__, __FILE__, __LINE__) #else -#define GL_REPORT_ERRORD() GL_NO_ERROR +#define GL_REPORT_ERRORD() #endif #endif // GLTEST ?? diff --git a/Source/Plugins/Plugin_VideoOGL/Src/OnScreenDisplay.cpp b/Source/Plugins/Plugin_VideoOGL/Src/OnScreenDisplay.cpp index 7a4c7126ca..6f08857bd8 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/OnScreenDisplay.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/OnScreenDisplay.cpp @@ -81,8 +81,13 @@ void DrawMessages() } } + GL_REPORT_ERRORD(); + if (enabled) glEnable(GL_BLEND); + + + GL_REPORT_ERRORD(); } } // namespace diff --git a/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp b/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp index 4683c7c870..50ac672752 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp @@ -1122,6 +1122,7 @@ void Renderer::SwapBuffers() fpscount = 0; } // --------------------------------------------------------------------- + GL_REPORT_ERRORD(); for (int i = 0; i < 8; i++) { glActiveTexture(GL_TEXTURE0 + i); @@ -1132,8 +1133,12 @@ void Renderer::SwapBuffers() DrawDebugText(); + GL_REPORT_ERRORD(); + OSD::DrawMessages(); + GL_REPORT_ERRORD(); + #if defined(DVPROFILE) if (g_bWriteProfile) { //g_bWriteProfile = 0; @@ -1148,6 +1153,9 @@ void Renderer::SwapBuffers() #endif // Copy the rendered frame to the real window OpenGL_SwapBuffers(); + + GL_REPORT_ERRORD(); + // Clear framebuffer glClearColor(0, 0, 0, 0); glClear(GL_COLOR_BUFFER_BIT); @@ -1359,6 +1367,7 @@ void Renderer::RenderText(const char* pstr, int left, int top, u32 color) left * 2.0f / (float)nBackbufferWidth - 1, 1 - top * 2.0f / (float)nBackbufferHeight, 0, nBackbufferWidth, nBackbufferHeight); + GL_REPORT_ERRORD(); } diff --git a/Source/Plugins/Plugin_VideoOGL/Src/rasterfont.cpp b/Source/Plugins/Plugin_VideoOGL/Src/rasterfont.cpp index ef4fec7b9a..d0794d9eac 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/rasterfont.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/rasterfont.cpp @@ -16,20 +16,7 @@ // http://code.google.com/p/dolphin-emu/ #include "Globals.h" - -#ifdef _WIN32 -#include -#endif - -#if defined(__APPLE__) - -#include - -#else - -#include - -#endif +#include "GLUtil.h" #include @@ -154,13 +141,28 @@ RasterFont::~RasterFont() void RasterFont::printString(const char *s, double x, double y, double z) { + int length = strlen(s); + if (!length) + return; + + // Sanitize string to avoid GL errors. + char *s2 = new char[length + 1]; + strcpy(s2, s); + for (int i = 0; i < length; i++) { + if (s2[i] < 32 || s2[i] > 126) + s2[i] = '!'; + } + // go to the right spot glRasterPos3d(x, y, z); + GL_REPORT_ERRORD(); glPushAttrib (GL_LIST_BIT); glListBase(fontOffset); - glCallLists((GLsizei)strlen(s), GL_UNSIGNED_BYTE, (GLubyte *) s); - glPopAttrib (); + glCallLists((GLsizei)strlen(s2), GL_UNSIGNED_BYTE, (GLubyte *) s2); + GL_REPORT_ERRORD(); + glPopAttrib(); + GL_REPORT_ERRORD(); } void RasterFont::printCenteredString(const char *s, double y, int screen_width, double z) diff --git a/Source/UnitTests/UnitTests.cpp b/Source/UnitTests/UnitTests.cpp new file mode 100644 index 0000000000..b258a862bb --- /dev/null +++ b/Source/UnitTests/UnitTests.cpp @@ -0,0 +1,89 @@ +// Copyright (C) 2003-2009 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ + +#include +#include + +#include "PowerPC/PowerPC.h" +#include "HW/SI_DeviceGCController.h" + +using namespace std; + +int fail_count = 0; + +#define EXPECT_EQ(a, b) \ + if ((a) != (b)) { \ + cout << "FAIL: " << #a << " %s is not equal to " << #b << endl; \ + cout << "Actual: " << a << endl << "Expected: " << b << endl; \ + fail_count++; \ + } + +void CoreTests() +{ + // Tests that our fp classifier is correct. + EXPECT_EQ(PowerPC::PPCFPClass(1.0), 0x4); + EXPECT_EQ(PowerPC::PPCFPClass(-1.0), 0x8); + EXPECT_EQ(PowerPC::PPCFPClass(1235223.0), 0x4); + EXPECT_EQ(PowerPC::PPCFPClass(-126323521.0), 0x8); + EXPECT_EQ(PowerPC::PPCFPClass(1.0E-308), 0x14); + EXPECT_EQ(PowerPC::PPCFPClass(-1.0E-308), 0x18); + EXPECT_EQ(PowerPC::PPCFPClass(0.0), 0x2); + EXPECT_EQ(PowerPC::PPCFPClass(-0.0), 0x12); + EXPECT_EQ(PowerPC::PPCFPClass(HUGE_VAL), 0x5); // weird #define for infinity + EXPECT_EQ(PowerPC::PPCFPClass(-HUGE_VAL), 0x9); + EXPECT_EQ(PowerPC::PPCFPClass(sqrt(-1.0)), 0x11); // SNAN +} + +int main(int argc, _TCHAR* argv[]) +{ + CoreTests(); + if (fail_count == 0) + { + printf("All tests passed.\n"); + } + return 0; +} + + +// Pretend that we are a host so we can link to core.... urgh. +//============================================================== +void Host_UpdateMainFrame(){} +void Host_UpdateDisasmDialog(){} +void Host_UpdateLogDisplay(){} +void Host_UpdateMemoryView(){} +void Host_NotifyMapLoaded(){} +void Host_UpdateBreakPointView(){} +void Host_SetDebugMode(bool enable){} + +void Host_SetWaitCursor(bool enable){} + +void Host_UpdateStatusBar(const char* _pText, int Filed = 0){} +#ifdef SETUP_TIMER_WAITING +void Host_UpdateGUI(){} +#endif + +void Host_SysMessage(const char *fmt, ...){} +void Host_SetWiiMoteConnectionState(int _State){} + +void Host_UpdateLeds(int bits){} +void Host_UpdateSpeakerStatus(int index, int bits){} +void Host_UpdateStatus(){} + +int CSIDevice_GCController::GetNetInput(u8 numPAD, SPADStatus PadStatus, u32 *PADStatus) +{ + return 0; +} diff --git a/Source/UnitTests/UnitTests.vcproj b/Source/UnitTests/UnitTests.vcproj new file mode 100644 index 0000000000..02ab68bd77 --- /dev/null +++ b/Source/UnitTests/UnitTests.vcproj @@ -0,0 +1,329 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +