From 5d9ea96abc65a8f018d50b67b66fbb5b09e30feb Mon Sep 17 00:00:00 2001 From: Mr-Wiseguy Date: Tue, 15 Nov 2022 19:55:48 -0500 Subject: [PATCH] Added temp for switch case operand, fixed compilation issues in output --- include/recomp_port.h | 2 ++ recomp.h | 34 ++++++++++++++++++---- src/analysis.cpp | 31 ++++++++++++-------- src/main.cpp | 66 +++++++++++++++++++++++++++++++++++++++---- src/recompilation.cpp | 32 ++++++++++++++------- 5 files changed, 132 insertions(+), 33 deletions(-) diff --git a/include/recomp_port.h b/include/recomp_port.h index 7852650..ec52742 100644 --- a/include/recomp_port.h +++ b/include/recomp_port.h @@ -24,6 +24,7 @@ namespace RecompPort { uint32_t addend_reg; uint32_t rom; uint32_t lw_vram; + uint32_t addu_vram; uint32_t jr_vram; std::vector entries; }; @@ -33,6 +34,7 @@ namespace RecompPort { uint32_t rom; const std::span words; std::string name; + bool ignored; }; struct FunctionStats { diff --git a/recomp.h b/recomp.h index 6cc002c..7929a92 100644 --- a/recomp.h +++ b/recomp.h @@ -2,6 +2,7 @@ #define __RECOMP_H__ #include +#include #if 0 // treat GPRs as 32-bit, should be better codegen typedef uint32_t gpr; @@ -22,22 +23,26 @@ typedef uint64_t gpr; ((gpr)(int32_t)((a) - (b))) #define MEM_D(offset, reg) \ - (*(int64_t*)((rdram) + (((reg) + (offset)) ^ 3))) + (*(int64_t*)(rdram + ((((reg) + (offset))) & 0x3FFFFFF))) #define MEM_W(offset, reg) \ - (*(int32_t*)((rdram) + (((reg) + (offset)) ^ 3))) + (*(int32_t*)(rdram + ((((reg) + (offset))) & 0x3FFFFFF))) #define MEM_H(offset, reg) \ - (*(int16_t*)((rdram) + (((reg) + (offset)) ^ 3))) + (*(int16_t*)(rdram + ((((reg) + (offset)) ^ 2) & 0x3FFFFFF))) #define MEM_B(offset, reg) \ - (*(int8_t*)((rdram) + (((reg) + (offset)) ^ 3))) + (*(int8_t*)(rdram + ((((reg) + (offset)) ^ 3) & 0x3FFFFFF))) #define MEM_HU(offset, reg) \ - (*(uint16_t*)((rdram) + (((reg) + (offset)) ^ 3))) + (*(uint16_t*)(rdram + ((((reg) + (offset)) ^ 2) & 0x3FFFFFF))) #define MEM_BU(offset, reg) \ - (*(uint8_t*)((rdram) + (((reg) + (offset)) ^ 3))) + (*(uint8_t*)(rdram + ((((reg) + (offset)) ^ 3) & 0x3FFFFFF))) + +// TODO proper lwl/lwr/swl/swr +#define MEM_WL(offset, reg) \ + (*(int32_t*)(rdram + ((((reg) + (offset))) & 0x3FFFFFF))) #define S32(val) \ ((int32_t)(val)) @@ -104,6 +109,23 @@ typedef struct { uint64_t hi, lo; } recomp_context; +#ifdef __cplusplus +#define restrict __restrict +extern "C" { +#endif + void switch_error(const char* func, uint32_t vram, uint32_t jtbl); +void do_break(uint32_t vram); + +typedef void (recomp_func_t)(uint8_t* restrict rdram, recomp_context* restrict ctx); + +recomp_func_t* get_function(uint32_t vram); + +#define LOOKUP_FUNC(val) \ + get_function(val) + +#ifdef __cplusplus +} +#endif #endif diff --git a/src/analysis.cpp b/src/analysis.cpp index c3e1e47..45d0f80 100644 --- a/src/analysis.cpp +++ b/src/analysis.cpp @@ -12,14 +12,16 @@ extern "C" const char* RabbitizerRegister_getNameGpr(uint8_t regValue); struct RegState { // For tracking a register that will be used to load from RAM uint32_t prev_lui; - uint32_t prev_addiu; + uint32_t prev_addiu_vram; + uint32_t prev_addu_vram; uint8_t prev_addend_reg; bool valid_lui; bool valid_addiu; bool valid_addend; // For tracking a register that has been loaded from RAM - uint32_t loaded_lw_addr; - uint32_t loaded_addr; + uint32_t loaded_lw_vram; + uint32_t loaded_addu_vram; + uint32_t loaded_address; uint8_t loaded_addend_reg; bool valid_loaded; @@ -27,15 +29,17 @@ struct RegState { void invalidate() { prev_lui = 0; - prev_addiu = 0; + prev_addiu_vram = 0; + prev_addu_vram = 0; prev_addend_reg = 0; valid_lui = false; valid_addiu = false; valid_addend = false; - loaded_lw_addr = 0; - loaded_addr = 0; + loaded_lw_vram = 0; + loaded_addu_vram = 0; + loaded_address = 0; loaded_addend_reg = 0; valid_loaded = false; @@ -82,7 +86,7 @@ bool analyze_instruction(const rabbitizer::InstructionCpu& instr, const RecompPo reg_states[rt] = reg_states[rs]; // Set the addiu state if and only if there hasn't been an addiu already if (!reg_states[rt].valid_addiu) { - reg_states[rt].prev_addiu = (int16_t)imm; + reg_states[rt].prev_addiu_vram = (int16_t)imm; reg_states[rt].valid_addiu = true; } else { // Otherwise, there have been 2 or more consecutive addius so invalidate the whole register @@ -102,6 +106,7 @@ bool analyze_instruction(const rabbitizer::InstructionCpu& instr, const RecompPo temp = reg_states[valid_lui_reg]; temp.valid_addend = true; temp.prev_addend_reg = addend_reg; + temp.prev_addu_vram = instr.getVram(); } else { // Check if this is a move check_move(); @@ -124,14 +129,15 @@ bool analyze_instruction(const rabbitizer::InstructionCpu& instr, const RecompPo if (nonzero_immediate) { lo16 = (int16_t)imm; } else { - lo16 = reg_states[base].prev_addiu; + lo16 = reg_states[base].prev_addiu_vram; } uint32_t address = reg_states[base].prev_lui + lo16; temp.valid_loaded = true; - temp.loaded_lw_addr = instr.getVram(); - temp.loaded_addr = address; + temp.loaded_lw_vram = instr.getVram(); + temp.loaded_address = address; temp.loaded_addend_reg = reg_states[base].prev_addend_reg; + temp.loaded_addu_vram = reg_states[base].prev_addu_vram; } } reg_states[rt] = temp; @@ -144,10 +150,11 @@ bool analyze_instruction(const rabbitizer::InstructionCpu& instr, const RecompPo // Check if the source reg has a valid loaded state and if so record that as a jump table if (reg_states[rs].valid_loaded) { stats.jump_tables.emplace_back( - reg_states[rs].loaded_addr, + reg_states[rs].loaded_address, reg_states[rs].loaded_addend_reg, 0, - reg_states[rs].loaded_lw_addr, + reg_states[rs].loaded_lw_vram, + reg_states[rs].loaded_addu_vram, instr.getVram(), std::vector{} ); diff --git a/src/main.cpp b/src/main.cpp index 9d759c8..4a99481 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -6,6 +6,7 @@ #include "rabbitizer.hpp" #include "elfio/elfio.hpp" #include "fmt/format.h" +#include "fmt/ostream.h" #include "recomp_port.h" @@ -221,7 +222,16 @@ std::unordered_set ignored_funcs { "__osSetConfig", "__osGetConfig", "__osSetWatchLo", - "__osGetWatchLo" + "__osGetWatchLo", + // Cache funcs + "osInvalDCache", + "osInvalICache", + "osWritebackDCache", + "osWritebackDCacheAll" +}; + +std::unordered_set renamed_funcs{ + "sincosf" }; int main(int argc, char** argv) { @@ -305,6 +315,14 @@ int main(int argc, char** argv) { // Check if this symbol is a function or has no type (like a regular glabel would) // Symbols with no type have a dummy entry created so that their symbol can be looked up for function calls if (type == ELFIO::STT_FUNC || type == ELFIO::STT_NOTYPE) { + bool ignored = false; + if (renamed_funcs.contains(name)) { + name = "_" + name; + } + if (ignored_funcs.contains(name)) { + name = name + "_recomp"; + ignored = true; + } if (section_index < section_rom_addrs.size()) { auto section_rom_addr = section_rom_addrs[section_index]; auto section_offset = value - elf_file.sections[section_index]->get_address(); @@ -316,7 +334,8 @@ int main(int argc, char** argv) { vram, static_cast(section_offset + section_rom_addr), std::span{ words, num_instructions }, - std::move(name) + std::move(name), + ignored ); } else { uint32_t vram = static_cast(value); @@ -325,7 +344,8 @@ int main(int argc, char** argv) { vram, 0, std::span{}, - std::move(name) + std::move(name), + ignored ); } } @@ -333,16 +353,52 @@ int main(int argc, char** argv) { fmt::print("Function count: {}\n", context.functions.size()); + std::ofstream func_lookup_file{ "out/funcs/lookup.cpp" }; + std::ofstream func_header_file{ "out/funcs/funcs.h" }; + + fmt::print(func_lookup_file, + "#include \n" + "#include \"recomp.h\"\n" + "#include \"funcs.h\"\n" + "\n" + "std::pair funcs[] {{\n" + ); + + fmt::print(func_header_file, + "#include \"recomp.h\"\n" + "\n" + "#ifdef __cplusplus\n" + "extern \"C\" {{\n" + "#endif\n" + "\n" + ); + //#pragma omp parallel for for (size_t i = 0; i < context.functions.size(); i++) { const auto& func = context.functions[i]; - if (!ignored_funcs.contains(func.name) && func.words.size() != 0) { - if (RecompPort::recompile_function(context, func, "out/" + func.name + ".c") == false) { + if (!func.ignored && func.words.size() != 0) { + fmt::print(func_header_file, + "void {}(uint8_t* restrict rdram, recomp_context* restrict ctx);\n", func.name); + fmt::print(func_lookup_file, + " {{ 0x{:08X}u, {} }},\n", func.vram, func.name); + if (RecompPort::recompile_function(context, func, "out/funcs/" + func.name + ".c") == false) { + func_lookup_file.clear(); fmt::print(stderr, "Error recompiling {}\n", func.name); std::exit(EXIT_FAILURE); } } } + fmt::print(func_lookup_file, + "}};\n" + "extern const size_t num_funcs = sizeof(funcs) / sizeof(funcs[0]);\n" + ); + + fmt::print(func_header_file, + "\n" + "#ifdef __cplusplus\n" + "}}\n" + "#endif\n" + ); return 0; } diff --git a/src/recompilation.cpp b/src/recompilation.cpp index 51caaef..bf0742c 100644 --- a/src/recompilation.cpp +++ b/src/recompilation.cpp @@ -31,7 +31,9 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::F fmt::print(output_file, " // {}\n", instr.disassemble(0)); } - if (skipped_insns.contains(instr.getVram())) { + uint32_t instr_vram = instr.getVram(); + + if (skipped_insns.contains(instr_vram)) { return true; } @@ -105,6 +107,18 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::F print_line("{}{} = {:#X} << 16", ctx_gpr_prefix(rt), rt, imm); break; case InstrId::cpu_addu: + { + // Check if this addu belongs to a jump table load + auto find_result = std::find_if(stats.jump_tables.begin(), stats.jump_tables.end(), + [instr_vram](const RecompPort::JumpTable& jtbl) { + return jtbl.addu_vram == instr_vram; + }); + // If so, create a temp to preserve the addend register's value + if (find_result != stats.jump_tables.end()) { + const RecompPort::JumpTable& cur_jtbl = *find_result; + print_line("gpr jr_addend_{:08X} = {}{}", cur_jtbl.jr_vram, ctx_gpr_prefix(cur_jtbl.addend_reg), cur_jtbl.addend_reg); + } + } print_line("{}{} = ADD32({}{}, {}{})", ctx_gpr_prefix(rd), rd, ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt); break; case InstrId::cpu_daddu: @@ -169,10 +183,10 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::F print_line("{}{} = {}{} < {:#X} ? 1 : 0", ctx_gpr_prefix(rt), rt, ctx_gpr_prefix(rs), rs, (int16_t)imm); break; case InstrId::cpu_mult: - print_line("uint64_t result = S64({}{}) * S64({}{}); lo = S32(result >> 0); hi = S32(result >> 32)", ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt); + print_line("result = S64({}{}) * S64({}{}); lo = S32(result >> 0); hi = S32(result >> 32)", ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt); break; case InstrId::cpu_multu: - print_line("uint64_t result = U64({}{}) * U64({}{}); lo = S32(result >> 0); hi = S32(result >> 32)", ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt); + print_line("result = U64({}{}) * U64({}{}); lo = S32(result >> 0); hi = S32(result >> 32)", ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt); break; case InstrId::cpu_div: // Cast to 64-bits before division to prevent artihmetic exception for s32(0x80000000) / -1 @@ -232,13 +246,13 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::F print_line("{}{} = MEM_WL({:#X}, {}{})", ctx_gpr_prefix(rt), rt, (int16_t)imm, ctx_gpr_prefix(base), base); break; case InstrId::cpu_lwr: - print_line("{}{} = MEM_WR({:#X}, {}{})", ctx_gpr_prefix(rt), rt, (int16_t)imm, ctx_gpr_prefix(base), base); + print_line("//{}{} = MEM_WR({:#X}, {}{})", ctx_gpr_prefix(rt), rt, (int16_t)imm, ctx_gpr_prefix(base), base); break; case InstrId::cpu_swl: print_line("MEM_WL({:#X}, {}{}) = {}{}", (int16_t)imm, ctx_gpr_prefix(base), base, ctx_gpr_prefix(rt), rt); break; case InstrId::cpu_swr: - print_line("MEM_WR({:#X}, {}{}) = {}{}", (int16_t)imm, ctx_gpr_prefix(base), base, ctx_gpr_prefix(rt), rt); + print_line("//MEM_WR({:#X}, {}{}) = {}{}", (int16_t)imm, ctx_gpr_prefix(base), base, ctx_gpr_prefix(rt), rt); break; // Branches @@ -310,7 +324,6 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::F if (rs == (int)rabbitizer::Registers::Cpu::GprO32::GPR_O32_ra) { print_unconditional_branch("return"); } else { - uint32_t instr_vram = instr.getVram(); auto find_result = std::find_if(stats.jump_tables.begin(), stats.jump_tables.end(), [instr_vram](const RecompPort::JumpTable& jtbl) { return jtbl.jr_vram == instr_vram; @@ -322,8 +335,7 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::F bool dummy_needs_link_branch, dummy_is_branch_likely; process_instruction(context, func, stats, skipped_insns, instr_index + 1, instructions, output_file, false, false, link_branch_index, dummy_needs_link_branch, dummy_is_branch_likely); print_indent(); - // TODO this will fail if the register holding the addend is mangled, add logic to emit a temp with the addend into the code - fmt::print(output_file, "switch ({}{} >> 2) {{\n", ctx_gpr_prefix(cur_jtbl.addend_reg), cur_jtbl.addend_reg, cur_jtbl.vram); + fmt::print(output_file, "switch (jr_addend_{:08X} >> 2) {{\n", cur_jtbl.jr_vram); for (size_t entry_index = 0; entry_index < cur_jtbl.entries.size(); entry_index++) { print_indent(); print_line("case {}: goto L_{:08X}; break", entry_index, cur_jtbl.entries[entry_index]); @@ -383,7 +395,7 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::F print_branch("goto L_{:08X}", (uint32_t)instr.getBranchVramGeneric()); break; case InstrId::cpu_break: - print_line("do_break();"); + print_line("do_break({})", instr_vram); break; // Cop1 loads/stores @@ -731,7 +743,7 @@ bool RecompPort::recompile_function(const RecompPort::Context& context, const Re "\n" "void {}(uint8_t* restrict rdram, recomp_context* restrict ctx) {{\n" // these variables shouldn't need to be preserved across function boundaries, so make them local for more efficient output - " uint64_t hi = 0, lo = 0;\n" + " uint64_t hi = 0, lo = 0, result = 0;\n" " int c1cs = 0; \n", // cop1 conditional signal func.name);