A tiny bit more JIT WIP work.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@1847 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
magumagu9 2009-01-11 01:26:58 +00:00
parent 4acda0096b
commit 018cb993e3
2 changed files with 62 additions and 30 deletions

View File

@ -90,16 +90,15 @@ integer code are more aggressively combining blocks and dead condition
register elimination, which should be very helpful for small blocks. register elimination, which should be very helpful for small blocks.
TODO (in no particular order): TODO (in no particular order):
Floating-point JIT (both paired and unpaired) JIT for misc remaining FP instructions
(very large win for FP code, no effect for integer code) JIT for bcctrx
Inter-block dead condition register elimination (Likely significant win Misc optimizations for FP instructions
combined with optimized conditions) Inter-block dead register elimination; this seems likely to have large
Optimize conditions for conditional branches. performance benefits, although I'm not completely sure.
General dead register elimination. Inter-block inlining; also likely to have large performance benefits.
Inter-block inlining. The tricky parts are deciding which blocks to inline, and that the
Track down issues with new JIT + dual-core mode (I think I'm going to IR can't really deal with branches whose destination is in the
need help with this one; I'm not very familiar with the middle of a generated block.
dual-core code.)
Specialized slw/srw/sraw; I think there are some tricks that could Specialized slw/srw/sraw; I think there are some tricks that could
have a non-trivial effect, and there are significantly shorter have a non-trivial effect, and there are significantly shorter
implementations for 64-bit involving abusing 64-bit shifts. implementations for 64-bit involving abusing 64-bit shifts.
@ -111,15 +110,19 @@ Scheduling to reduce register pressure: PowerPC compilers like to push
instruction reordering. instruction reordering.
Common subexpression elimination Common subexpression elimination
Optimize load/store of sum using complex addressing (partially implemented) Optimize load/store of sum using complex addressing (partially implemented)
Implement idle-skipping Loop optimizations (loop-carried registers, LICM)
Loop optimizations (loop-carried registers, LICM); not sure how much Fold register loads into arithmetic operations
this will help on top of dead register elimination
Fold loads (both register and memory) into arithmetic operations
Code refactoring/cleanup Code refactoring/cleanup
Investigate performance of the JIT itself; this doesn't affect Investigate performance of the JIT itself; this doesn't affect
framerates significantly, but it does take a visible amount framerates significantly, but it does take a visible amount
of time for a complicated piece of code like a video decoder of time for a complicated piece of code like a video decoder
to compile. to compile.
Fix profiled loads/stores to work safely. On 32-bit, one solution is to
use a spare segment register, and expand the backpatch solution
to work in all the relevant situations. On 64-bit, the existing
fast memory solution should basically work. An alternative
would be to figure out a heuristic for what loads actually
vary their "type", and special-case them.
*/ */
@ -464,6 +467,12 @@ InstLoc IRBuilder::FoldBranchCond(InstLoc Op1, InstLoc Op2) {
if (branchValue == 2) if (branchValue == 2)
return FoldBranchCond(EmitICmpEq(getOp1(getOp1(Op1)), return FoldBranchCond(EmitICmpEq(getOp1(getOp1(Op1)),
getOp2(getOp1(Op1))), Op2); getOp2(getOp1(Op1))), Op2);
if (branchValue == 4)
return FoldBranchCond(EmitICmpSgt(getOp1(getOp1(Op1)),
getOp2(getOp1(Op1))), Op2);
if (branchValue == 8)
return FoldBranchCond(EmitICmpSlt(getOp1(getOp1(Op1)),
getOp2(getOp1(Op1))), Op2);
} }
if (getOpcode(*Op1) == Xor && if (getOpcode(*Op1) == Xor &&
isImm(*getOp2(Op1))) { isImm(*getOp2(Op1))) {
@ -475,10 +484,15 @@ InstLoc IRBuilder::FoldBranchCond(InstLoc Op1, InstLoc Op2) {
unsigned innerBranchValue = unsigned innerBranchValue =
GetImmValue(getOp2(XOp1)); GetImmValue(getOp2(XOp1));
if (branchValue == innerBranchValue) { if (branchValue == innerBranchValue) {
if (branchValue == 4) { if (branchValue == 2)
return FoldBranchCond(EmitICmpNe(getOp1(getOp1(XOp1)),
getOp2(getOp1(XOp1))), Op2);
if (branchValue == 4)
return FoldBranchCond(EmitICmpSle(getOp1(getOp1(XOp1)), return FoldBranchCond(EmitICmpSle(getOp1(getOp1(XOp1)),
getOp2(getOp1(XOp1))), Op2); getOp2(getOp1(XOp1))), Op2);
} if (branchValue == 8)
return FoldBranchCond(EmitICmpSge(getOp1(getOp1(XOp1)),
getOp2(getOp1(XOp1))), Op2);
} }
} }
} }
@ -493,6 +507,9 @@ InstLoc IRBuilder::FoldICmp(unsigned Opcode, InstLoc Op1, InstLoc Op2) {
case ICmpEq: case ICmpEq:
result = GetImmValue(Op1) == GetImmValue(Op2); result = GetImmValue(Op1) == GetImmValue(Op2);
break; break;
case ICmpNe:
result = GetImmValue(Op1) != GetImmValue(Op2);
break;
case ICmpUgt: case ICmpUgt:
result = GetImmValue(Op1) > GetImmValue(Op2); result = GetImmValue(Op1) > GetImmValue(Op2);
break; break;
@ -1285,9 +1302,7 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) {
regMarkUse(RI, I, getOp1(I), 1); regMarkUse(RI, I, getOp1(I), 1);
break; break;
case BranchCond: { case BranchCond: {
unsigned CondOpcode = getOpcode(*getOp1(I)); if (isICmp(*getOp1(I)) &&
if ((CondOpcode == ICmpEq ||
CondOpcode == ICmpSle) &&
isImm(*getOp2(getOp1(I)))) { isImm(*getOp2(getOp1(I)))) {
regMarkUse(RI, I, getOp1(getOp1(I)), 1); regMarkUse(RI, I, getOp1(getOp1(I)), 1);
} else { } else {
@ -1904,20 +1919,24 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) {
case BlockEnd: case BlockEnd:
break; break;
case BranchCond: { case BranchCond: {
if (getOpcode(*getOp1(I)) == ICmpEq && if (isICmp(*getOp1(I)) &&
isImm(*getOp2(getOp1(I)))) { isImm(*getOp2(getOp1(I)))) {
Jit->CMP(32, regLocForInst(RI, getOp1(getOp1(I))), Jit->CMP(32, regLocForInst(RI, getOp1(getOp1(I))),
Imm32(RI.Build->GetImmValue(getOp2(getOp1(I))))); Imm32(RI.Build->GetImmValue(getOp2(getOp1(I)))));
FixupBranch cont = Jit->J_CC(CC_NZ); CCFlags flag;
regWriteExit(RI, getOp2(I)); switch (getOpcode(*getOp1(I))) {
Jit->SetJumpTarget(cont); case ICmpEq: flag = CC_NE; break;
if (RI.IInfo[I - RI.FirstI] & 4) case ICmpNe: flag = CC_E; break;
regClearInst(RI, getOp1(getOp1(I))); case ICmpUgt: flag = CC_BE; break;
} else if (getOpcode(*getOp1(I)) == ICmpSle && case ICmpUlt: flag = CC_AE; break;
isImm(*getOp2(getOp1(I)))) { case ICmpUge: flag = CC_L; break;
Jit->CMP(32, regLocForInst(RI, getOp1(getOp1(I))), case ICmpUle: flag = CC_A; break;
Imm32(RI.Build->GetImmValue(getOp2(getOp1(I))))); case ICmpSgt: flag = CC_LE; break;
FixupBranch cont = Jit->J_CC(CC_G); case ICmpSlt: flag = CC_GE; break;
case ICmpSge: flag = CC_L; break;
case ICmpSle: flag = CC_G; break;
}
FixupBranch cont = Jit->J_CC(flag);
regWriteExit(RI, getOp2(I)); regWriteExit(RI, getOp2(I));
Jit->SetJumpTarget(cont); Jit->SetJumpTarget(cont);
if (RI.IInfo[I - RI.FirstI] & 4) if (RI.IInfo[I - RI.FirstI] & 4)

View File

@ -217,6 +217,10 @@ namespace IREmitter {
return getOpcode(i) >= CInt16 && getOpcode(i) <= CInt32; return getOpcode(i) >= CInt16 && getOpcode(i) <= CInt32;
} }
// True (nonzero) when the instruction is one of the integer-compare
// opcodes. Relies on ICmpEq..ICmpSle being a contiguous run in the
// opcode enumeration, mirroring the other is* predicates here.
unsigned inline isICmp(Inst i) {
	const unsigned op = getOpcode(i);
	return op >= ICmpEq && op <= ICmpSle;
}
unsigned inline isFResult(Inst i) { unsigned inline isFResult(Inst i) {
return getOpcode(i) > FResult_Start && return getOpcode(i) > FResult_Start &&
getOpcode(i) < FResult_End; getOpcode(i) < FResult_End;
@ -329,12 +333,21 @@ namespace IREmitter {
InstLoc EmitICmpEq(InstLoc op1, InstLoc op2) { InstLoc EmitICmpEq(InstLoc op1, InstLoc op2) {
return FoldBiOp(ICmpEq, op1, op2); return FoldBiOp(ICmpEq, op1, op2);
} }
// Build an integer "not equal" compare. Goes through FoldBiOp so
// constant operands can be folded instead of emitting a new node.
InstLoc EmitICmpNe(InstLoc op1, InstLoc op2) {
	const InstLoc folded = FoldBiOp(ICmpNe, op1, op2);
	return folded;
}
InstLoc EmitICmpUgt(InstLoc op1, InstLoc op2) { InstLoc EmitICmpUgt(InstLoc op1, InstLoc op2) {
return FoldBiOp(ICmpUgt, op1, op2); return FoldBiOp(ICmpUgt, op1, op2);
} }
InstLoc EmitICmpSgt(InstLoc op1, InstLoc op2) { InstLoc EmitICmpSgt(InstLoc op1, InstLoc op2) {
return FoldBiOp(ICmpSgt, op1, op2); return FoldBiOp(ICmpSgt, op1, op2);
} }
// Build a signed "less than" compare, routed through FoldBiOp for
// constant folding / simplification.
InstLoc EmitICmpSlt(InstLoc op1, InstLoc op2) {
	const InstLoc folded = FoldBiOp(ICmpSlt, op1, op2);
	return folded;
}
// Build a signed "greater than or equal" compare, routed through
// FoldBiOp for constant folding / simplification.
InstLoc EmitICmpSge(InstLoc op1, InstLoc op2) {
	const InstLoc folded = FoldBiOp(ICmpSge, op1, op2);
	return folded;
}
InstLoc EmitICmpSle(InstLoc op1, InstLoc op2) { InstLoc EmitICmpSle(InstLoc op1, InstLoc op2) {
return FoldBiOp(ICmpSle, op1, op2); return FoldBiOp(ICmpSle, op1, op2);
} }