A tiny bit more JIT WIP work.
git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@1847 8ced0084-cf51-0410-be5f-012b33b47a6e
parent 4acda0096b
commit 018cb993e3
@@ -90,16 +90,15 @@ integer code are more aggresively combining blocks and dead condition
 register elimination, which should be very helpful for small blocks.
 
 TODO (in no particular order):
-Floating-point JIT (both paired and unpaired)
-(very large win for FP code, no effect for integer code)
-Inter-block dead condition register elimination (Likely significant win
-combined with optimized conditions)
-Optimize conditions for conditional branches.
-General dead register elimination.
-Inter-block inlining.
-Track down issues with new JIT + dual-core mode (I think I'm going to
-need help with this one; I'm not very familiar with the
-dual-core code.)
+JIT for misc remaining FP instructions
+JIT for bcctrx
+Misc optimizations for FP instructions
+Inter-block dead register elimination; this seems likely to have large
+performance benefits, although I'm not completely sure.
+Inter-block inlining; also likely to have large performance benefits.
+The tricky parts are deciding which blocks to inline, and that the
+IR can't really deal with branches whose destination is in the
+the middle of a generated block.
 Specialized slw/srw/sraw; I think there are some tricks that could
 have a non-trivial effect, and there are significantly shorter
 implementations for 64-bit involving abusing 64-bit shifts.
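
The "abusing 64-bit shifts" note in the TODO above refers to the fact that
slw/srw take a 6-bit shift amount, where amounts 32-63 must produce zero; a
64-bit shift followed by truncation gives exactly that semantics in one
operation. A minimal sketch of the intended lowering (illustrative C++, not
Dolphin code):

    #include <stdint.h>

    // slw: shift left word; amounts 32-63 must yield 0, which the
    // 64-bit shift plus truncation provides for free.
    uint32_t slw(uint32_t rs, uint32_t rb) {
        return (uint32_t)((uint64_t)rs << (rb & 0x3F));
    }

    // srw: shift right word logical; same zero-for-large-amounts rule.
    uint32_t srw(uint32_t rs, uint32_t rb) {
        return (uint32_t)((uint64_t)rs >> (rb & 0x3F));
    }

sraw can use the same trick on a sign-extended input for the result value,
but it additionally has to compute the carry bit, so it benefits less.
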
@@ -111,15 +110,19 @@ Scheduling to reduce register pressure: PowerPC compilers like to push
 instruction reordering.
 Common subexpression elimination
 Optimize load/store of sum using complex addressing (partially implemented)
-Implement idle-skipping
-Loop optimizations (loop-carried registers, LICM); not sure how much
-this will help on top of dead register elimination
-Fold loads (both register and memory) into arithmetic operations
+Loop optimizations (loop-carried registers, LICM)
+Fold register loads into arithmetic operations
 Code refactoring/cleanup
 Investigate performance of the JIT itself; this doesn't affect
 framerates significantly, but it does take a visible amount
 of time for a complicated piece of code like a video decoder
 to compile.
+Fix profiled loads/stores to work safely. On 32-bit, one solution is to
+use a spare segment register, and expand the backpatch solution
+to work in all the relevant situations. On 64-bit, the existing
+fast memory solution should basically work. An alternative
+would be to figure out a heuristic for what loads actually
+vary their "type", and special-case them.
 
 */
 
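
"Common subexpression elimination" from the TODO would sit naturally in the
FoldBiOp path that the hunks below go through. A minimal value-numbering
sketch, assuming a table keyed on (opcode, operands); the map, the EmitBiOp
helper, and the InstLoc typedef here are illustrative stand-ins, not
Dolphin's actual API:

    #include <map>
    #include <utility>

    typedef unsigned InstLoc;  // stand-in for the IR's instruction handle
    InstLoc EmitBiOp(unsigned opcode, InstLoc op1, InstLoc op2);  // hypothetical raw emit

    typedef std::pair<unsigned, std::pair<InstLoc, InstLoc> > CSEKey;
    static std::map<CSEKey, InstLoc> cseTable;

    InstLoc FoldBiOpWithCSE(unsigned opcode, InstLoc op1, InstLoc op2) {
        CSEKey key(opcode, std::make_pair(op1, op2));
        std::map<CSEKey, InstLoc>::iterator it = cseTable.find(key);
        if (it != cseTable.end())
            return it->second;  // reuse the identical earlier computation
        InstLoc result = EmitBiOp(opcode, op1, op2);
        cseTable[key] = result;
        return result;
    }

The table would have to be cleared at block boundaries (and wherever an
operand can be redefined) for this to stay sound.
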
@@ -464,6 +467,12 @@ InstLoc IRBuilder::FoldBranchCond(InstLoc Op1, InstLoc Op2) {
 		if (branchValue == 2)
 			return FoldBranchCond(EmitICmpEq(getOp1(getOp1(Op1)),
 					      getOp2(getOp1(Op1))), Op2);
+		if (branchValue == 4)
+			return FoldBranchCond(EmitICmpSgt(getOp1(getOp1(Op1)),
+					      getOp2(getOp1(Op1))), Op2);
+		if (branchValue == 8)
+			return FoldBranchCond(EmitICmpSlt(getOp1(getOp1(Op1)),
+					      getOp2(getOp1(Op1))), Op2);
 	}
 	if (getOpcode(*Op1) == Xor &&
 	    isImm(*getOp2(Op1))) {
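
The bit values tested here match a CR-style encoding of the compare result:
reading off the folds, 2 is the EQ bit, 4 the GT bit, and 8 the LT bit. A
sketch of that encoding (inferred from the folds, not quoted from the IR):

    // Inferred encoding; presumably 1 would be the SO bit.
    unsigned CmpCRValue(int a, int b) {
        if (a < b) return 8;  // LT
        if (a > b) return 4;  // GT
        return 2;             // EQ
    }

    // Testing one bit of this value is then an ordinary compare, which is
    // exactly what the fold emits:
    //   (CmpCRValue(a, b) & 2) != 0  <=>  a == b  ->  ICmpEq
    //   (CmpCRValue(a, b) & 4) != 0  <=>  a >  b  ->  ICmpSgt
    //   (CmpCRValue(a, b) & 8) != 0  <=>  a <  b  ->  ICmpSlt
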
@@ -475,10 +484,15 @@ InstLoc IRBuilder::FoldBranchCond(InstLoc Op1, InstLoc Op2) {
 			unsigned innerBranchValue =
 				GetImmValue(getOp2(XOp1));
 			if (branchValue == innerBranchValue) {
-				if (branchValue == 4) {
+				if (branchValue == 2)
+					return FoldBranchCond(EmitICmpNe(getOp1(getOp1(XOp1)),
+							      getOp2(getOp1(XOp1))), Op2);
+				if (branchValue == 4)
 					return FoldBranchCond(EmitICmpSle(getOp1(getOp1(XOp1)),
 							      getOp2(getOp1(XOp1))), Op2);
-				}
+				if (branchValue == 8)
+					return FoldBranchCond(EmitICmpSge(getOp1(getOp1(XOp1)),
+							      getOp2(getOp1(XOp1))), Op2);
 			}
 		}
 	}
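
The Xor branch handles the inverted tests: xoring the compare value with the
same bit the branch then examines flips that bit, so the branch fires exactly
when the underlying condition is false. Continuing the sketch above:

    //   ((CmpCRValue(a, b) ^ 2) & 2) != 0  <=>  a != b  ->  ICmpNe
    //   ((CmpCRValue(a, b) ^ 4) & 4) != 0  <=>  a <= b  ->  ICmpSle
    //   ((CmpCRValue(a, b) ^ 8) & 8) != 0  <=>  a >= b  ->  ICmpSge
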
@@ -493,6 +507,9 @@ InstLoc IRBuilder::FoldICmp(unsigned Opcode, InstLoc Op1, InstLoc Op2) {
 		case ICmpEq:
 			result = GetImmValue(Op1) == GetImmValue(Op2);
 			break;
+		case ICmpNe:
+			result = GetImmValue(Op1) != GetImmValue(Op2);
+			break;
 		case ICmpUgt:
 			result = GetImmValue(Op1) > GetImmValue(Op2);
 			break;
@@ -1285,9 +1302,7 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) {
 			regMarkUse(RI, I, getOp1(I), 1);
 			break;
 		case BranchCond: {
-			unsigned CondOpcode = getOpcode(*getOp1(I));
-			if ((CondOpcode == ICmpEq ||
-			     CondOpcode == ICmpSle) &&
+			if (isICmp(*getOp1(I)) &&
 			    isImm(*getOp2(getOp1(I)))) {
 				regMarkUse(RI, I, getOp1(getOp1(I)), 1);
 			} else {
@@ -1904,20 +1919,24 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) {
 		case BlockEnd:
 			break;
 		case BranchCond: {
-			if (getOpcode(*getOp1(I)) == ICmpEq &&
+			if (isICmp(*getOp1(I)) &&
 			    isImm(*getOp2(getOp1(I)))) {
 				Jit->CMP(32, regLocForInst(RI, getOp1(getOp1(I))),
 					 Imm32(RI.Build->GetImmValue(getOp2(getOp1(I)))));
-				FixupBranch cont = Jit->J_CC(CC_NZ);
-				regWriteExit(RI, getOp2(I));
-				Jit->SetJumpTarget(cont);
-				if (RI.IInfo[I - RI.FirstI] & 4)
-					regClearInst(RI, getOp1(getOp1(I)));
-			} else if (getOpcode(*getOp1(I)) == ICmpSle &&
-				   isImm(*getOp2(getOp1(I)))) {
-				Jit->CMP(32, regLocForInst(RI, getOp1(getOp1(I))),
-					 Imm32(RI.Build->GetImmValue(getOp2(getOp1(I)))));
-				FixupBranch cont = Jit->J_CC(CC_G);
+				CCFlags flag;
+				switch (getOpcode(*getOp1(I))) {
+				case ICmpEq: flag = CC_NE; break;
+				case ICmpNe: flag = CC_E; break;
+				case ICmpUgt: flag = CC_BE; break;
+				case ICmpUlt: flag = CC_AE; break;
+				case ICmpUge: flag = CC_L; break;
+				case ICmpUle: flag = CC_A; break;
+				case ICmpSgt: flag = CC_LE; break;
+				case ICmpSlt: flag = CC_GE; break;
+				case ICmpSge: flag = CC_L; break;
+				case ICmpSle: flag = CC_G; break;
+				}
+				FixupBranch cont = Jit->J_CC(flag);
 				regWriteExit(RI, getOp2(I));
 				Jit->SetJumpTarget(cont);
 				if (RI.IInfo[I - RI.FirstI] & 4)
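
The switch picks the condition code for the jump that skips the block exit,
i.e. the logical inverse of each IR compare. For reference, the inverse
mapping on x86 is below; note the hunk maps ICmpUge to CC_L, the signed
flag, where the unsigned inverse would be CC_B (below):

    // IR compare -> x86 cc for the "skip the exit" jump (logical inverse):
    //   ICmpEq  -> CC_NE     ICmpNe  -> CC_E
    //   ICmpUgt -> CC_BE     ICmpUle -> CC_A
    //   ICmpUlt -> CC_AE     ICmpUge -> CC_B  (unsigned below)
    //   ICmpSgt -> CC_LE     ICmpSle -> CC_G
    //   ICmpSlt -> CC_GE     ICmpSge -> CC_L
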
@@ -217,6 +217,10 @@ namespace IREmitter {
 		return getOpcode(i) >= CInt16 && getOpcode(i) <= CInt32;
 	}
 
+	unsigned inline isICmp(Inst i) {
+		return getOpcode(i) >= ICmpEq && getOpcode(i) <= ICmpSle;
+	}
+
 	unsigned inline isFResult(Inst i) {
 		return getOpcode(i) > FResult_Start &&
 		       getOpcode(i) < FResult_End;
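
The range test relies on the comparison opcodes being declared contiguously
in the opcode enum, bracketed by ICmpEq and ICmpSle. An illustrative
ordering (the real one lives in IR.h, not quoted here):

    enum Opcode {
        // ...
        ICmpEq, ICmpNe,
        ICmpUgt, ICmpUlt, ICmpUge, ICmpUle,
        ICmpSgt, ICmpSlt, ICmpSge, ICmpSle,
        // ...
    };
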
@@ -329,12 +333,21 @@ namespace IREmitter {
 	InstLoc EmitICmpEq(InstLoc op1, InstLoc op2) {
 		return FoldBiOp(ICmpEq, op1, op2);
 	}
+	InstLoc EmitICmpNe(InstLoc op1, InstLoc op2) {
+		return FoldBiOp(ICmpNe, op1, op2);
+	}
 	InstLoc EmitICmpUgt(InstLoc op1, InstLoc op2) {
 		return FoldBiOp(ICmpUgt, op1, op2);
 	}
 	InstLoc EmitICmpSgt(InstLoc op1, InstLoc op2) {
 		return FoldBiOp(ICmpSgt, op1, op2);
 	}
+	InstLoc EmitICmpSlt(InstLoc op1, InstLoc op2) {
+		return FoldBiOp(ICmpSlt, op1, op2);
+	}
+	InstLoc EmitICmpSge(InstLoc op1, InstLoc op2) {
+		return FoldBiOp(ICmpSge, op1, op2);
+	}
 	InstLoc EmitICmpSle(InstLoc op1, InstLoc op2) {
 		return FoldBiOp(ICmpSle, op1, op2);
 	}
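
A hypothetical caller of the new emitters (the EmitBranchCond name and the
variables here are illustrative, not quoted from the IR): because every
wrapper routes through FoldBiOp, callers get constant folding and the
FoldBranchCond rewrites above without doing anything special.

    InstLoc cond = ibuild->EmitICmpSge(lhs, rhs);       // may fold to an immediate
    InstLoc br   = ibuild->EmitBranchCond(cond, dest);  // illustrative emitter name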