From dfc9f6953e8cee7862d9ff285e3b27e7296a79cf Mon Sep 17 00:00:00 2001 From: twinaphex Date: Wed, 19 Feb 2014 23:07:25 +0100 Subject: [PATCH] (GX) Optimize GX_CallDispList --- gx/gx_video.c | 4 +- gx/gx_video_inl.h | 394 ++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 351 insertions(+), 47 deletions(-) diff --git a/gx/gx_video.c b/gx/gx_video.c index 73c8c46fec..5eb47a341a 100644 --- a/gx/gx_video.c +++ b/gx/gx_video.c @@ -934,14 +934,14 @@ static bool gx_frame(void *data, const void *frame, __GX_SetCurrentMtx(__gx, GX_PNMTX0); GX_LoadTexObj(&g_tex.obj, GX_TEXMAP0); - GX_CallDispList(display_list, display_list_size); + __GX_CallDispList(__gx, display_list, display_list_size); GX_DrawDone(); if (gx->rgui_texture_enable) { __GX_SetCurrentMtx(__gx, GX_PNMTX1); GX_LoadTexObj(&menu_tex.obj, GX_TEXMAP0); - GX_CallDispList(display_list, display_list_size); + __GX_CallDispList(__gx, display_list, display_list_size); GX_DrawDone(); } diff --git a/gx/gx_video_inl.h b/gx/gx_video_inl.h index 6fdc79e284..388cacc1a6 100644 --- a/gx/gx_video_inl.h +++ b/gx/gx_video_inl.h @@ -1,11 +1,4 @@ #define STRUCT_REGDEF_SIZE 1440 - -#define GX_LOAD_BP_REG(x) \ - wgPipe->U8 = 0x61; \ - asm volatile ("" ::: "memory" ); \ - wgPipe->U32 = (u32)(x); \ - asm volatile ("" ::: "memory" ) - #define WGPIPE (0xCC008000) #define FIFO_PUTU8(x) *(vu8*)WGPIPE = (u8)(x) @@ -18,6 +11,24 @@ #define XY(x, y) (((y) << 10) | (x)) +#define GX_LOAD_BP_REG(x) \ + FIFO_PUTU8(0x61); \ + FIFO_PUTU32((x)) + +#define GX_LOAD_CP_REG(x, y) \ + FIFO_PUTU8(0x08); \ + FIFO_PUTU8((x)); \ + FIFO_PUTU32((y)) + +#define GX_LOAD_XF_REG(x, y) \ + FIFO_PUTU8(0x10); \ + FIFO_PUTU32(((x)&0xffff)); \ + FIFO_PUTU32((y)) + +#define GX_LOAD_XF_REGS(x, n) \ + FIFO_PUTU8(0x10); \ + FIFO_PUTU32((((((n)&0xffff)-1)<<16)|((x)&0xffff))) + extern u8 __gxregs[]; struct __gx_regdef @@ -98,43 +109,6 @@ struct __gx_texobj u8 tex_flag; } __attribute__((packed)); -#if defined(HW_RVL) -static GXTexRegion* __GXDefTexRegionCallback(GXTexObj *obj,u8 mapid) -{ - struct __gx_regdef *__gx = (struct __gx_regdef*)__gxregs; - u32 fmt; - GXTexRegion *ret = NULL; - - fmt = ((struct __gx_texobj*)obj)->tex_fmt; - if ((fmt >= GX_TF_CI4 && fmt<=GX_TF_CI14) || fmt==GX_TF_CMPR) - ret = &__gx->texRegion[mapid]; - else - ret = &__gx->texRegion[mapid+8]; - - return ret; -} -#else -static GXTexRegion* __GXDefTexRegionCallback(GXTexObj *obj,u8 mapid) -{ - struct __gx_regdef *__gx = (struct __gx_regdef*)__gxregs; - u32 fmt; - u32 idx; - static u32 regionA = 0; - static u32 regionB = 0; - GXTexRegion *ret = NULL; - - fmt = ((struct __gx_texobj*)obj)->tex_fmt; - if(fmt==0x0008 || fmt==0x0009 || fmt==0x000a) { - idx = regionB++; - ret = &__gx->texRegion[(idx&3)+8]; - } else { - idx = regionA++; - ret = &__gx->texRegion[(idx&7)]; - } - return ret; -} -#endif - #define __GX_SetDispCopySrc(__gx, left, top, wd, ht) \ __gx->dispCopyTL = (__gx->dispCopyTL&~0x00ffffff)|XY(left,top); \ __gx->dispCopyTL = (__gx->dispCopyTL&~0xff000000)|(_SHIFTL(0x49,24,8)); \ @@ -224,4 +198,334 @@ static inline void __GX_CopyDisp(struct __gx_regdef *__gx, void *dest,u8 clear) __gx->mtxIdxLo = (__gx->mtxIdxLo&~0x3f)|(mtx&0x3f); \ __gx->dirtyState |= 0x04000000 -#define GX_LoadTexObj(obj,mapid) GX_LoadTexObjPreloaded(obj,(__GXDefTexRegionCallback(obj,mapid)),mapid) +#define __GX_SetVAT(__gx, setvtx) \ + for(s32 i = 0; i < 8;i++) \ + { \ + setvtx = (1<VATTable&setvtx) \ + { \ + GX_LOAD_CP_REG((0x70+(i&7)),__gx->VAT0reg[i]); \ + GX_LOAD_CP_REG((0x80+(i&7)),__gx->VAT1reg[i]); \ + GX_LOAD_CP_REG((0x90+(i&7)),__gx->VAT2reg[i]); \ + } \ + } \ + __gx->VATTable = 0 + +#define __GX_XfVtxSpecs(__gx) \ +{ \ + u32 nrms,texs,cols; \ + cols = 0; \ + if(__gx->vcdLo&0x6000) cols++; \ + if(__gx->vcdLo&0x18000) cols++; \ + nrms = 0; \ + if(__gx->vcdNrms==1) nrms = 1; \ + else if(__gx->vcdNrms==2) nrms = 2; \ + texs = 0; \ + if(__gx->vcdHi & 0x3) texs++; \ + if(__gx->vcdHi & 0xc) texs++; \ + if(__gx->vcdHi&0x30) texs++; \ + if(__gx->vcdHi&0xc0) texs++; \ + if(__gx->vcdHi&0x300) texs++; \ + if(__gx->vcdHi&0xc00) texs++; \ + if(__gx->vcdHi&0x3000) texs++; \ + if(__gx->vcdHi&0xc000) texs++; \ + GX_LOAD_XF_REG(0x1008, ((_SHIFTL(texs,4,4))|(_SHIFTL(nrms,2,2))|(cols&0x3))); \ +} + +#define __GX_SetVCD(__gx) \ + GX_LOAD_CP_REG(0x50,__gx->vcdLo); \ + GX_LOAD_CP_REG(0x60,__gx->vcdHi); \ + __GX_XfVtxSpecs(__gx) + + +#define __GX_SetChanCntrl(__gx) \ +{ \ + u32 i,chan,mask; \ + if(__gx->dirtyState&0x01000000) \ + { \ + GX_LOAD_XF_REG(0x1009,(_SHIFTR(__gx->genMode,4,3))); \ + } \ + i = 0; \ + chan = 0x100e; \ + mask = _SHIFTR(__gx->dirtyState,12,4); \ + while(mask) { \ + if(mask&0x0001) \ + { \ + GX_LOAD_XF_REG(chan,__gx->chnCntrl[i]); \ + } \ + mask >>= 1; \ + chan++; \ + i++; \ + } \ +} + +#define __GX_SetMatrixIndex(__gx, mtx) \ + if(mtx<5) { \ + GX_LOAD_CP_REG(0x30,__gx->mtxIdxLo); \ + GX_LOAD_XF_REG(0x1018,__gx->mtxIdxLo); \ + } else { \ + GX_LOAD_CP_REG(0x40,__gx->mtxIdxHi); \ + GX_LOAD_XF_REG(0x1019,__gx->mtxIdxHi); \ + } + +#ifdef HW_DOL +static inline void __GX_UpdateBPMask(struct __gx_regdef *__gx) +{ + u32 i; + u32 nbmp,nres; + u8 ntexmap; + + nbmp = _SHIFTR(_gx[0xac],16,3); + + nres = 0; + for(i=0;idirtyState&0x0100) \ + { \ + GX_LOAD_XF_REG(0x100a,__gx->chnAmbColor[0]); \ + } \ + if(__gx->dirtyState&0x0200) \ + { \ + GX_LOAD_XF_REG(0x100b,__gx->chnAmbColor[1]); \ + } \ + if(__gx->dirtyState&0x0400) \ + { \ + GX_LOAD_XF_REG(0x100c,__gx->chnMatColor[0]); \ + } \ + if(__gx->dirtyState&0x0800) \ + { \ + GX_LOAD_XF_REG(0x100d,__gx->chnMatColor[1]); \ + } + +static inline void __GX_SetTexCoordGen(struct __gx_regdef *__gx) +{ + u32 i,mask; + u32 texcoord; + + if(__gx->dirtyState&0x02000000) + { + GX_LOAD_XF_REG(0x103f,(__gx->genMode&0xf)); + } + + i = 0; + texcoord = 0x1040; + mask = _SHIFTR(__gx->dirtyState,16,8); + while(mask) { + if(mask&0x0001) { + GX_LOAD_XF_REG(texcoord,__gx->texCoordGen[i]); + GX_LOAD_XF_REG((texcoord+0x10),__gx->texCoordGen2[i]); + } + mask >>= 1; + texcoord++; + i++; + } +} + +static void __SetSURegs(struct __gx_regdef *__gx, u8 texmap,u8 texcoord) +{ + u32 reg; + u16 wd,ht; + u8 wrap_s,wrap_t; + + wd = __gx->texMapSize[texmap]&0x3ff; + ht = _SHIFTR(__gx->texMapSize[texmap],10,10); + wrap_s = __gx->texMapWrap[texmap]&3; + wrap_t = _SHIFTR(__gx->texMapWrap[texmap],2,2); + + reg = (texcoord&0x7); + __gx->suSsize[reg] = (__gx->suSsize[reg]&~0x0000ffff)|wd; + __gx->suTsize[reg] = (__gx->suTsize[reg]&~0x0000ffff)|ht; + __gx->suSsize[reg] = (__gx->suSsize[reg]&~0x00010000)|(_SHIFTL(wrap_s,16,1)); + __gx->suTsize[reg] = (__gx->suTsize[reg]&~0x00010000)|(_SHIFTL(wrap_t,16,1)); + + GX_LOAD_BP_REG(__gx->suSsize[reg]); + GX_LOAD_BP_REG(__gx->suTsize[reg]); +} + +static inline void __GX_SetSUTexRegs(struct __gx_regdef *__gx) +{ + u32 i; + u32 indtev,dirtev; + u8 texcoord,texmap; + u32 tevreg,tevm,texcm; + + dirtev = (_SHIFTR(__gx->genMode,10,4))+1; + indtev = _SHIFTR(__gx->genMode,16,3); + + //indirect texture order + for(i=0;itevRasOrder[2]&7; + texcoord = _SHIFTR(__gx->tevRasOrder[2],3,3); + break; + case GX_INDTEXSTAGE1: + texmap = _SHIFTR(__gx->tevRasOrder[2],6,3); + texcoord = _SHIFTR(__gx->tevRasOrder[2],9,3); + break; + case GX_INDTEXSTAGE2: + texmap = _SHIFTR(__gx->tevRasOrder[2],12,3); + texcoord = _SHIFTR(__gx->tevRasOrder[2],15,3); + break; + case GX_INDTEXSTAGE3: + texmap = _SHIFTR(__gx->tevRasOrder[2],18,3); + texcoord = _SHIFTR(__gx->tevRasOrder[2],21,3); + break; + default: + texmap = 0; + texcoord = 0; + break; + } + + texcm = _SHIFTL(1,texcoord,1); + if(!(__gx->texCoordManually&texcm)) + __SetSURegs(__gx, texmap,texcoord); + } + + //direct texture order + for(i=0;itevTexMap[i]&0xff); + + if(i&1) texcoord = _SHIFTR(__gx->tevRasOrder[tevreg],15,3); + else texcoord = _SHIFTR(__gx->tevRasOrder[tevreg],3,3); + + tevm = _SHIFTL(1,i,1); + texcm = _SHIFTL(1,texcoord,1); + if(texmap!=0xff && (__gx->tevTexCoordEnable&tevm) && !(__gx->texCoordManually&texcm)) { + __SetSURegs(__gx, texmap,texcoord); + } + } +} + +#define __GX_SetGenMode(__gx) \ + GX_LOAD_BP_REG(__gx->genMode); \ + __gx->xfFlush = 0 + +static void __GX_SetDirtyState(struct __gx_regdef *__gx) +{ + if(__gx->dirtyState&0x0001) + { + __GX_SetSUTexRegs(__gx); + } +#ifdef HW_DOL + if(__gx->dirtyState&0x0002) + { + __GX_UpdateBPMask(__gx); + } +#endif + if(__gx->dirtyState&0x0004) + { + __GX_SetGenMode(__gx); + } + if(__gx->dirtyState&0x0008) + { + __GX_SetVCD(__gx); + } + if(__gx->dirtyState&0x0010) + { + u8 setvtx = 0; + __GX_SetVAT(__gx, setvtx); + } + if(__gx->dirtyState&~0xff) + { + if(__gx->dirtyState&0x0f00) + { + __GX_SetChanColor(__gx); + } + if(__gx->dirtyState&0x0100f000) + { + __GX_SetChanCntrl(__gx); + } + if(__gx->dirtyState&0x02ff0000) + { + __GX_SetTexCoordGen(__gx); + } + if(__gx->dirtyState&0x04000000) + { + __GX_SetMatrixIndex(__gx, 0); + __GX_SetMatrixIndex(__gx, 5); + } + } + __gx->dirtyState = 0; +} + +static void __GX_SendFlushPrim(struct __gx_regdef *__gx) +{ + u32 tmp,tmp2,cnt; + + tmp = (__gx->xfFlush*__gx->xfFlushExp); + + FIFO_PUTU8(0x98); + FIFO_PUTU16(__gx->xfFlush); + + tmp2 = (tmp+3)/4; + + if(tmp > 0) + { + cnt = tmp2/8; + while(cnt) + { + FIFO_PUTU32(0); + FIFO_PUTU32(0); + FIFO_PUTU32(0); + FIFO_PUTU32(0); + FIFO_PUTU32(0); + FIFO_PUTU32(0); + FIFO_PUTU32(0); + FIFO_PUTU32(0); + cnt--; + } + tmp2 &= 0x0007; + if(tmp2) + { + while(tmp2) + { + FIFO_PUTU32(0); + tmp2--; + } + } + } + __gx->xfFlush = 1; +} + +#define __GX_CallDispList(__gx, list, nbytes) \ + if(__gx->dirtyState) \ + { \ + __GX_SetDirtyState(__gx); \ + } \ + if(!__gx->vcdClear) \ + { \ + __GX_SendFlushPrim(__gx); \ + } \ + FIFO_PUTU8(0x40); /*call displaylist */ \ + FIFO_PUTU32(MEM_VIRTUAL_TO_PHYSICAL(list)); \ + FIFO_PUTU32(nbytes)