From b19859450ef9a3d16d9aa1524fc37c3bd50f8d7b Mon Sep 17 00:00:00 2001 From: hrydgard Date: Sun, 23 Nov 2008 23:54:52 +0000 Subject: [PATCH] Vertexloader cleanup/fixing: Don't use floating point copies unless necessary, don't allow scale factors for floating point texture coordinates. git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@1282 8ced0084-cf51-0410-be5f-012b33b47a6e --- .../Plugin_VideoOGL/Src/VertexLoader_Color.h | 12 ++--- .../Src/VertexLoader_Normal.cpp | 48 ++++++++--------- .../Src/VertexLoader_Position.h | 53 +++++++++++++++++-- .../Src/VertexLoader_TextCoord.h | 30 ++++------- 4 files changed, 91 insertions(+), 52 deletions(-) diff --git a/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader_Color.h b/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader_Color.h index 76c298fb0d..c5bfc0dec2 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader_Color.h +++ b/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader_Color.h @@ -37,18 +37,18 @@ inline void _SetCol(u32 val) void _SetCol4444(u16 val) { u32 col = lut4to8[(val>>0)&0xF]<>12)&0xF] <>8)&0xF] <>4)&0xF] <>12)&0xF] <>8)&0xF] <>4)&0xF] <>18)&0x3F] << RSHIFT; - col |= lut6to8[(val>>12)&0x3F] << GSHIFT; - col |= lut6to8[(val>>6)&0x3F] << BSHIFT; - col |= lut6to8[(val>>0)&0x3F] << ASHIFT; + col |= lut6to8[(val>>12)&0x3F] << GSHIFT; + col |= lut6to8[(val>>6)&0x3F] << BSHIFT; + col |= lut6to8[(val>>0)&0x3F] << ASHIFT; _SetCol(col); } diff --git a/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader_Normal.cpp b/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader_Normal.cpp index 42e5969155..ace593ba8f 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader_Normal.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader_Normal.cpp @@ -203,9 +203,9 @@ void LOADERDECL VertexLoader_Normal::Normal_DirectShort() void LOADERDECL VertexLoader_Normal::Normal_DirectFloat() { - ((float*)VertexManager::s_pCurBufferPointer)[0] = DataReadF32(); - ((float*)VertexManager::s_pCurBufferPointer)[1] = DataReadF32(); - 
((float*)VertexManager::s_pCurBufferPointer)[2] = DataReadF32(); + ((u32*)VertexManager::s_pCurBufferPointer)[0] = DataReadU32(); + ((u32*)VertexManager::s_pCurBufferPointer)[1] = DataReadU32(); + ((u32*)VertexManager::s_pCurBufferPointer)[2] = DataReadU32(); VertexManager::s_pCurBufferPointer += 12; LOG_NORMF() } @@ -238,9 +238,9 @@ void LOADERDECL VertexLoader_Normal::Normal_DirectFloat3() { for (int i = 0; i < 3; i++) { - ((float*)VertexManager::s_pCurBufferPointer)[0] = DataReadF32(); - ((float*)VertexManager::s_pCurBufferPointer)[1] = DataReadF32(); - ((float*)VertexManager::s_pCurBufferPointer)[2] = DataReadF32(); + ((u32*)VertexManager::s_pCurBufferPointer)[0] = DataReadU32(); + ((u32*)VertexManager::s_pCurBufferPointer)[1] = DataReadU32(); + ((u32*)VertexManager::s_pCurBufferPointer)[2] = DataReadU32(); VertexManager::s_pCurBufferPointer += 12; LOG_NORMF(); } @@ -279,9 +279,9 @@ void LOADERDECL VertexLoader_Normal::Normal_Index8_Float() { u8 Index = DataReadU8(); u32 iAddress = arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]); - ((float*)VertexManager::s_pCurBufferPointer)[0] = Memory_Read_Float(iAddress); - ((float*)VertexManager::s_pCurBufferPointer)[1] = Memory_Read_Float(iAddress+4); - ((float*)VertexManager::s_pCurBufferPointer)[2] = Memory_Read_Float(iAddress+8); + ((u32*)VertexManager::s_pCurBufferPointer)[0] = Memory_Read_U32(iAddress); + ((u32*)VertexManager::s_pCurBufferPointer)[1] = Memory_Read_U32(iAddress+4); + ((u32*)VertexManager::s_pCurBufferPointer)[2] = Memory_Read_U32(iAddress+8); VertexManager::s_pCurBufferPointer += 12; LOG_NORMF(); } @@ -320,9 +320,9 @@ void LOADERDECL VertexLoader_Normal::Normal_Index8_Float3_Indices1() for (int i = 0; i < 3; i++) { u32 iAddress = arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 4*3*i; - ((float*)VertexManager::s_pCurBufferPointer)[0] = Memory_Read_Float(iAddress); - ((float*)VertexManager::s_pCurBufferPointer)[1] = Memory_Read_Float(iAddress+4); - 
((float*)VertexManager::s_pCurBufferPointer)[2] = Memory_Read_Float(iAddress+8); + ((u32*)VertexManager::s_pCurBufferPointer)[0] = Memory_Read_U32(iAddress); + ((u32*)VertexManager::s_pCurBufferPointer)[1] = Memory_Read_U32(iAddress+4); + ((u32*)VertexManager::s_pCurBufferPointer)[2] = Memory_Read_U32(iAddress+8); VertexManager::s_pCurBufferPointer += 12; LOG_NORMF(); } @@ -362,9 +362,9 @@ void LOADERDECL VertexLoader_Normal::Normal_Index8_Float3_Indices3() { u8 Index = DataReadU8(); u32 iAddress = arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 4*3*i; - ((float*)VertexManager::s_pCurBufferPointer)[0] = Memory_Read_Float(iAddress); - ((float*)VertexManager::s_pCurBufferPointer)[1] = Memory_Read_Float(iAddress+4); - ((float*)VertexManager::s_pCurBufferPointer)[2] = Memory_Read_Float(iAddress+8); + ((u32*)VertexManager::s_pCurBufferPointer)[0] = Memory_Read_U32(iAddress); + ((u32*)VertexManager::s_pCurBufferPointer)[1] = Memory_Read_U32(iAddress+4); + ((u32*)VertexManager::s_pCurBufferPointer)[2] = Memory_Read_U32(iAddress+8); VertexManager::s_pCurBufferPointer += 12; LOG_NORMF(); } @@ -400,9 +400,9 @@ void LOADERDECL VertexLoader_Normal::Normal_Index16_Float() { u16 Index = DataReadU16(); u32 iAddress = arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]); - ((float*)VertexManager::s_pCurBufferPointer)[0] = Memory_Read_Float(iAddress); - ((float*)VertexManager::s_pCurBufferPointer)[1] = Memory_Read_Float(iAddress+4); - ((float*)VertexManager::s_pCurBufferPointer)[2] = Memory_Read_Float(iAddress+8); + ((u32*)VertexManager::s_pCurBufferPointer)[0] = Memory_Read_U32(iAddress); + ((u32*)VertexManager::s_pCurBufferPointer)[1] = Memory_Read_U32(iAddress+4); + ((u32*)VertexManager::s_pCurBufferPointer)[2] = Memory_Read_U32(iAddress+8); VertexManager::s_pCurBufferPointer += 12; LOG_NORMF(); } @@ -441,9 +441,9 @@ void LOADERDECL VertexLoader_Normal::Normal_Index16_Float3_Indices1() for (int i = 0; i < 3; i++) { u32 iAddress = 
arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 4*3*i; - ((float*)VertexManager::s_pCurBufferPointer)[0] = Memory_Read_Float(iAddress); - ((float*)VertexManager::s_pCurBufferPointer)[1] = Memory_Read_Float(iAddress+4); - ((float*)VertexManager::s_pCurBufferPointer)[2] = Memory_Read_Float(iAddress+8); + ((u32*)VertexManager::s_pCurBufferPointer)[0] = Memory_Read_U32(iAddress); + ((u32*)VertexManager::s_pCurBufferPointer)[1] = Memory_Read_U32(iAddress+4); + ((u32*)VertexManager::s_pCurBufferPointer)[2] = Memory_Read_U32(iAddress+8); VertexManager::s_pCurBufferPointer += 12; LOG_NORMF(); } @@ -484,9 +484,9 @@ void LOADERDECL VertexLoader_Normal::Normal_Index16_Float3_Indices3() { u16 Index = DataReadU16(); u32 iAddress = arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 4*3*i; - ((float*)VertexManager::s_pCurBufferPointer)[0] = Memory_Read_Float(iAddress); - ((float*)VertexManager::s_pCurBufferPointer)[1] = Memory_Read_Float(iAddress+4); - ((float*)VertexManager::s_pCurBufferPointer)[2] = Memory_Read_Float(iAddress+8); + ((u32*)VertexManager::s_pCurBufferPointer)[0] = Memory_Read_U32(iAddress); + ((u32*)VertexManager::s_pCurBufferPointer)[1] = Memory_Read_U32(iAddress+4); + ((u32*)VertexManager::s_pCurBufferPointer)[2] = Memory_Read_U32(iAddress+8); VertexManager::s_pCurBufferPointer += 12; LOG_NORMF(); } diff --git a/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader_Position.h b/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader_Position.h index 4807f9edf6..43991b8eed 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader_Position.h +++ b/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader_Position.h @@ -20,6 +20,52 @@ #define LOG_VTX() PRIM_LOG("vtx: %f %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[0], ((float*)VertexManager::s_pCurBufferPointer)[1], ((float*)VertexManager::s_pCurBufferPointer)[2]); +// Thoughts on the implementation of a vertex loader compiler. +// s_pCurBufferPointer should definitely be in a register. 
+// Could load the position scale factor in XMM7, for example. + +// The pointer inside DataReadU8 is another. +// Let's check out Pos_ReadDirect_UByte(). For Byte, replace MOVZX with MOVSX. + +/* +MOVZX(32, R(EAX), MOffset(ESI, 0)); +MOVZX(32, R(EBX), MOffset(ESI, 1)); +MOVZX(32, R(ECX), MOffset(ESI, 2)); +MOVD(XMM0, R(EAX)); +MOVD(XMM1, R(EBX)); +MOVD(XMM2, R(ECX)); +CVTDQ2PS(XMM0, XMM0); +CVTDQ2PS(XMM1, XMM1); +CVTDQ2PS(XMM2, XMM2); +MULSS(XMM0, XMM7); +MULSS(XMM1, XMM7); +MULSS(XMM2, XMM7); +MOVSS(MOffset(EDI, 0), XMM0); +MOVSS(MOffset(EDI, 4), XMM1); +MOVSS(MOffset(EDI, 8), XMM2); + +Alternatively, lookup table: +MOVZX(32, R(EAX), MOffset(ESI, 0)); +MOVZX(32, R(EBX), MOffset(ESI, 1)); +MOVZX(32, R(ECX), MOffset(ESI, 2)); +MOV(32, R(EAX), MComplex(LUTREG, EAX, 4)); +MOV(32, R(EBX), MComplex(LUTREG, EBX, 4)); +MOV(32, R(ECX), MComplex(LUTREG, ECX, 4)); +MOV(MOffset(EDI, 0), XMM0); +MOV(MOffset(EDI, 4), XMM1); +MOV(MOffset(EDI, 8), XMM2); + +SSE4: +PINSRB(XMM0, MOffset(ESI, 0), 0); +PINSRB(XMM0, MOffset(ESI, 1), 4); +PINSRB(XMM0, MOffset(ESI, 2), 8); +CVTDQ2PS(XMM0, XMM0); + +MULPS(XMM0, XMM7); +MOVUPS(MOffset(EDI, 0), XMM0); + + */ + // ============================================================================== // Direct // ============================================================================== @@ -73,10 +119,11 @@ void LOADERDECL Pos_ReadDirect_Short() void LOADERDECL Pos_ReadDirect_Float() { - ((float*)VertexManager::s_pCurBufferPointer)[0] = DataReadF32(); - ((float*)VertexManager::s_pCurBufferPointer)[1] = DataReadF32(); + // No need to use floating point here. 
+ ((u32 *)VertexManager::s_pCurBufferPointer)[0] = DataReadU32(); + ((u32 *)VertexManager::s_pCurBufferPointer)[1] = DataReadU32(); if (pVtxAttr->PosElements) - ((float*)VertexManager::s_pCurBufferPointer)[2] = DataReadF32(); + ((u32 *)VertexManager::s_pCurBufferPointer)[2] = DataReadU32(); else ((float*)VertexManager::s_pCurBufferPointer)[2] = 1.0f; LOG_VTX(); diff --git a/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader_TextCoord.h b/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader_TextCoord.h index ccc23d4903..f6b7aa6b7c 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader_TextCoord.h +++ b/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader_TextCoord.h @@ -94,15 +94,15 @@ void LOADERDECL TexCoord_ReadDirect_Short2() void LOADERDECL TexCoord_ReadDirect_Float1() { - ((float*)VertexManager::s_pCurBufferPointer)[0] = DataReadF32() * tcScaleU[tcIndex]; + ((u32*)VertexManager::s_pCurBufferPointer)[0] = DataReadU32(); LOG_TEX1(); VertexManager::s_pCurBufferPointer += 4; tcIndex++; } void LOADERDECL TexCoord_ReadDirect_Float2() { - ((float*)VertexManager::s_pCurBufferPointer)[0] = DataReadF32() * tcScaleU[tcIndex]; - ((float*)VertexManager::s_pCurBufferPointer)[1] = DataReadF32() * tcScaleV[tcIndex]; + ((u32*)VertexManager::s_pCurBufferPointer)[0] = DataReadU32(); + ((u32*)VertexManager::s_pCurBufferPointer)[1] = DataReadU32(); LOG_TEX2(); VertexManager::s_pCurBufferPointer += 8; tcIndex++; @@ -201,9 +201,7 @@ void LOADERDECL TexCoord_ReadIndex8_Float1() { u16 Index = DataReadU8(); u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]); - u32 uTemp; - uTemp = Memory_Read_U32(iAddress); - ((float*)VertexManager::s_pCurBufferPointer)[0] = *(float*)&uTemp * tcScaleU[tcIndex]; + ((u32*)VertexManager::s_pCurBufferPointer)[0] = Memory_Read_U32(iAddress); LOG_TEX1(); VertexManager::s_pCurBufferPointer += 4; tcIndex++; @@ -212,11 +210,8 @@ void LOADERDECL TexCoord_ReadIndex8_Float2() { u16 Index = DataReadU8(); u32 iAddress = 
arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]); - u32 uTemp; - uTemp = Memory_Read_U32(iAddress); - ((float*)VertexManager::s_pCurBufferPointer)[0] = *(float*)&uTemp * tcScaleU[tcIndex]; - uTemp = Memory_Read_U32(iAddress+4); - ((float*)VertexManager::s_pCurBufferPointer)[1] = *(float*)&uTemp * tcScaleV[tcIndex]; + ((u32*)VertexManager::s_pCurBufferPointer)[0] = Memory_Read_U32(iAddress); + ((u32*)VertexManager::s_pCurBufferPointer)[1] = Memory_Read_U32(iAddress+4); LOG_TEX2(); VertexManager::s_pCurBufferPointer += 8; tcIndex++; @@ -315,9 +310,8 @@ void LOADERDECL TexCoord_ReadIndex16_Float1() { u16 Index = DataReadU16(); u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]); - u32 uTemp; - uTemp = Memory_Read_U32(iAddress ); - ((float*)VertexManager::s_pCurBufferPointer)[0] = *(float*)&uTemp * tcScaleU[tcIndex]; + + ((u32*)VertexManager::s_pCurBufferPointer)[0] = Memory_Read_U32(iAddress); LOG_TEX1(); VertexManager::s_pCurBufferPointer += 4; tcIndex++; @@ -326,11 +320,9 @@ void LOADERDECL TexCoord_ReadIndex16_Float2() { u16 Index = DataReadU16(); u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]); - u32 uTemp; - uTemp = Memory_Read_U32(iAddress ); - ((float*)VertexManager::s_pCurBufferPointer)[0] = *(float*)&uTemp * tcScaleU[tcIndex]; - uTemp = Memory_Read_U32(iAddress+4); - ((float*)VertexManager::s_pCurBufferPointer)[1] = *(float*)&uTemp * tcScaleV[tcIndex]; + + ((u32*)VertexManager::s_pCurBufferPointer)[0] = Memory_Read_U32(iAddress); + ((u32*)VertexManager::s_pCurBufferPointer)[1] = Memory_Read_U32(iAddress + 4); LOG_TEX2(); VertexManager::s_pCurBufferPointer += 8; tcIndex++;