From 5c58227702d1783fe3b67de4c8a69932cea1d82d Mon Sep 17 00:00:00 2001 From: hrydgard Date: Sun, 23 Nov 2008 17:46:14 +0000 Subject: [PATCH] Optimize vertex loader with a mini JIT (only first step, more optimizations may follow). Some various error message and warning fixes. git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@1276 8ced0084-cf51-0410-be5f-012b33b47a6e --- Source/Core/Common/Src/ABI.cpp | 38 ++++- Source/Core/Common/Src/ABI.h | 5 + Source/Core/Common/Src/x64Emitter.cpp | 37 ----- Source/Core/Common/Src/x64Emitter.h | 10 +- .../Core/Core/Src/HW/PeripheralInterface.cpp | 9 +- .../Core/VideoCommon/Src/NativeVertexFormat.h | 2 +- Source/Core/VideoCommon/Src/Statistics.h | 10 +- Source/Plugins/Plugin_VideoDX9/Src/Render.cpp | 2 +- .../Plugin_VideoDX9/Src/VertexManager.cpp | 4 +- .../Plugins/Plugin_VideoOGL/Src/BPStructs.cpp | 4 +- .../Src/NativeVertexFormat.cpp | 4 +- Source/Plugins/Plugin_VideoOGL/Src/Render.cpp | 3 + .../Plugin_VideoOGL/Src/VertexLoader.cpp | 147 +++++++++++------- .../Plugin_VideoOGL/Src/VertexLoader.h | 2 + .../Plugin_VideoOGL/Src/VertexLoader_Color.h | 36 ++--- .../Src/VertexLoader_Normal.cpp | 48 +++--- .../Plugin_VideoOGL/Src/VertexLoader_Normal.h | 48 +++--- .../Src/VertexLoader_Position.h | 45 ++---- .../Src/VertexLoader_TextCoord.h | 62 ++++---- .../Plugin_VideoOGL/Src/VertexManager.cpp | 9 +- 20 files changed, 274 insertions(+), 251 deletions(-) diff --git a/Source/Core/Common/Src/ABI.cpp b/Source/Core/Common/Src/ABI.cpp index 097d6fc45b..2e29e2bc6c 100644 --- a/Source/Core/Common/Src/ABI.cpp +++ b/Source/Core/Common/Src/ABI.cpp @@ -4,6 +4,40 @@ using namespace Gen; +// Shared code between Win64 and Unix64 +// ==================================== + +// Sets up a __cdecl function. 
+void ABI_EmitPrologue(int maxCallParams) +{ +#ifdef _M_IX86 + // Don't really need to do anything +#elif defined(_M_X64) +#if _WIN32 + int stacksize = ((maxCallParams + 1) & ~1)*8 + 8; + // Set up a stack frame so that we can call functions + // TODO: use maxCallParams + SUB(64, R(RSP), Imm8(stacksize)); +#endif +#else +#error Arch not supported +#endif +} +void ABI_EmitEpilogue(int maxCallParams) +{ +#ifdef _M_IX86 + RET(); +#elif defined(_M_X64) +#ifdef _WIN32 + int stacksize = ((maxCallParams+1)&~1)*8 + 8; + ADD(64, R(RSP), Imm8(stacksize)); +#endif + RET(); +#else +#error Arch not supported +#endif +} + #ifdef _M_IX86 // All32 // Shared code between Win32 and Unix32 @@ -76,6 +110,7 @@ unsigned int ABI_GetAlignedFrameSize(unsigned int frameSize) { return alignedSize; } + void ABI_AlignStack(unsigned int frameSize) { // Mac OS X requires the stack to be 16-byte aligned before every call. // Linux requires the stack to be 16-byte aligned before calls that put SSE @@ -103,9 +138,6 @@ void ABI_RestoreStack(unsigned int frameSize) { #else -// Shared code between Win64 and Unix64 -// ==================================== - void ABI_CallFunctionC(void *func, u32 param1) { MOV(32, R(ABI_PARAM1), Imm32(param1)); CALL(func); diff --git a/Source/Core/Common/Src/ABI.h b/Source/Core/Common/Src/ABI.h index 2bbd169d00..2dfbf5aa19 100644 --- a/Source/Core/Common/Src/ABI.h +++ b/Source/Core/Common/Src/ABI.h @@ -107,6 +107,11 @@ unsigned int ABI_GetAlignedFrameSize(unsigned int frameSize); void ABI_AlignStack(unsigned int frameSize); void ABI_RestoreStack(unsigned int frameSize); +// Sets up a __cdecl function. +// Only x64 really needs the parameter. 
+void ABI_EmitPrologue(int maxCallParams); +void ABI_EmitEpilogue(int maxCallParams); + #ifdef _M_IX86 inline int ABI_GetNumXMMRegs() { return 8; } #else diff --git a/Source/Core/Common/Src/x64Emitter.cpp b/Source/Core/Common/Src/x64Emitter.cpp index fe839d8537..e3adb2e54b 100644 --- a/Source/Core/Common/Src/x64Emitter.cpp +++ b/Source/Core/Common/Src/x64Emitter.cpp @@ -1316,43 +1316,6 @@ namespace Gen } void RTDSC() { Write8(0x0F); Write8(0x31); } - - namespace Util - { - - // Sets up a __cdecl function. - void EmitPrologue(int maxCallParams) - { -#ifdef _M_IX86 - // Don't really need to do anything -#elif defined(_M_X64) -#if _WIN32 - int stacksize = ((maxCallParams + 1) & ~1)*8 + 8; - // Set up a stack frame so that we can call functions - // TODO: use maxCallParams - SUB(64, R(RSP), Imm8(stacksize)); -#endif -#else -#error Arch not supported -#endif - } - void EmitEpilogue(int maxCallParams) - { -#ifdef _M_IX86 - RET(); -#elif defined(_M_X64) -#ifdef _WIN32 - int stacksize = ((maxCallParams+1)&~1)*8 + 8; - ADD(64, R(RSP), Imm8(stacksize)); -#endif - RET(); -#else -#error Arch not supported -#endif - } - - } // namespace - // helper routines for setting pointers void CallCdeclFunction3(void* fnptr, u32 arg0, u32 arg1, u32 arg2) diff --git a/Source/Core/Common/Src/x64Emitter.h b/Source/Core/Common/Src/x64Emitter.h index 995143b91a..0238569a28 100644 --- a/Source/Core/Common/Src/x64Emitter.h +++ b/Source/Core/Common/Src/x64Emitter.h @@ -520,16 +520,8 @@ namespace Gen void PMOVMSKB(X64Reg dest, OpArg arg); void PSHUFB(X64Reg dest, OpArg arg); - void RTDSC(); + void RTDSC(); - namespace Util - { - // Sets up a __cdecl function. - // Only x64 really needs the parameter. 
- void EmitPrologue(int maxCallParams); - void EmitEpilogue(int maxCallParams); - } - void CallCdeclFunction3(void* fnptr, u32 arg0, u32 arg1, u32 arg2); void CallCdeclFunction4(void* fnptr, u32 arg0, u32 arg1, u32 arg2, u32 arg3); void CallCdeclFunction5(void* fnptr, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4); diff --git a/Source/Core/Core/Src/HW/PeripheralInterface.cpp b/Source/Core/Core/Src/HW/PeripheralInterface.cpp index 2e30eaa1b8..2d4424d9c5 100644 --- a/Source/Core/Core/Src/HW/PeripheralInterface.cpp +++ b/Source/Core/Core/Src/HW/PeripheralInterface.cpp @@ -145,13 +145,12 @@ void CPeripheralInterface::Write32(const u32 _uValue, const u32 _iAddress) { switch (_uValue) { case 3: - PanicAlert("Game wants to go to memory card manager. Since BIOS is being HLE:d - can't do that.\n" - "We might pop up a fake memcard manager here and then reset the game in the future :)\n"); + PanicAlert("The game wants to go to memory card manager. BIOS is being HLE:d - so we can't do that.\n"); break; default: { TCHAR szTemp[256]; - sprintf(szTemp, "Game wants to reset the machine. PI_RESET_CODE: (%08x)", _uValue); + sprintf(szTemp, "The game wants to reset the machine. PI_RESET_CODE: (%08x)", _uValue); PanicAlert(szTemp); } break; @@ -161,8 +160,8 @@ void CPeripheralInterface::Write32(const u32 _uValue, const u32 _iAddress) break; default: - LOG(PERIPHERALINTERFACE,"!!!!Unknown write!!!! 0x%08x", _iAddress); - PanicAlert("Unknown write to PI"); + LOG(PERIPHERALINTERFACE,"!!!!Unknown PI write!!!! 
0x%08x", _iAddress); + PanicAlert("Unknown write to PI: %08x", _iAddress); break; } } diff --git a/Source/Core/VideoCommon/Src/NativeVertexFormat.h b/Source/Core/VideoCommon/Src/NativeVertexFormat.h index bb9be45d79..3aee5cd753 100644 --- a/Source/Core/VideoCommon/Src/NativeVertexFormat.h +++ b/Source/Core/VideoCommon/Src/NativeVertexFormat.h @@ -55,7 +55,7 @@ enum { }; #define LOADERDECL __cdecl -typedef void (LOADERDECL *TPipelineFunction)(const void *); +typedef void (LOADERDECL *TPipelineFunction)(); enum VarType { VAR_BYTE, diff --git a/Source/Core/VideoCommon/Src/Statistics.h b/Source/Core/VideoCommon/Src/Statistics.h index e618d543ba..fe4a1e201c 100644 --- a/Source/Core/VideoCommon/Src/Statistics.h +++ b/Source/Core/VideoCommon/Src/Statistics.h @@ -20,8 +20,6 @@ struct Statistics { - int numPrimitives; - int numPixelShadersCreated; int numPixelShadersAlive; int numVertexShadersCreated; @@ -37,8 +35,6 @@ struct Statistics int numDListsCreated; int numDListsAlive; - int numJoins; - int numVertexLoaders; struct ThisFrame @@ -52,10 +48,14 @@ struct Statistics int numXFLoadsInDL; int numDLs; - int numDLPrims; int numPrims; + int numDLPrims; int numShaderChanges; + int numPrimitiveJoins; + int numDrawCalls; + int numBufferSplits; + int numDListsCalled; }; ThisFrame thisFrame; diff --git a/Source/Plugins/Plugin_VideoDX9/Src/Render.cpp b/Source/Plugins/Plugin_VideoDX9/Src/Render.cpp index b367c32b5b..71ae5fb05b 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/Render.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/Render.cpp @@ -207,8 +207,8 @@ void Renderer::SwapBuffers(void) p+=sprintf(p,"Num dlists called: %i\n",stats.numDListsCalled); p+=sprintf(p,"Num dlists created: %i\n",stats.numDListsCreated); p+=sprintf(p,"Num dlists alive: %i\n",stats.numDListsAlive); - p+=sprintf(p,"Num strip joins: %i\n",stats.numJoins); p+=sprintf(p,"Num primitives: %i\n",stats.thisFrame.numPrims); + p+=sprintf(p,"Num primitive joins: %i\n",stats.thisFrame.numPrimitiveJoins); 
p+=sprintf(p,"Num primitives (DL): %i\n",stats.thisFrame.numDLPrims); p+=sprintf(p,"Num XF loads: %i\n",stats.thisFrame.numXFLoads); p+=sprintf(p,"Num XF loads (DL): %i\n",stats.thisFrame.numXFLoadsInDL); diff --git a/Source/Plugins/Plugin_VideoDX9/Src/VertexManager.cpp b/Source/Plugins/Plugin_VideoDX9/Src/VertexManager.cpp index 6fc2a6d911..425e6fad19 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/VertexManager.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/VertexManager.cpp @@ -173,9 +173,9 @@ void AddVertices(int _primitive, int _numVertices, const DecodedVArray *varray) else //We are collecting the right type, keep going { _assert_msg_(vbufferwrite!=0, "collecting: vbufferwrite == 0!","WTF"); - INCSTAT(stats.numJoins); + INCSTAT(stats.thisFrame.numPrimitiveJoins); //Success, keep adding to unlocked buffer - int last=indexGen.GetNumVerts(); + int last = indexGen.GetNumVerts(); AddIndices(_primitive, _numVertices); if (_numVertices >= MAXVBUFFERSIZE) diff --git a/Source/Plugins/Plugin_VideoOGL/Src/BPStructs.cpp b/Source/Plugins/Plugin_VideoOGL/Src/BPStructs.cpp index a0e7c7c196..0eabc01cf9 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/BPStructs.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/BPStructs.cpp @@ -462,8 +462,8 @@ void BPWritten(int addr, int changes, int newval) { // the number of lines copied is determined by the y scale * source efb height float yScale = bpmem.dispcopyyscale / 256.0f; - float xfbLines = bpmem.copyTexSrcWH.y + 1 * yScale; - XFB_Write(Memory_GetPtr(bpmem.copyTexDest<<5), multirc, (bpmem.copyMipMapStrideChannels << 4), xfbLines); + float xfbLines = bpmem.copyTexSrcWH.y + 1.0 * yScale; + XFB_Write(Memory_GetPtr(bpmem.copyTexDest<<5), multirc, (bpmem.copyMipMapStrideChannels << 4), (int)xfbLines); } else { diff --git a/Source/Plugins/Plugin_VideoOGL/Src/NativeVertexFormat.cpp b/Source/Plugins/Plugin_VideoOGL/Src/NativeVertexFormat.cpp index 25a8095ea8..d13cb06daf 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/NativeVertexFormat.cpp +++ 
b/Source/Plugins/Plugin_VideoOGL/Src/NativeVertexFormat.cpp @@ -85,7 +85,7 @@ void NativeVertexFormat::Initialize(const PortableVertexDeclaration &vtx_decl) // Alright, we have our vertex declaration. Compile some crazy code to set it quickly using GL. u8 *old_code_ptr = GetWritableCodePtr(); SetCodePtr(m_compiledCode); - Util::EmitPrologue(6); + ABI_EmitPrologue(6); CallCdeclFunction4_I(glVertexPointer, 3, GL_FLOAT, vtx_decl.stride, 0); @@ -137,7 +137,7 @@ void NativeVertexFormat::Initialize(const PortableVertexDeclaration &vtx_decl) CallCdeclFunction6((void *)glVertexAttribPointer, SHADER_POSMTX_ATTRIB, 4, GL_UNSIGNED_BYTE, GL_FALSE, vtx_decl.stride, vtx_decl.posmtx_offset); } - Util::EmitEpilogue(6); + ABI_EmitEpilogue(6); if (Gen::GetCodePtr() - (u8*)m_compiledCode > COMPILED_CODE_SIZE) { Crash(); diff --git a/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp b/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp index ea4a56544b..6927a26b2c 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp @@ -775,6 +775,9 @@ void Renderer::SwapBuffers() //p+=sprintf(p,"Num dlists alive: %i\n",stats.numDListsAlive); //p+=sprintf(p,"Num strip joins: %i\n",stats.numJoins); p+=sprintf(p,"Num primitives: %i\n",stats.thisFrame.numPrims); + p+=sprintf(p,"Num primitive joins: %i\n",stats.thisFrame.numPrimitiveJoins); + p+=sprintf(p,"Num buffer splits: %i\n",stats.thisFrame.numBufferSplits); + p+=sprintf(p,"Num draw calls: %i\n",stats.thisFrame.numDrawCalls); p+=sprintf(p,"Num primitives (DL): %i\n",stats.thisFrame.numDLPrims); p+=sprintf(p,"Num XF loads: %i\n",stats.thisFrame.numXFLoads); p+=sprintf(p,"Num XF loads (DL): %i\n",stats.thisFrame.numXFLoadsInDL); diff --git a/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader.cpp b/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader.cpp index fba0314008..c426359b60 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader.cpp @@ -22,13 
+22,19 @@ #include "Common.h" #include "Config.h" #include "Profiler.h" +#include "MemoryUtil.h" +#include "x64Emitter.h" +#include "ABI.h" +#include "Statistics.h" #include "VertexManager.h" #include "VertexLoaderManager.h" #include "VertexLoader.h" #include "BPStructs.h" #include "DataReader.h" +#define USE_JIT + NativeVertexFormat *g_nativeVertexFmt; //these don't need to be saved @@ -49,14 +55,17 @@ static u8 s_curposmtx; static u8 s_curtexmtx[8]; static int s_texmtxwrite = 0; static int s_texmtxread = 0; +static TVtxAttr* pVtxAttr; -void LOADERDECL PosMtx_ReadDirect_UByte(const void *_p) +using namespace Gen; + +void LOADERDECL PosMtx_ReadDirect_UByte() { s_curposmtx = DataReadU8() & 0x3f; PRIM_LOG("posmtx: %d, ", s_curposmtx); } -void LOADERDECL PosMtx_Write(const void *_p) +void LOADERDECL PosMtx_Write() { *VertexManager::s_pCurBufferPointer++ = s_curposmtx; *VertexManager::s_pCurBufferPointer++ = 0; @@ -64,27 +73,27 @@ void LOADERDECL PosMtx_Write(const void *_p) *VertexManager::s_pCurBufferPointer++ = 0; } -void LOADERDECL TexMtx_ReadDirect_UByte(const void *_p) +void LOADERDECL TexMtx_ReadDirect_UByte() { s_curtexmtx[s_texmtxread] = DataReadU8()&0x3f; PRIM_LOG("texmtx%d: %d, ", s_texmtxread, s_curtexmtx[s_texmtxread]); s_texmtxread++; } -void LOADERDECL TexMtx_Write_Float(const void *_p) +void LOADERDECL TexMtx_Write_Float() { *(float*)VertexManager::s_pCurBufferPointer = (float)s_curtexmtx[s_texmtxwrite++]; VertexManager::s_pCurBufferPointer += 4; } -void LOADERDECL TexMtx_Write_Float2(const void *_p) +void LOADERDECL TexMtx_Write_Float2() { ((float*)VertexManager::s_pCurBufferPointer)[0] = 0; ((float*)VertexManager::s_pCurBufferPointer)[1] = (float)s_curtexmtx[s_texmtxwrite++]; VertexManager::s_pCurBufferPointer += 8; } -void LOADERDECL TexMtx_Write_Short3(const void *_p) +void LOADERDECL TexMtx_Write_Short3() { ((s16*)VertexManager::s_pCurBufferPointer)[0] = 0; ((s16*)VertexManager::s_pCurBufferPointer)[1] = 0; @@ -97,6 +106,8 @@ void LOADERDECL 
TexMtx_Write_Short3(const void *_p) #include "VertexLoader_Color.h" #include "VertexLoader_TextCoord.h" +#define COMPILED_CODE_SIZE 4096 + VertexLoader::VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr) { m_VertexSize = 0; @@ -107,11 +118,16 @@ VertexLoader::VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr) m_VtxDesc = vtx_desc; SetVAT(vtx_attr.g0.Hex, vtx_attr.g1.Hex, vtx_attr.g2.Hex); + m_compiledCode = (u8 *)AllocateExecutableMemory(COMPILED_CODE_SIZE, false); + if (m_compiledCode) { + memset(m_compiledCode, 0, COMPILED_CODE_SIZE); + } CompileVertexTranslator(); } VertexLoader::~VertexLoader() { + FreeMemoryPages(m_compiledCode, COMPILED_CODE_SIZE); delete m_NativeFmt; } @@ -119,6 +135,9 @@ void VertexLoader::CompileVertexTranslator() { m_VertexSize = 0; + u8 *old_code_ptr = GetWritableCodePtr(); + SetCodePtr(m_compiledCode); + ABI_EmitPrologue(4); // Colors const int col[2] = {m_VtxDesc.Color0, m_VtxDesc.Color1}; // TextureCoord @@ -144,7 +163,7 @@ void VertexLoader::CompileVertexTranslator() // Position Matrix Index if (m_VtxDesc.PosMatIdx) { - m_PipelineStages[m_numPipelineStages++] = PosMtx_ReadDirect_UByte; + WriteCall(PosMtx_ReadDirect_UByte); m_NativeFmt->m_components |= VB_HAS_POSMTXIDX; m_VertexSize += 1; } @@ -430,7 +449,10 @@ void VertexLoader::CompileVertexTranslator() vtx_decl.stride = native_stride; if (vtx_decl.stride != offset) PanicAlert("offset/stride mismatch, %i %i", vtx_decl.stride, offset); - +#ifdef USE_JIT + ABI_EmitEpilogue(4); +#endif + SetCodePtr(old_code_ptr); m_NativeFmt->Initialize(vtx_decl); } @@ -532,7 +554,11 @@ void VertexLoader::SetupTexCoord(int num, int mode, int format, int elements, in void VertexLoader::WriteCall(TPipelineFunction func) { +#ifdef USE_JIT + CALL((void*)func); +#else m_PipelineStages[m_numPipelineStages++] = func; +#endif } void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count) @@ -569,6 +595,7 @@ void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int 
count) m_VtxAttr.texCoord[6].Frac = g_VtxAttr[vtx_attr_group].g2.Tex6Frac; m_VtxAttr.texCoord[7].Frac = g_VtxAttr[vtx_attr_group].g2.Tex7Frac; + pVtxAttr = &m_VtxAttr; posScale = shiftLookup[m_VtxAttr.PosFrac]; if (m_NativeFmt->m_components & VB_HAS_UVALL) { for (int i = 0; i < 8; i++) { @@ -582,7 +609,7 @@ void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count) // if strips or fans, make sure all vertices can fit in buffer, otherwise flush int granularity = 1; switch (primitive) { - case 3: // strip + case 3: // strip .. hm, weird case 4: // fan if (VertexManager::GetRemainingSize() < 3 * native_stride) VertexManager::Flush(); @@ -603,59 +630,67 @@ void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count) } int startv = 0, extraverts = 0; - for (int v = 0; v < count; v++) + int v = 0; + + while (v < count) { - if ((v % granularity) == 0) - { - if (VertexManager::GetRemainingSize() < granularity*native_stride) { - // This buffer full - break current primitive and flush, to switch to the next buffer. - u8* plastptr = VertexManager::s_pCurBufferPointer; - if (v - startv > 0) - VertexManager::AddVertices(primitive, v - startv + extraverts); - VertexManager::Flush(); - // Why does this need to be so complicated? 
- switch (primitive) { - case 3: // triangle strip, copy last two vertices - // a little trick since we have to keep track of signs - if (v & 1) { - memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-2*native_stride, native_stride); - memcpy_gc(VertexManager::s_pCurBufferPointer+native_stride, plastptr-native_stride*2, 2*native_stride); - VertexManager::s_pCurBufferPointer += native_stride*3; - extraverts = 3; - } - else { - memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride*2, native_stride*2); - VertexManager::s_pCurBufferPointer += native_stride*2; - extraverts = 2; - } - break; - case 4: // tri fan, copy first and last vert - memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride*(v-startv+extraverts), native_stride); - VertexManager::s_pCurBufferPointer += native_stride; - memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride, native_stride); - VertexManager::s_pCurBufferPointer += native_stride; + if (VertexManager::GetRemainingSize() < granularity*native_stride) { + INCSTAT(stats.thisFrame.numBufferSplits); + // This buffer full - break current primitive and flush, to switch to the next buffer. + u8* plastptr = VertexManager::s_pCurBufferPointer; + if (v - startv > 0) + VertexManager::AddVertices(primitive, v - startv + extraverts); + VertexManager::Flush(); + // Why does this need to be so complicated? 
+ switch (primitive) { + case 3: // triangle strip, copy last two vertices + // a little trick since we have to keep track of signs + if (v & 1) { + memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-2*native_stride, native_stride); + memcpy_gc(VertexManager::s_pCurBufferPointer+native_stride, plastptr-native_stride*2, 2*native_stride); + VertexManager::s_pCurBufferPointer += native_stride*3; + extraverts = 3; + } + else { + memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride*2, native_stride*2); + VertexManager::s_pCurBufferPointer += native_stride*2; extraverts = 2; - break; - case 6: // line strip - memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride, native_stride); - VertexManager::s_pCurBufferPointer += native_stride; - extraverts = 1; - break; - default: - extraverts = 0; - break; - } - startv = v; + } + break; + case 4: // tri fan, copy first and last vert + memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride*(v-startv+extraverts), native_stride); + VertexManager::s_pCurBufferPointer += native_stride; + memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride, native_stride); + VertexManager::s_pCurBufferPointer += native_stride; + extraverts = 2; + break; + case 6: // line strip + memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride, native_stride); + VertexManager::s_pCurBufferPointer += native_stride; + extraverts = 1; + break; + default: + extraverts = 0; + break; } + startv = v; } - tcIndex = 0; - colIndex = 0; - s_texmtxwrite = s_texmtxread = 0; - for (int i = 0; i < m_numPipelineStages; i++) - m_PipelineStages[i](&m_VtxAttr); + for (int s = 0; s < granularity; s++) + { + tcIndex = 0; + colIndex = 0; + s_texmtxwrite = s_texmtxread = 0; + #ifdef USE_JIT + ((void (*)())(void*)m_compiledCode)(); + #else + for (int i = 0; i < m_numPipelineStages; i++) + m_PipelineStages[i](); + #endif - PRIM_LOG("\n"); + PRIM_LOG("\n"); + v++; + } } if (startv < count) diff --git 
a/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader.h b/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader.h index 3519d920ff..74586ad3db 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader.h +++ b/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader.h @@ -81,6 +81,8 @@ private: TPipelineFunction m_PipelineStages[32]; // TODO - figure out real max. it's lower. int m_numPipelineStages; + u8 *m_compiledCode; + void SetupColor(int num, int _iMode, int _iFormat, int _iElements); void SetupTexCoord(int num, int _iMode, int _iFormat, int _iElements, int _iFrac); diff --git a/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader_Color.h b/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader_Color.h index dc91765dbb..76c298fb0d 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader_Color.h +++ b/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader_Color.h @@ -81,7 +81,7 @@ inline u32 _Read32(u32 iAddress) ////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////// -void LOADERDECL Color_ReadDirect_24b_888(const void *_p) +void LOADERDECL Color_ReadDirect_24b_888() { u32 col = DataReadU8()<PosElements) @@ -36,9 +35,8 @@ void LOADERDECL Pos_ReadDirect_UByte(const void *_p) VertexManager::s_pCurBufferPointer += 12; } -void LOADERDECL Pos_ReadDirect_Byte(const void *_p) +void LOADERDECL Pos_ReadDirect_Byte() { - TVtxAttr* pVtxAttr = (TVtxAttr*)_p; ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s8)DataReadU8() * posScale; ((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(s8)DataReadU8() * posScale; if (pVtxAttr->PosElements) @@ -49,9 +47,8 @@ void LOADERDECL Pos_ReadDirect_Byte(const void *_p) VertexManager::s_pCurBufferPointer += 12; } -void LOADERDECL Pos_ReadDirect_UShort(const void *_p) +void LOADERDECL Pos_ReadDirect_UShort() { - TVtxAttr* pVtxAttr = (TVtxAttr*)_p; ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)DataReadU16() * posScale; 
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)DataReadU16() * posScale; if (pVtxAttr->PosElements) @@ -62,9 +59,8 @@ void LOADERDECL Pos_ReadDirect_UShort(const void *_p) VertexManager::s_pCurBufferPointer += 12; } -void LOADERDECL Pos_ReadDirect_Short(const void *_p) +void LOADERDECL Pos_ReadDirect_Short() { - TVtxAttr* pVtxAttr = (TVtxAttr*)_p; ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s16)DataReadU16() * posScale; ((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(s16)DataReadU16() * posScale; if (pVtxAttr->PosElements) @@ -75,9 +71,8 @@ void LOADERDECL Pos_ReadDirect_Short(const void *_p) VertexManager::s_pCurBufferPointer += 12; } -void LOADERDECL Pos_ReadDirect_Float(const void *_p) +void LOADERDECL Pos_ReadDirect_Float() { - TVtxAttr* pVtxAttr = (TVtxAttr*)_p; ((float*)VertexManager::s_pCurBufferPointer)[0] = DataReadF32(); ((float*)VertexManager::s_pCurBufferPointer)[1] = DataReadF32(); if (pVtxAttr->PosElements) @@ -127,37 +122,32 @@ void LOADERDECL Pos_ReadDirect_Float(const void *_p) // ============================================================================== // Index 8 // ============================================================================== -void LOADERDECL Pos_ReadIndex8_UByte(const void *_p) +void LOADERDECL Pos_ReadIndex8_UByte() { - TVtxAttr* pVtxAttr = (TVtxAttr*)_p; u8 Index = DataReadU8(); Pos_ReadIndex_Byte(u8); } -void LOADERDECL Pos_ReadIndex8_Byte(const void *_p) +void LOADERDECL Pos_ReadIndex8_Byte() { - TVtxAttr* pVtxAttr = (TVtxAttr*)_p; u8 Index = DataReadU8(); Pos_ReadIndex_Byte(s8); } -void LOADERDECL Pos_ReadIndex8_UShort(const void *_p) +void LOADERDECL Pos_ReadIndex8_UShort() { - TVtxAttr* pVtxAttr = (TVtxAttr*)_p; u8 Index = DataReadU8(); Pos_ReadIndex_Short(u16); } -void LOADERDECL Pos_ReadIndex8_Short(const void *_p) +void LOADERDECL Pos_ReadIndex8_Short() { - TVtxAttr* pVtxAttr = (TVtxAttr*)_p; u8 Index = DataReadU8(); Pos_ReadIndex_Short(s16); } -void LOADERDECL 
Pos_ReadIndex8_Float(const void *_p) +void LOADERDECL Pos_ReadIndex8_Float() { - TVtxAttr* pVtxAttr = (TVtxAttr*)_p; u8 Index = DataReadU8(); Pos_ReadIndex_Float(); } @@ -166,34 +156,29 @@ void LOADERDECL Pos_ReadIndex8_Float(const void *_p) // Index 16 // ============================================================================== -void LOADERDECL Pos_ReadIndex16_UByte(const void *_p){ - TVtxAttr* pVtxAttr = (TVtxAttr*)_p; +void LOADERDECL Pos_ReadIndex16_UByte(){ u16 Index = DataReadU16(); Pos_ReadIndex_Byte(u8); } -void LOADERDECL Pos_ReadIndex16_Byte(const void *_p){ - TVtxAttr* pVtxAttr = (TVtxAttr*)_p; +void LOADERDECL Pos_ReadIndex16_Byte(){ u16 Index = DataReadU16(); Pos_ReadIndex_Byte(s8); } -void LOADERDECL Pos_ReadIndex16_UShort(const void *_p){ - TVtxAttr* pVtxAttr = (TVtxAttr*)_p; +void LOADERDECL Pos_ReadIndex16_UShort(){ u16 Index = DataReadU16(); Pos_ReadIndex_Short(u16); } -void LOADERDECL Pos_ReadIndex16_Short(const void *_p) +void LOADERDECL Pos_ReadIndex16_Short() { - TVtxAttr* pVtxAttr = (TVtxAttr*)_p; u16 Index = DataReadU16(); Pos_ReadIndex_Short(s16); } -void LOADERDECL Pos_ReadIndex16_Float(const void *_p) +void LOADERDECL Pos_ReadIndex16_Float() { - TVtxAttr* pVtxAttr = (TVtxAttr*)_p; u16 Index = DataReadU16(); Pos_ReadIndex_Float(); } diff --git a/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader_TextCoord.h b/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader_TextCoord.h index 4596fa5f45..ccc23d4903 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader_TextCoord.h +++ b/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader_TextCoord.h @@ -23,19 +23,19 @@ extern int tcIndex; -void LOADERDECL TexCoord_Read_Dummy(const void *_p) +void LOADERDECL TexCoord_Read_Dummy() { tcIndex++; } -void LOADERDECL TexCoord_ReadDirect_UByte1(const void *_p) +void LOADERDECL TexCoord_ReadDirect_UByte1() { ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)DataReadU8() * tcScaleU[tcIndex]; LOG_TEX1(); VertexManager::s_pCurBufferPointer += 4; tcIndex++; } 
-void LOADERDECL TexCoord_ReadDirect_UByte2(const void *_p) +void LOADERDECL TexCoord_ReadDirect_UByte2() { ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)DataReadU8() * tcScaleU[tcIndex]; ((float*)VertexManager::s_pCurBufferPointer)[1] = (float)DataReadU8() * tcScaleV[tcIndex]; @@ -44,14 +44,14 @@ void LOADERDECL TexCoord_ReadDirect_UByte2(const void *_p) tcIndex++; } -void LOADERDECL TexCoord_ReadDirect_Byte1(const void *_p) +void LOADERDECL TexCoord_ReadDirect_Byte1() { ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s8)DataReadU8() * tcScaleU[tcIndex]; LOG_TEX1(); VertexManager::s_pCurBufferPointer += 4; tcIndex++; } -void LOADERDECL TexCoord_ReadDirect_Byte2(const void *_p) +void LOADERDECL TexCoord_ReadDirect_Byte2() { ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s8)DataReadU8() * tcScaleU[tcIndex]; ((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(s8)DataReadU8() * tcScaleV[tcIndex]; @@ -60,14 +60,14 @@ void LOADERDECL TexCoord_ReadDirect_Byte2(const void *_p) tcIndex++; } -void LOADERDECL TexCoord_ReadDirect_UShort1(const void *_p) +void LOADERDECL TexCoord_ReadDirect_UShort1() { ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)DataReadU16() * tcScaleU[tcIndex]; LOG_TEX1(); VertexManager::s_pCurBufferPointer += 4; tcIndex++; } -void LOADERDECL TexCoord_ReadDirect_UShort2(const void *_p) +void LOADERDECL TexCoord_ReadDirect_UShort2() { ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)DataReadU16() * tcScaleU[tcIndex]; ((float*)VertexManager::s_pCurBufferPointer)[1] = (float)DataReadU16() * tcScaleV[tcIndex]; @@ -76,14 +76,14 @@ void LOADERDECL TexCoord_ReadDirect_UShort2(const void *_p) tcIndex++; } -void LOADERDECL TexCoord_ReadDirect_Short1(const void *_p) +void LOADERDECL TexCoord_ReadDirect_Short1() { ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s16)DataReadU16() * tcScaleU[tcIndex]; LOG_TEX1(); VertexManager::s_pCurBufferPointer += 4; tcIndex++; } -void LOADERDECL 
TexCoord_ReadDirect_Short2(const void *_p) +void LOADERDECL TexCoord_ReadDirect_Short2() { ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s16)DataReadU16() * tcScaleU[tcIndex]; ((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(s16)DataReadU16() * tcScaleV[tcIndex]; @@ -92,14 +92,14 @@ void LOADERDECL TexCoord_ReadDirect_Short2(const void *_p) tcIndex++; } -void LOADERDECL TexCoord_ReadDirect_Float1(const void *_p) +void LOADERDECL TexCoord_ReadDirect_Float1() { ((float*)VertexManager::s_pCurBufferPointer)[0] = DataReadF32() * tcScaleU[tcIndex]; LOG_TEX1(); VertexManager::s_pCurBufferPointer += 4; tcIndex++; } -void LOADERDECL TexCoord_ReadDirect_Float2(const void *_p) +void LOADERDECL TexCoord_ReadDirect_Float2() { ((float*)VertexManager::s_pCurBufferPointer)[0] = DataReadF32() * tcScaleU[tcIndex]; ((float*)VertexManager::s_pCurBufferPointer)[1] = DataReadF32() * tcScaleV[tcIndex]; @@ -109,7 +109,7 @@ void LOADERDECL TexCoord_ReadDirect_Float2(const void *_p) } // ================================================================================== -void LOADERDECL TexCoord_ReadIndex8_UByte1(const void *_p) +void LOADERDECL TexCoord_ReadIndex8_UByte1() { u8 Index = DataReadU8(); u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]); @@ -119,7 +119,7 @@ void LOADERDECL TexCoord_ReadIndex8_UByte1(const void *_p) VertexManager::s_pCurBufferPointer += 4; tcIndex++; } -void LOADERDECL TexCoord_ReadIndex8_UByte2(const void *_p) +void LOADERDECL TexCoord_ReadIndex8_UByte2() { u8 Index = DataReadU8(); u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]); @@ -131,7 +131,7 @@ void LOADERDECL TexCoord_ReadIndex8_UByte2(const void *_p) tcIndex++; } -void LOADERDECL TexCoord_ReadIndex8_Byte1(const void *_p) +void LOADERDECL TexCoord_ReadIndex8_Byte1() { u8 Index = DataReadU8(); u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * 
arraystrides[ARRAY_TEXCOORD0+tcIndex]); @@ -141,7 +141,7 @@ void LOADERDECL TexCoord_ReadIndex8_Byte1(const void *_p) VertexManager::s_pCurBufferPointer += 4; tcIndex++; } -void LOADERDECL TexCoord_ReadIndex8_Byte2(const void *_p) +void LOADERDECL TexCoord_ReadIndex8_Byte2() { u8 Index = DataReadU8(); u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]); @@ -153,7 +153,7 @@ void LOADERDECL TexCoord_ReadIndex8_Byte2(const void *_p) tcIndex++; } -void LOADERDECL TexCoord_ReadIndex8_UShort1(const void *_p) +void LOADERDECL TexCoord_ReadIndex8_UShort1() { u8 Index = DataReadU8(); u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]); @@ -163,7 +163,7 @@ void LOADERDECL TexCoord_ReadIndex8_UShort1(const void *_p) VertexManager::s_pCurBufferPointer += 4; tcIndex++; } -void LOADERDECL TexCoord_ReadIndex8_UShort2(const void *_p) +void LOADERDECL TexCoord_ReadIndex8_UShort2() { u8 Index = DataReadU8(); u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]); @@ -175,7 +175,7 @@ void LOADERDECL TexCoord_ReadIndex8_UShort2(const void *_p) tcIndex++; } -void LOADERDECL TexCoord_ReadIndex8_Short1(const void *_p) +void LOADERDECL TexCoord_ReadIndex8_Short1() { u8 Index = DataReadU8(); u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]); @@ -185,7 +185,7 @@ void LOADERDECL TexCoord_ReadIndex8_Short1(const void *_p) VertexManager::s_pCurBufferPointer += 4; tcIndex++; } -void LOADERDECL TexCoord_ReadIndex8_Short2(const void *_p) +void LOADERDECL TexCoord_ReadIndex8_Short2() { u8 Index = DataReadU8(); u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]); @@ -197,7 +197,7 @@ void LOADERDECL TexCoord_ReadIndex8_Short2(const void *_p) tcIndex++; } -void LOADERDECL TexCoord_ReadIndex8_Float1(const void *_p) +void LOADERDECL TexCoord_ReadIndex8_Float1() { 
u16 Index = DataReadU8(); u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]); @@ -208,7 +208,7 @@ void LOADERDECL TexCoord_ReadIndex8_Float1(const void *_p) VertexManager::s_pCurBufferPointer += 4; tcIndex++; } -void LOADERDECL TexCoord_ReadIndex8_Float2(const void *_p) +void LOADERDECL TexCoord_ReadIndex8_Float2() { u16 Index = DataReadU8(); u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]); @@ -223,7 +223,7 @@ void LOADERDECL TexCoord_ReadIndex8_Float2(const void *_p) } // ================================================================================== -void LOADERDECL TexCoord_ReadIndex16_UByte1(const void *_p) +void LOADERDECL TexCoord_ReadIndex16_UByte1() { u16 Index = DataReadU16(); u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]); @@ -233,7 +233,7 @@ void LOADERDECL TexCoord_ReadIndex16_UByte1(const void *_p) VertexManager::s_pCurBufferPointer += 4; tcIndex++; } -void LOADERDECL TexCoord_ReadIndex16_UByte2(const void *_p) +void LOADERDECL TexCoord_ReadIndex16_UByte2() { u16 Index = DataReadU16(); u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]); @@ -245,7 +245,7 @@ void LOADERDECL TexCoord_ReadIndex16_UByte2(const void *_p) tcIndex++; } -void LOADERDECL TexCoord_ReadIndex16_Byte1(const void *_p) +void LOADERDECL TexCoord_ReadIndex16_Byte1() { u16 Index = DataReadU16(); u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]); @@ -255,7 +255,7 @@ void LOADERDECL TexCoord_ReadIndex16_Byte1(const void *_p) VertexManager::s_pCurBufferPointer += 4; tcIndex++; } -void LOADERDECL TexCoord_ReadIndex16_Byte2(const void *_p) +void LOADERDECL TexCoord_ReadIndex16_Byte2() { u16 Index = DataReadU16(); u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]); @@ -267,7 +267,7 @@ 
void LOADERDECL TexCoord_ReadIndex16_Byte2(const void *_p) tcIndex++; } -void LOADERDECL TexCoord_ReadIndex16_UShort1(const void *_p) +void LOADERDECL TexCoord_ReadIndex16_UShort1() { u16 Index = DataReadU16(); u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]); @@ -277,7 +277,7 @@ void LOADERDECL TexCoord_ReadIndex16_UShort1(const void *_p) VertexManager::s_pCurBufferPointer += 4; tcIndex++; } -void LOADERDECL TexCoord_ReadIndex16_UShort2(const void *_p) +void LOADERDECL TexCoord_ReadIndex16_UShort2() { u16 Index = DataReadU16(); u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]); @@ -289,7 +289,7 @@ void LOADERDECL TexCoord_ReadIndex16_UShort2(const void *_p) tcIndex++; } -void LOADERDECL TexCoord_ReadIndex16_Short1(const void *_p) +void LOADERDECL TexCoord_ReadIndex16_Short1() { u16 Index = DataReadU16(); u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]); @@ -299,7 +299,7 @@ void LOADERDECL TexCoord_ReadIndex16_Short1(const void *_p) VertexManager::s_pCurBufferPointer += 4; tcIndex++; } -void LOADERDECL TexCoord_ReadIndex16_Short2(const void *_p) +void LOADERDECL TexCoord_ReadIndex16_Short2() { u16 Index = DataReadU16(); u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]); @@ -311,7 +311,7 @@ void LOADERDECL TexCoord_ReadIndex16_Short2(const void *_p) tcIndex++; } -void LOADERDECL TexCoord_ReadIndex16_Float1(const void *_p) +void LOADERDECL TexCoord_ReadIndex16_Float1() { u16 Index = DataReadU16(); u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]); @@ -322,7 +322,7 @@ void LOADERDECL TexCoord_ReadIndex16_Float1(const void *_p) VertexManager::s_pCurBufferPointer += 4; tcIndex++; } -void LOADERDECL TexCoord_ReadIndex16_Float2(const void *_p) +void LOADERDECL TexCoord_ReadIndex16_Float2() { u16 Index = DataReadU16(); 
u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]); diff --git a/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp b/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp index c9f672ec93..a772813fa7 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp @@ -108,6 +108,12 @@ void AddVertices(int primitive, int numvertices) _assert_( numvertices > 0 ); ADDSTAT(stats.thisFrame.numPrims, numvertices); + /* + if (s_vStoredPrimitives.size() && s_vStoredPrimitives[s_vStoredPrimitives.size() - 1].first == primitive) { + // Actually, just count potential primitive joins. + // Doesn't seem worth it in Metroid Prime games. + INCSTAT(stats.thisFrame.numPrimitiveJoins); + }*/ s_vStoredPrimitives.push_back(std::pair<int, int>(c_primitiveType[primitive], numvertices)); #if defined(_DEBUG) || defined(DEBUGFAST) @@ -150,7 +156,7 @@ void Flush() DVSTARTPROFILE(); - GL_REPORT_ERRORD(); + GL_REPORT_ERRORD(); glBindBuffer(GL_ARRAY_BUFFER, s_vboBuffers[s_nCurVBOIndex]); glBufferData(GL_ARRAY_BUFFER, s_pCurBufferPointer - s_pBaseBufferPointer, s_pBaseBufferPointer, GL_STREAM_DRAW); @@ -263,6 +269,7 @@ void Flush() int offset = 0; for (std::vector< std::pair<int, int> >::const_iterator it = s_vStoredPrimitives.begin(); it != s_vStoredPrimitives.end(); ++it) { + INCSTAT(stats.thisFrame.numDrawCalls); glDrawArrays(it->first, offset, it->second); offset += it->second; }