I hope you all like this. Changes:

OpenGL:
Reordered the rendering path to make it more generic and, I think, a little bit faster.
Reduced the index buffer size to make it friendlier to older video hardware.
Got rid of the remaining annoying sps (I hope).
D3D:
Implemented the same path as OpenGL to make the plugins more similar and maintainable.
Fixed Peek_Z; this means pushing stars in SMG now works.
Please test these changes heavily and compare the performance with the old path.
Thanks to hrydgard for letting me participate :).

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@4353 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
Rodolfo Osvaldo Bogado 2009-10-02 14:03:07 +00:00
parent f099ea7be4
commit 9d67518f71
5 changed files with 180 additions and 209 deletions

View File

@ -44,7 +44,7 @@
#define GX_DRAW_LINES 0x5 // 0xA8
#define GX_DRAW_LINE_STRIP 0x6 // 0xB0
#define GX_DRAW_POINTS 0x7 // 0xB8
// Fake value used by the plugins (e.g. as the initial "last primitive").
// No trailing semicolon: the macro must stay usable inside expressions such as
// `x == GX_DRAW_NONE`.
#define GX_DRAW_NONE 0x1
void OpcodeDecoder_Init();
void OpcodeDecoder_Shutdown();
void OpcodeDecoder_Run(bool skipped_frame);

View File

@ -57,7 +57,9 @@ void Create()
hr = s_efb_color_texture->GetSurfaceLevel(0, &s_efb_color_surface);
CHECK(hr);
hr = D3D::dev->CreateDepthStencilSurface(target_width, target_height, D3DFMT_D24S8,
// A lockable depth format (D3DFMT_D32F_LOCKABLE or D3DFMT_D16_LOCKABLE) must be used for peek_z to work.
// 16 bits makes a poor Z buffer, and reading the value directly as a float requires a float format, so D3DFMT_D32F_LOCKABLE is used.
hr = D3D::dev->CreateDepthStencilSurface(target_width, target_height, D3DFMT_D32F_LOCKABLE,
D3DMULTISAMPLE_NONE, 0, FALSE, &s_efb_depth_surface, NULL);
CHECK(hr);
}

View File

@ -472,22 +472,25 @@ u32 Renderer::AccessEFB(EFBAccessType type, int x, int y)
if (D3D::dev->GetDepthStencilSurface(&pZBuffer) == D3DERR_NOTFOUND)
pZBuffer = NULL;
//D3DLOCKED_RECT drect;
//HRESULT hr;
D3DLOCKED_RECT drect;
HRESULT hr;
if(!pZBuffer) {
PanicAlert("No Z-Buffer!");
return 0;
}
// TODO: Fix
//if((hr = pZBuffer->LockRect(0, &drect, NULL, NULL)) != D3D_OK)
// PanicAlert("IT WAS AS I THOUGHT, %s", hr == D3DERR_WASSTILLDRAWING ? "Still drawing" :
// hr == D3DERR_INVALIDCALL ? "Invalid call" : "w00t");
RECT RectToLock;
RectToLock.bottom = targetPixelRc.bottom;
RectToLock.left = targetPixelRc.left;
RectToLock.right = targetPixelRc.right;
RectToLock.top = targetPixelRc.top;
if((hr = pZBuffer->LockRect(&drect, &RectToLock, D3DLOCK_READONLY)) != D3D_OK)
PanicAlert("IT WAS AS I THOUGHT, %s", hr == D3DERR_WASSTILLDRAWING ? "Still drawing" :
hr == D3DERR_INVALIDCALL ? "Invalid call" : "w00t");
//val = ((float *)drect.pBits)[0];
val = ((float *)drect.pBits)[0];
//pZBuffer->UnlockRect(0);
pZBuffer->UnlockRect();
// [0.0, 1.0] ==> [0, 0xFFFFFFFF]
z = val * 0xFFFFFFFF;

View File

@ -48,75 +48,45 @@ extern NativeVertexFormat *g_nativeVertexFmt;
namespace VertexManager
{
enum Collection
{
C_NOTHING,
C_TRIANGLES,
C_LINES,
C_POINTS,
};
static IndexGenerator2 indexGen;
const D3DPRIMITIVETYPE pts[3] =
{
D3DPT_POINTLIST, //DUMMY
D3DPT_TRIANGLELIST,
D3DPT_LINELIST,
};
static IndexGenerator indexGen;
static Collection collection;
int lastPrimitive;
static u8 *fakeVBuffer; // format undefined - NativeVertexFormat takes care of the declaration.
static u16 *fakeIBuffer; // These are just straightforward 16-bit indices.
#define MAXVBUFFERSIZE 65536*128
#define MAXIBUFFERSIZE 65536*4
const Collection collectionTypeLUT[8] =
{
C_TRIANGLES, //quads
C_NOTHING, //nothing
C_TRIANGLES, //triangles
C_TRIANGLES, //strip
C_TRIANGLES, //fan
C_LINES, //lines
C_LINES, //linestrip
C_POINTS //guess :P
};
const D3DPRIMITIVETYPE gxPrimToD3DPrim[8] = {
(D3DPRIMITIVETYPE)0, // not supported
(D3DPRIMITIVETYPE)0, // nothing
D3DPT_TRIANGLELIST,
D3DPT_TRIANGLESTRIP,
D3DPT_TRIANGLEFAN,
D3DPT_LINELIST,
D3DPT_LINESTRIP,
};
static int lastPrimitive;
static u8 *LocalVBuffer;
static u16 *TIBuffer;
static u16 *LIBuffer;
static u16 *PIBuffer;
#define MAXVBUFFERSIZE 0x50000
#define MAXIBUFFERSIZE 0xFFFF
void CreateDeviceObjects();
void DestroyDeviceObjects();
bool Init()
{
collection = C_NOTHING;
fakeVBuffer = new u8[MAXVBUFFERSIZE];
fakeIBuffer = new u16[MAXIBUFFERSIZE];
CreateDeviceObjects();
VertexManager::s_pCurBufferPointer = fakeVBuffer;
LocalVBuffer = new u8[MAXVBUFFERSIZE];
TIBuffer = new u16[MAXIBUFFERSIZE];
LIBuffer = new u16[MAXIBUFFERSIZE];
PIBuffer = new u16[MAXIBUFFERSIZE];
s_pCurBufferPointer = LocalVBuffer;
indexGen.Start(TIBuffer,LIBuffer,PIBuffer);
return true;
}
void ResetBuffer()
{
s_pCurBufferPointer = LocalVBuffer;
indexGen.Start(TIBuffer,LIBuffer,PIBuffer);
}
void Shutdown()
{
DestroyDeviceObjects();
delete [] fakeVBuffer;
delete [] fakeIBuffer;
delete [] LocalVBuffer;
delete [] TIBuffer;
delete [] LIBuffer;
delete [] PIBuffer;
ResetBuffer();
}
void CreateDeviceObjects()
@ -142,83 +112,55 @@ void AddIndices(int _primitive, int _numVertices)
}
}
int GetRemainingSize()
{
return fakeVBuffer + MAXVBUFFERSIZE - VertexManager::s_pCurBufferPointer;
return MAXVBUFFERSIZE - (int)(s_pCurBufferPointer - LocalVBuffer);
}
void AddVertices(int _primitive, int _numVertices)
{
if (_numVertices <= 0) //This check is pretty stupid...
if (_numVertices < 0)
return;
lastPrimitive = _primitive;
Collection type = collectionTypeLUT[_primitive];
if (type == C_NOTHING)
return;
DVSTARTPROFILE();
_assert_msg_(type != C_NOTHING, "type == C_NOTHING!!", "WTF");
switch (_primitive)
{
case GX_DRAW_QUADS:
case GX_DRAW_TRIANGLES:
case GX_DRAW_TRIANGLE_STRIP:
case GX_DRAW_TRIANGLE_FAN:
if(MAXIBUFFERSIZE - indexGen.GetTriangleindexLen() < 2 * _numVertices)
Flush();
break;
case GX_DRAW_LINE_STRIP:
case GX_DRAW_LINES:
if(MAXIBUFFERSIZE - indexGen.GetLineindexLen() < 2 * _numVertices)
Flush();
break;
case GX_DRAW_POINTS:
if(MAXIBUFFERSIZE - indexGen.GetPointindexLen() < _numVertices)
Flush();
break;
default: return;
}
lastPrimitive = _primitive;
ADDSTAT(stats.thisFrame.numPrims, _numVertices);
if (collection != type)
{
// We are NOT collecting the right type.
Flush();
collection = type;
u16 *ptr = 0;
if (type != C_POINTS)
{
ptr = fakeIBuffer;
indexGen.Start((unsigned short*)ptr);
AddIndices(_primitive, _numVertices);
}
if (_numVertices >= MAXVBUFFERSIZE)
MessageBoxA(NULL, "Too many vertices for the buffer", "Dolphin DX9 Video Plugin", MB_OK);
}
else // We are collecting the right type, keep going
{
_assert_msg_(vbufferwrite != 0, "collecting: vbufferwrite == 0!","WTF");
INCSTAT(stats.thisFrame.numPrimitiveJoins);
//Success, keep adding to unlocked buffer
int last = indexGen.GetNumVerts();
AddIndices(_primitive, _numVertices);
if (_numVertices >= MAXVBUFFERSIZE)
MessageBoxA(NULL, "Too many vertices for the buffer", "Dolphin DX9 Video Plugin", MB_OK);
}
INCSTAT(stats.thisFrame.numPrimitiveJoins);
AddIndices(_primitive, _numVertices);
}
inline void Draw(int numVertices, int stride)
{
if (collection != C_POINTS)
if(indexGen.GetNumTriangles() > 0)
{
int numPrimitives = indexGen.GetNumPrims();
/* For some reason, this makes things slower!
if ((indexGen.GetNumAdds() == 1 || indexGen.GetOnlyLists()) && lastPrimitive != GX_DRAW_QUADS && gxPrimToD3DPrim[lastPrimitive])
{
if (FAILED(D3D::dev->DrawPrimitiveUP(
gxPrimToD3DPrim[lastPrimitive],
numPrimitives,
fakeVBuffer,
stride))) {
#if defined(_DEBUG) || defined(DEBUGFAST)
std::string error_shaders;
error_shaders.append(VertexShaderCache::GetCurrentShaderCode());
error_shaders.append(PixelShaderCache::GetCurrentShaderCode());
File::WriteStringToFile(true, error_shaders, "bad_shader_combo.txt");
PanicAlert("DrawPrimitiveUP failed. Shaders written to bad_shader_combo.txt.");
#endif
}
INCSTAT(stats.thisFrame.numDrawCalls);
} else*/ {
if (FAILED(D3D::dev->DrawIndexedPrimitiveUP(
pts[(int)collection],
0, numVertices, numPrimitives,
fakeIBuffer,
if (FAILED(D3D::dev->DrawIndexedPrimitiveUP(
D3DPT_TRIANGLELIST,
0, indexGen.GetNumVerts(), indexGen.GetNumTriangles(),
TIBuffer,
D3DFMT_INDEX16,
fakeVBuffer,
stride))) {
LocalVBuffer,
stride)))
{
#if defined(_DEBUG) || defined(DEBUGFAST)
std::string error_shaders;
error_shaders.append(VertexShaderCache::GetCurrentShaderCode());
@ -227,23 +169,58 @@ inline void Draw(int numVertices, int stride)
PanicAlert("DrawIndexedPrimitiveUP failed. Shaders written to bad_shader_combo.txt.");
#endif
}
INCSTAT(stats.thisFrame.numIndexedDrawCalls);
}
INCSTAT(stats.thisFrame.numIndexedDrawCalls);
}
else
if(indexGen.GetNumLines() > 0)
{
D3D::dev->SetIndices(0);
D3D::dev->DrawPrimitiveUP(D3DPT_POINTLIST, numVertices, fakeVBuffer, stride);
INCSTAT(stats.thisFrame.numDrawCalls);
if (FAILED(D3D::dev->DrawIndexedPrimitiveUP(
D3DPT_LINELIST,
0, indexGen.GetNumVerts(), indexGen.GetNumLines(),
LIBuffer,
D3DFMT_INDEX16,
LocalVBuffer,
stride)))
{
#if defined(_DEBUG) || defined(DEBUGFAST)
std::string error_shaders;
error_shaders.append(VertexShaderCache::GetCurrentShaderCode());
error_shaders.append(PixelShaderCache::GetCurrentShaderCode());
File::WriteStringToFile(true, error_shaders, "bad_shader_combo.txt");
PanicAlert("DrawIndexedPrimitiveUP failed. Shaders written to bad_shader_combo.txt.");
#endif
}
INCSTAT(stats.thisFrame.numIndexedDrawCalls);
}
if(indexGen.GetNumPoints() > 0)
{
if (FAILED(D3D::dev->DrawIndexedPrimitiveUP(
D3DPT_POINTLIST,
0, indexGen.GetNumVerts(), indexGen.GetNumPoints(),
PIBuffer,
D3DFMT_INDEX16,
LocalVBuffer,
stride)))
{
#if defined(_DEBUG) || defined(DEBUGFAST)
std::string error_shaders;
error_shaders.append(VertexShaderCache::GetCurrentShaderCode());
error_shaders.append(PixelShaderCache::GetCurrentShaderCode());
File::WriteStringToFile(true, error_shaders, "bad_shader_combo.txt");
PanicAlert("DrawIndexedPrimitiveUP failed. Shaders written to bad_shader_combo.txt.");
#endif
}
INCSTAT(stats.thisFrame.numIndexedDrawCalls);
}
}
void Flush()
{
if (LocalVBuffer == s_pCurBufferPointer) return;
int numVerts = indexGen.GetNumVerts();
if(numVerts == 0) return;
DVSTARTPROFILE();
if (collection != C_NOTHING)
{
u32 usedtextures = 0;
for (u32 i = 0; i < (u32)bpmem.genMode.numtevstages + 1; ++i) {
if (bpmem.tevorders[i/2].getEnable(i & 1))
@ -330,10 +307,9 @@ void Flush()
D3D::SetRenderState(D3DRS_COLORWRITEENABLE, write);
}
DEBUGGER_PAUSE_AT(NEXT_FLUSH,true);
}
shader_fail:
collection = C_NOTHING;
VertexManager::s_pCurBufferPointer = fakeVBuffer;
ResetBuffer();
}
}

View File

@ -38,6 +38,7 @@
#include "VertexLoader.h"
#include "VertexManager.h"
#include "IndexGenerator.h"
#include "OpcodeDecoding.h"
// internal state for loading vertices
extern NativeVertexFormat *g_nativeVertexFmt;
@ -45,43 +46,18 @@ extern NativeVertexFormat *g_nativeVertexFmt;
namespace VertexManager
{
static const GLenum c_RenderprimitiveType[8] =
{
GL_TRIANGLES,
GL_ZERO, //nothing
GL_TRIANGLES,
GL_TRIANGLES,
GL_TRIANGLES,
GL_LINES,
GL_LINES,
GL_POINTS
};
static const GLenum c_primitiveType[8] =
{
GL_QUADS,
GL_ZERO, //nothing
GL_TRIANGLES,
GL_TRIANGLE_STRIP,
GL_TRIANGLE_FAN,
GL_LINES,
GL_LINE_STRIP,
GL_POINTS
};
static IndexGenerator2 indexGen;
static GLenum lastPrimitive;
static GLenum CurrentRenderPrimitive;
static int lastPrimitive;
static u8 *LocalVBuffer;
static u16 *TIBuffer;
static u16 *LIBuffer;
static u16 *PIBuffer;
static GLint max_Index_size = 0;
#define MAXVBUFFERSIZE 0x50000
#define MAXIBUFFERSIZE 0x20000
#define MAXVBOBUFFERCOUNT 0x4
#define MAXIBUFFERSIZE 0xFFFF
#define MAXVBOBUFFERCOUNT 0x8
static GLuint s_vboBuffers[MAXVBOBUFFERCOUNT] = {0};
static int s_nCurVBOIndex = 0; // current free buffer
@ -90,12 +66,16 @@ static int s_nCurVBOIndex = 0; // current free buffer
bool Init()
{
lastPrimitive = GL_ZERO;
CurrentRenderPrimitive = GL_ZERO;
lastPrimitive = GX_DRAW_NONE;
glGetIntegerv(GL_MAX_ELEMENTS_INDICES, (GLint *)&max_Index_size);
if(max_Index_size>MAXIBUFFERSIZE)
max_Index_size = MAXIBUFFERSIZE;
LocalVBuffer = new u8[MAXVBUFFERSIZE];
TIBuffer = new u16[MAXIBUFFERSIZE];
LIBuffer = new u16[MAXIBUFFERSIZE];
PIBuffer = new u16[MAXIBUFFERSIZE];
TIBuffer = new u16[max_Index_size];
LIBuffer = new u16[max_Index_size];
PIBuffer = new u16[max_Index_size];
s_pCurBufferPointer = LocalVBuffer;
s_nCurVBOIndex = 0;
glGenBuffers(ARRAYSIZE(s_vboBuffers), s_vboBuffers);
@ -106,10 +86,7 @@ bool Init()
glEnableClientState(GL_VERTEX_ARRAY);
g_nativeVertexFmt = NULL;
GL_REPORT_ERRORD();
u16 *Tptr = TIBuffer;
u16 *Lptr = LIBuffer;
u16 *Pptr = PIBuffer;
indexGen.Start(Tptr,Lptr,Pptr);
indexGen.Start(TIBuffer,LIBuffer,PIBuffer);
return true;
}
@ -127,68 +104,80 @@ void Shutdown()
void ResetBuffer()
{
s_nCurVBOIndex = (s_nCurVBOIndex + 1) % ARRAYSIZE(s_vboBuffers);
s_pCurBufferPointer = LocalVBuffer;
CurrentRenderPrimitive = GL_ZERO;
u16 *Tptr = TIBuffer;
u16 *Lptr = LIBuffer;
u16 *Pptr = PIBuffer;
indexGen.Start(Tptr,Lptr,Pptr);
s_pCurBufferPointer = LocalVBuffer;
indexGen.Start(TIBuffer,LIBuffer,PIBuffer);
}
void AddIndices(int _primitive, int _numVertices)
{
switch (_primitive)
{
case GL_QUADS: indexGen.AddQuads(_numVertices); return;
case GL_TRIANGLES: indexGen.AddList(_numVertices); return;
case GL_TRIANGLE_STRIP: indexGen.AddStrip(_numVertices); return;
case GL_TRIANGLE_FAN: indexGen.AddFan(_numVertices); return;
case GL_LINE_STRIP: indexGen.AddLineStrip(_numVertices); return;
case GL_LINES: indexGen.AddLineList(_numVertices); return;
case GL_POINTS: indexGen.AddPoints(_numVertices); return;
case GX_DRAW_QUADS: indexGen.AddQuads(_numVertices); return;
case GX_DRAW_TRIANGLES: indexGen.AddList(_numVertices); return;
case GX_DRAW_TRIANGLE_STRIP: indexGen.AddStrip(_numVertices); return;
case GX_DRAW_TRIANGLE_FAN: indexGen.AddFan(_numVertices); return;
case GX_DRAW_LINE_STRIP: indexGen.AddLineStrip(_numVertices); return;
case GX_DRAW_LINES: indexGen.AddLineList(_numVertices); return;
case GX_DRAW_POINTS: indexGen.AddPoints(_numVertices); return;
}
}
int GetRemainingSize()
{
return LocalVBuffer + MAXVBUFFERSIZE - s_pCurBufferPointer;
return MAXVBUFFERSIZE - (int)(s_pCurBufferPointer - LocalVBuffer);
}
void AddVertices(int primitive, int numvertices)
{
if (numvertices <= 0)
if (numvertices < 0)
return;
if (c_primitiveType[primitive] == GL_ZERO)
return;
DVSTARTPROFILE();
switch (primitive)
{
case GX_DRAW_QUADS:
case GX_DRAW_TRIANGLES:
case GX_DRAW_TRIANGLE_STRIP:
case GX_DRAW_TRIANGLE_FAN:
if(max_Index_size - indexGen.GetTriangleindexLen() < 2 * numvertices)
Flush();
break;
case GX_DRAW_LINE_STRIP:
case GX_DRAW_LINES:
if(max_Index_size - indexGen.GetLineindexLen() < 2 * numvertices)
Flush();
break;
case GX_DRAW_POINTS:
if(max_Index_size - indexGen.GetPointindexLen() < numvertices)
Flush();
break;
default: return;
}
lastPrimitive = c_primitiveType[primitive];
lastPrimitive = primitive;
ADDSTAT(stats.thisFrame.numPrims, numvertices);
INCSTAT(stats.thisFrame.numPrimitiveJoins);
AddIndices(c_primitiveType[primitive], numvertices);
AddIndices(primitive, numvertices);
}
inline void Draw()
{
if(indexGen.GetNumTriangles() > 0)
{
glDrawElements(GL_TRIANGLES, indexGen.GetTriangleindexLen(), GL_UNSIGNED_SHORT, TIBuffer);
glDrawElements(GL_TRIANGLES, indexGen.GetTriangleindexLen(), GL_UNSIGNED_SHORT,TIBuffer);
INCSTAT(stats.thisFrame.numIndexedDrawCalls);
}
if(indexGen.GetNumLines() > 0)
{
glDrawElements(GL_LINES, indexGen.GetLineindexLen(), GL_UNSIGNED_SHORT, LIBuffer);
glDrawElements(GL_LINES, indexGen.GetLineindexLen(), GL_UNSIGNED_SHORT,LIBuffer);
INCSTAT(stats.thisFrame.numIndexedDrawCalls);
}
if(indexGen.GetNumPoints() > 0)
{
glDrawElements(GL_POINTS, indexGen.GetPointindexLen(), GL_UNSIGNED_SHORT, PIBuffer);
glDrawElements(GL_POINTS, indexGen.GetPointindexLen(), GL_UNSIGNED_SHORT,PIBuffer);
INCSTAT(stats.thisFrame.numIndexedDrawCalls);
}
}
}
void Flush()
@ -232,7 +221,7 @@ void Flush()
glBindBuffer(GL_ARRAY_BUFFER, s_vboBuffers[s_nCurVBOIndex]);
glBufferSubData(GL_ARRAY_BUFFER,0, s_pCurBufferPointer - LocalVBuffer, LocalVBuffer);
glBufferSubData(GL_ARRAY_BUFFER,0, s_pCurBufferPointer - LocalVBuffer, LocalVBuffer);
GL_REPORT_ERRORD();
// setup the pointers
@ -338,6 +327,7 @@ void Flush()
if (bpmem.blendmode.blendenable || bpmem.blendmode.subtract)
glEnable(GL_BLEND);
}
ResetBuffer();
#if defined(_DEBUG) || defined(DEBUGFAST)
if (g_ActiveConfig.iLog & CONF_SAVESHADERS)
@ -363,7 +353,7 @@ void Flush()
GL_REPORT_ERRORD();
ResetBuffer();
}
} // namespace