From 12a676c27372c8707453d22fd7bda9d0f9318a43 Mon Sep 17 00:00:00 2001 From: Rodolfo Osvaldo Bogado Date: Fri, 2 Jul 2010 17:09:53 +0000 Subject: [PATCH] this is a pure optimization commit: return to the old values of the constants in the pixel shader; the old values give me 3 or 4 fps more (maybe some kind of compiler optimization) in some games, and with the current algorithm I notice no difference with these values, please report any problem. optimizes SSAA to make it a little faster; the quality should be the same but with a little speedup. change the way the frame is processed depending on whether XFB is enabled or not to make this a little faster. please test and report any problem git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@5820 8ced0084-cf51-0410-be5f-012b33b47a6e --- Source/Core/Common/Src/LinearDiskCache.cpp | 2 +- .../Core/VideoCommon/Src/PixelShaderGen.cpp | 38 ++-- Source/Core/VideoCommon/Src/Render.h | 2 +- .../Plugins/Plugin_VideoDX11/Src/Render.cpp | 84 ++++++--- Source/Plugins/Plugin_VideoDX11/Src/main.cpp | 3 +- .../Plugin_VideoDX9/Src/DlgSettings.cpp | 1 - .../Src/FramebufferManager.cpp | 1 - .../Plugin_VideoDX9/Src/PixelShaderCache.cpp | 84 ++++----- Source/Plugins/Plugin_VideoDX9/Src/Render.cpp | 110 +++++++----- .../Plugin_VideoDX9/Src/TextureCache.cpp | 23 ++- .../Plugin_VideoDX9/Src/VertexShaderCache.cpp | 31 ++-- Source/Plugins/Plugin_VideoDX9/Src/main.cpp | 3 +- Source/Plugins/Plugin_VideoOGL/Src/Render.cpp | 170 +++++++++++------- Source/Plugins/Plugin_VideoOGL/Src/main.cpp | 3 +- 14 files changed, 312 insertions(+), 243 deletions(-) diff --git a/Source/Core/Common/Src/LinearDiskCache.cpp b/Source/Core/Common/Src/LinearDiskCache.cpp index 3637e82180..efed2aefe5 100644 --- a/Source/Core/Common/Src/LinearDiskCache.cpp +++ b/Source/Core/Common/Src/LinearDiskCache.cpp @@ -22,7 +22,7 @@ static const char ID[4] = {'D', 'C', 'A', 'C'}; // Update this to the current SVN revision every time you change shader generation code. 
// We don't automatically get this from SVN_REV because that would mean regenerating the // shader cache for every revision, graphics-related or not, which is simply annoying. -const int version = 5813; +const int version = 5820; LinearDiskCache::LinearDiskCache() : file_(NULL), num_entries_(0) { diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp index 88666e4a38..5a2bdba390 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp @@ -152,18 +152,16 @@ static void SampleTexture(char *&p, const char *destination, const char *texcoor static bool WriteAlphaTest(char *&p, API_TYPE ApiType); static void WriteFog(char *&p); -const float epsilon8bit = 1.0f / 255.0f; - static const char *tevKSelTableC[] = // KCSEL { "1.0f,1.0f,1.0f", // 1 = 0x00 - "(223.0f/255.0f),(223.0f/255.0f),(223.0f/255.0f)", // 7_8 = 0x01 - "(191.0f/255.0f),(191.0f/255.0f),(191.0f/255.0f)", // 3_4 = 0x02 - "(159.0f/255.0f),(159.0f/255.0f),(159.0f/255.0f)", // 5_8 = 0x03 - "(127.0f/255.0f),(127.0f/255.0f),(127.0f/255.0f)", // 1_2 = 0x04 - "(95.0f/255.0f),(95.0f/255.0f),(95.0f/255.0f)", // 3_8 = 0x05 - "(63.0f/255.0f),(63.0f/255.0f),(63.0f/255.0f)", // 1_4 = 0x06 - "(31.0f/255.0f),(31.0f/255.0f),(31.0f/255.0f)", // 1_8 = 0x07 + "0.875f,0.875f,0.875f", // 7_8 = 0x01 + "0.75f,0.75f,0.75f", // 3_4 = 0x02 + "0.625f,0.625f,0.625f", // 5_8 = 0x03 + "0.5f,0.5f,0.5f", // 1_2 = 0x04 + "0.375f,0.375f,0.375f", // 3_8 = 0x05 + "0.25f,0.25f,0.25f", // 1_4 = 0x06 + "0.125f,0.125f,0.125f", // 1_8 = 0x07 "ERROR", // 0x08 "ERROR", // 0x09 "ERROR", // 0x0a @@ -193,13 +191,13 @@ static const char *tevKSelTableC[] = // KCSEL static const char *tevKSelTableA[] = // KASEL { "1.0f", // 1 = 0x00 - "(223.0f/255.0f)",// 7_8 = 0x01 - "(191.0f/255.0f)", // 3_4 = 0x02 - "(159.0f/255.0f)",// 5_8 = 0x03 - "(127.0f/255.0f)", // 1_2 = 0x04 - "(95.0f/255.0f)",// 3_8 = 0x05 - "(63.0f/255.0f)", // 1_4 = 0x06 - 
"(31.0f/255.0f)",// 1_8 = 0x07 + "0.875f",// 7_8 = 0x01 + "0.75f", // 3_4 = 0x02 + "0.625f",// 5_8 = 0x03 + "0.5f", // 1_2 = 0x04 + "0.375f",// 3_8 = 0x05 + "0.25f", // 1_4 = 0x06 + "0.125f",// 1_8 = 0x07 "ERROR", // 0x08 "ERROR", // 0x09 "ERROR", // 0x0a @@ -237,8 +235,8 @@ static const char *tevScaleTable[] = // CS static const char *tevBiasTable[] = // TB { "", // ZERO, - "+(127.0f/255.0f)", // ADDHALF, - "-(127.0f/255.0f)", // SUBHALF, + "+0.5f", // ADDHALF, + "-0.5f", // SUBHALF, "", }; @@ -262,7 +260,7 @@ static const char *tevCInputTable[] = // CC "(rastemp.rgb)", // RASC, "(rastemp.aaa)", // RASA, "float3(1.0f,1.0f,1.0f)", // ONE - "float3((127.0f/255.0f),(127.0f/255.0f),(127.0f/255.0f))", // HALF + "float3(0.5f,0.5f,0.5f)", // HALF "(konsttemp.rgb)", //"konsttemp.rgb", // KONST "float3(0.0f,0.0f,0.0f)", // ZERO ///aded extra values to map clamped values @@ -279,7 +277,7 @@ static const char *tevCInputTable[] = // CC "(rastemp.rgb)", // RASC, "(rastemp.aaa)", // RASA, "float3(1.0f,1.0f,1.0f)", // ONE - "float3((127.0f/255.0f),(127.0f/255.0f),(127.0f/255.0f))", // HALF + "float3(0.5f,0.5f,0.5f)", // HALF "(konsttemp.rgb)", //"konsttemp.rgb", // KONST "float3(0.0f,0.0f,0.0f)", // ZERO "PADERROR", "PADERROR", "PADERROR", "PADERROR", diff --git a/Source/Core/VideoCommon/Src/Render.h b/Source/Core/VideoCommon/Src/Render.h index 31127fdeb7..146c10e9bc 100644 --- a/Source/Core/VideoCommon/Src/Render.h +++ b/Source/Core/VideoCommon/Src/Render.h @@ -95,7 +95,7 @@ public: static void RenderToXFB(u32 xfbAddr, u32 fbWidth, u32 fbHeight, const EFBRectangle& sourceRc); // Finish up the current frame, print some stats - static void Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight); + static void Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight,const EFBRectangle& rc); }; void UpdateViewport(); diff --git a/Source/Plugins/Plugin_VideoDX11/Src/Render.cpp b/Source/Plugins/Plugin_VideoDX11/Src/Render.cpp index 1e18ea17f2..fd38ef53dd 100644 --- 
a/Source/Plugins/Plugin_VideoDX11/Src/Render.cpp +++ b/Source/Plugins/Plugin_VideoDX11/Src/Render.cpp @@ -516,14 +516,16 @@ void Renderer::RenderToXFB(u32 xfbAddr, u32 fbWidth, u32 fbHeight, const EFBRect return; VideoFifo_CheckEFBAccess(); VideoFifo_CheckSwapRequestAt(xfbAddr, fbWidth, fbHeight); - FBManager.CopyToXFB(xfbAddr, fbWidth, fbHeight, sourceRc); XFBWrited = true; - // XXX: Without the VI, how would we know what kind of field this is? So // just use progressive. - if (!g_ActiveConfig.bUseXFB) + if (g_ActiveConfig.bUseXFB) { - Renderer::Swap(xfbAddr, FIELD_PROGRESSIVE, fbWidth, fbHeight); + FBManager.CopyToXFB(xfbAddr, fbWidth, fbHeight, sourceRc); + } + else + { + Renderer::Swap(xfbAddr, FIELD_PROGRESSIVE, fbWidth, fbHeight,sourceRc); Common::AtomicStoreRelease(s_swapRequested, FALSE); } } @@ -801,7 +803,7 @@ void Renderer::SetBlendMode(bool forceUpdate) } } -void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight) +void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight,const EFBRectangle& rc) { if (g_bSkipCurrentFrame || (!XFBWrited && !g_ActiveConfig.bUseRealXFB) || !fbWidth || !fbHeight) { @@ -815,7 +817,7 @@ void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight) if (field == FIELD_LOWER) xfbAddr -= fbWidth * 2; u32 xfbCount = 0; const XFBSource** xfbSourceList = FBManager.GetXFBSource(xfbAddr, fbWidth, fbHeight, xfbCount); - if (!xfbSourceList || xfbCount == 0) + if ((!xfbSourceList || xfbCount == 0) && g_ActiveConfig.bUseXFB && !g_ActiveConfig.bUseRealXFB) { g_VideoInitialize.pCopiedToXFB(false); return; @@ -849,27 +851,65 @@ void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight) D3D::context->OMSetRenderTargets(1, &D3D::GetBackBuffer()->GetRTV(), NULL); // TODO: Enable linear filtering here - - // draw each xfb source - for (u32 i = 0; i < xfbCount; ++i) + + if(g_ActiveConfig.bUseXFB) { - const XFBSource* xfbSource = xfbSourceList[i]; + const XFBSource* xfbSource; 
- MathUtil::Rectangle sourceRc; - sourceRc.left = 0; - sourceRc.top = 0; - sourceRc.right = xfbSource->texWidth; - sourceRc.bottom = xfbSource->texHeight; + // draw each xfb source + for (u32 i = 0; i < xfbCount; ++i) + { + xfbSource = xfbSourceList[i]; + MathUtil::Rectangle sourceRc; + + sourceRc.left = 0; + sourceRc.top = 0; + sourceRc.right = xfbSource->texWidth; + sourceRc.bottom = xfbSource->texHeight; - MathUtil::Rectangle drawRc; - drawRc.top = -1; - drawRc.bottom = 1; - drawRc.left = -1; - drawRc.right = 1; + MathUtil::Rectangle drawRc; - D3D::drawShadedTexSubQuad(xfbSource->tex->GetSRV(), &sourceRc, xfbSource->texWidth, xfbSource->texHeight, &drawRc, PixelShaderCache::GetColorCopyProgram(),VertexShaderCache::GetSimpleVertexShader(), VertexShaderCache::GetSimpleInputLayout()); + if (g_ActiveConfig.bUseXFB && !g_ActiveConfig.bUseRealXFB) + { + // use virtual xfb with offset + int xfbHeight = xfbSource->srcHeight; + int xfbWidth = xfbSource->srcWidth; + int hOffset = ((s32)xfbSource->srcAddr - (s32)xfbAddr) / ((s32)fbWidth * 2); + + drawRc.bottom = 1.0f - 2.0f * ((hOffset) / (float)fbHeight); + drawRc.top = 1.0f - 2.0f * ((hOffset + xfbHeight) / (float)fbHeight); + drawRc.left = -(xfbWidth / (float)fbWidth); + drawRc.right = (xfbWidth / (float)fbWidth); + + + if (!g_ActiveConfig.bAutoScale) + { + // scale draw area for a 1 to 1 pixel mapping with the draw target + float vScale = (float)fbHeight / (float)s_backbuffer_height; + float hScale = (float)fbWidth / (float)s_backbuffer_width; + + drawRc.top *= vScale; + drawRc.bottom *= vScale; + drawRc.left *= hScale; + drawRc.right *= hScale; + } + } + else + { + drawRc.top = -1; + drawRc.bottom = 1; + drawRc.left = -1; + drawRc.right = 1; + } + D3D::drawShadedTexSubQuad(xfbSource->tex->GetSRV(), &sourceRc, xfbSource->texWidth, xfbSource->texHeight, &drawRc, PixelShaderCache::GetColorCopyProgram(),VertexShaderCache::GetSimpleVertexShader(), VertexShaderCache::GetSimpleInputLayout()); + } + } + else + { + 
TargetRectangle targetRc = Renderer::ConvertEFBRectangle(rc); + D3DTexture2D* read_texture = FBManager.GetEFBColorTexture(); + D3D::drawShadedTexQuad(read_texture->GetSRV(), targetRc.AsRECT(), Renderer::GetFullTargetWidth(), Renderer::GetFullTargetHeight(), PixelShaderCache::GetColorCopyProgram(),VertexShaderCache::GetSimpleVertexShader(), VertexShaderCache::GetSimpleInputLayout()); } - // done with drawing the game stuff, good moment to save a screenshot if (s_bScreenshot) { diff --git a/Source/Plugins/Plugin_VideoDX11/Src/main.cpp b/Source/Plugins/Plugin_VideoDX11/Src/main.cpp index 6c7ca735ed..7ace253277 100644 --- a/Source/Plugins/Plugin_VideoDX11/Src/main.cpp +++ b/Source/Plugins/Plugin_VideoDX11/Src/main.cpp @@ -312,7 +312,8 @@ void VideoFifo_CheckSwapRequest() { if (Common::AtomicLoadAcquire(s_swapRequested)) { - Renderer::Swap(s_beginFieldArgs.xfbAddr, s_beginFieldArgs.field, s_beginFieldArgs.fbWidth, s_beginFieldArgs.fbHeight); + EFBRectangle rc; + Renderer::Swap(s_beginFieldArgs.xfbAddr, s_beginFieldArgs.field, s_beginFieldArgs.fbWidth, s_beginFieldArgs.fbHeight,rc); Common::AtomicStoreRelease(s_swapRequested, FALSE); } } diff --git a/Source/Plugins/Plugin_VideoDX9/Src/DlgSettings.cpp b/Source/Plugins/Plugin_VideoDX9/Src/DlgSettings.cpp index f953ce08ed..2b94247d23 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/DlgSettings.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/DlgSettings.cpp @@ -311,7 +311,6 @@ void GFXConfigDialogDX::CreateGUIControls() sbDebuggingTools = new wxStaticBoxSizer( new wxStaticBox( m_PageAdvanced, wxID_ANY, wxT("Debugging tools") ), wxVERTICAL ); m_OverlayStats = new wxCheckBox( m_PageAdvanced, ID_OVERLAYSTATS, wxT("Overlay Some Statics"), wxDefaultPosition, wxDefaultSize, 0 ); m_ShaderErrors = new wxCheckBox( m_PageAdvanced, ID_SHADERERRORS, wxT("Show Shader Compilation Errors"), wxDefaultPosition, wxDefaultSize, 0 ); - m_ShaderErrors->Enable( false ); m_TexfmtOverlay = new wxCheckBox( m_PageAdvanced, ID_TEXFMT_OVERLAY, wxT("Enable 
TexFmt Overlay"), wxDefaultPosition, wxDefaultSize, 0 ); m_TexfmtCenter = new wxCheckBox( m_PageAdvanced, ID_TEXFMT_CENTER, wxT("Centered"), wxDefaultPosition, wxDefaultSize, 0 ); m_ProjStats = new wxCheckBox( m_PageAdvanced, wxID_ANY, wxT("Overlay Projection Stats"), wxDefaultPosition, wxDefaultSize, 0 ); diff --git a/Source/Plugins/Plugin_VideoDX9/Src/FramebufferManager.cpp b/Source/Plugins/Plugin_VideoDX9/Src/FramebufferManager.cpp index c9d215f386..4ea68869bc 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/FramebufferManager.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/FramebufferManager.cpp @@ -332,7 +332,6 @@ void FramebufferManager::copyToVirtualXFB(u32 xfbAddr, u32 fbWidth, u32 fbHeight --it; } - float SuperSampleCompensation = 1.0f; float scaleX = Renderer::GetXFBScaleX(); float scaleY = Renderer::GetXFBScaleY(); TargetRectangle targetSource,efbSource; diff --git a/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp b/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp index 7290061b6a..fbf6bb9e27 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp @@ -46,24 +46,26 @@ static std::set unique_shaders; static float lastPSconstants[C_COLORMATRIX+16][4]; -static LPDIRECT3DPIXELSHADER9 s_ColorMatrixProgram[3]; -static LPDIRECT3DPIXELSHADER9 s_ColorCopyProgram[3]; -static LPDIRECT3DPIXELSHADER9 s_DepthMatrixProgram[3]; +#define MAX_SSAA_SHADERS 3 + +static LPDIRECT3DPIXELSHADER9 s_ColorMatrixProgram[MAX_SSAA_SHADERS]; +static LPDIRECT3DPIXELSHADER9 s_ColorCopyProgram[MAX_SSAA_SHADERS]; +static LPDIRECT3DPIXELSHADER9 s_DepthMatrixProgram[MAX_SSAA_SHADERS]; static LPDIRECT3DPIXELSHADER9 s_ClearProgram = 0; LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetColorMatrixProgram(int SSAAMode) { - return s_ColorMatrixProgram[SSAAMode % 3]; + return s_ColorMatrixProgram[SSAAMode % MAX_SSAA_SHADERS]; } LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetDepthMatrixProgram(int SSAAMode) { - return 
s_DepthMatrixProgram[SSAAMode % 3]; + return s_DepthMatrixProgram[SSAAMode % MAX_SSAA_SHADERS]; } LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetColorCopyProgram(int SSAAMode) { - return s_ColorCopyProgram[SSAAMode % 3]; + return s_ColorCopyProgram[SSAAMode % MAX_SSAA_SHADERS]; } LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetClearProgram() @@ -134,34 +136,32 @@ void PixelShaderCache::Init() "in float2 uv0 : TEXCOORD0){\n" "ocol0 = tex2D(samp0,uv0);\n" "}\n"); - s_ColorCopyProgram[0] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); + s_ColorCopyProgram[0] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); - //4 Samples - sprintf(pprog, "uniform sampler samp0 : register(s0);\n" + //1 Samples SSAA + sprintf(pprog, "uniform sampler samp0 : register(s0);\n" "void main(\n" "out float4 ocol0 : COLOR0,\n" "in float4 uv0 : TEXCOORD0,\n" - "in float4 uv1 : TEXCOORD1,\n" - "in float4 uv2 : TEXCOORD2,\n" - "in float4 uv3 : TEXCOORD3,\n" - "in float4 uv4 : TEXCOORD4){\n" - "ocol0 = (tex2D(samp0,uv1.xy) + tex2D(samp0,uv2.xy) + tex2D(samp0,uv3.xy) + tex2D(samp0,uv4.xy))*0.25f;\n" + "in float4 uv1 : TEXCOORD1){\n" + "ocol0 = tex2D(samp0,uv0.xy);\n" "}\n"); s_ColorCopyProgram[1] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); - - //9 Samples + + //4 Samples SSAA sprintf(pprog, "uniform sampler samp0 : register(s0);\n" "void main(\n" "out float4 ocol0 : COLOR0,\n" "in float4 uv0 : TEXCOORD0,\n" "in float4 uv1 : TEXCOORD1,\n" "in float4 uv2 : TEXCOORD2,\n" - "in float4 uv3 : TEXCOORD3,\n" - "in float4 uv4 : TEXCOORD4){\n" - "ocol0 = (tex2D(samp0,uv1.xy) + tex2D(samp0,uv1.wz) + tex2D(samp0,uv2.xy) + tex2D(samp0,uv2.wz) + tex2D(samp0,uv3.xy) + tex2D(samp0,uv3.wz) + tex2D(samp0,uv4.xy) + tex2D(samp0,uv4.wz) + tex2D(samp0,uv0.xy))/9.0f;\n" + "in float4 uv3 : TEXCOORD3){\n" + "ocol0 = (tex2D(samp0,uv1.xy) + tex2D(samp0,uv1.wz) + tex2D(samp0,uv2.xy) + tex2D(samp0,uv2.wz))*0.25;\n" "}\n"); s_ColorCopyProgram[2] = D3D::CompileAndCreatePixelShader(pprog, 
(int)strlen(pprog)); + + //Color conversion Programs //1 sample sprintf(pprog, "uniform sampler samp0 : register(s0);\n" @@ -172,25 +172,21 @@ void PixelShaderCache::Init() "float4 texcol = tex2D(samp0,uv0);\n" "ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n" "}\n",C_COLORMATRIX); - s_ColorMatrixProgram[0] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); + s_ColorMatrixProgram[0] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); - //4 samples - sprintf(pprog, "uniform sampler samp0 : register(s0);\n" + //1 samples SSAA + sprintf(pprog, "uniform sampler samp0 : register(s0);\n" "uniform float4 cColMatrix[5] : register(c%d);\n" "void main(\n" "out float4 ocol0 : COLOR0,\n" "in float4 uv0 : TEXCOORD0,\n" - "in float4 uv1 : TEXCOORD1,\n" - "in float4 uv2 : TEXCOORD2,\n" - "in float4 uv3 : TEXCOORD3,\n" - "in float4 uv4 : TEXCOORD4,\n" - "in float4 uv5 : TEXCOORD5){\n" - "float4 texcol = (tex2D(samp0,float2(clamp(uv1.x,uv5.x,uv5.z),clamp(uv1.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv2.x,uv5.x,uv5.z),clamp(uv2.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv3.x,uv5.x,uv5.z),clamp(uv3.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv4.x,uv5.x,uv5.z),clamp(uv4.y,uv5.y,uv5.w))))*0.25f;\n" + "in float4 uv1 : TEXCOORD1){\n" + "float4 texcol = tex2D(samp0,uv0.xy);\n" "ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n" "}\n",C_COLORMATRIX); s_ColorMatrixProgram[1] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); - //9 samples + //4 samples SSAA sprintf(pprog, "uniform sampler samp0 : register(s0);\n" "uniform float4 cColMatrix[5] : register(c%d);\n" "void main(\n" @@ -198,10 +194,8 @@ void PixelShaderCache::Init() "in float4 uv0 : TEXCOORD0,\n" "in float4 uv1 : TEXCOORD1,\n" "in float4 uv2 : TEXCOORD2,\n" - "in float4 uv3 : TEXCOORD3,\n" - "in float4 uv4 : 
TEXCOORD4,\n" - "in float4 uv5 : TEXCOORD5){\n" - "float4 texcol = (tex2D(samp0,float2(clamp(uv1.x,uv5.x,uv5.z),clamp(uv1.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv1.w,uv5.x,uv5.z),clamp(uv1.z,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv2.x,uv5.x,uv5.z),clamp(uv2.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv2.w,uv5.x,uv5.z),clamp(uv2.z,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv3.x,uv5.x,uv5.z),clamp(uv3.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv3.w,uv5.x,uv5.z),clamp(uv3.z,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv4.x,uv5.x,uv5.z),clamp(uv4.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv4.w,uv5.x,uv5.z),clamp(uv4.z,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv0.x,uv5.x,uv5.z),clamp(uv0.y,uv5.y,uv5.w))))/9;\n" + "in float4 uv3 : TEXCOORD3){\n" + "float4 texcol = (tex2D(samp0,uv1.xy) + tex2D(samp0,uv1.wz) + tex2D(samp0,uv2.xy) + tex2D(samp0,uv2.wz))*0.25f;\n" "ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n" "}\n",C_COLORMATRIX); s_ColorMatrixProgram[2] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); @@ -216,29 +210,25 @@ void PixelShaderCache::Init() "float4 texcol = tex2D(samp0,uv0);\n" "float4 EncodedDepth = frac((texcol.r * (16777215.0f/16777216.0f)) * float4(1.0f,255.0f,255.0f*255.0f,255.0f*255.0f*255.0f));\n" "texcol = float4((EncodedDepth.rgb * (16777216.0f/16777215.0f)),1.0f);\n" - "ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n" + "ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n" "}\n",C_COLORMATRIX); - s_DepthMatrixProgram[0] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); + s_DepthMatrixProgram[0] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); - //4 sample + //1 sample SSAA sprintf(pprog, "uniform sampler samp0 : register(s0);\n" 
"uniform float4 cColMatrix[5] : register(c%d);\n" "void main(\n" "out float4 ocol0 : COLOR0,\n" "in float4 uv0 : TEXCOORD0,\n" - "in float4 uv1 : TEXCOORD1,\n" - "in float4 uv2 : TEXCOORD2,\n" - "in float4 uv3 : TEXCOORD3,\n" - "in float4 uv4 : TEXCOORD4,\n" - "in float4 uv5 : TEXCOORD5){\n" - "float4 texcol = (tex2D(samp0,float2(clamp(uv1.x,uv5.x,uv5.z),clamp(uv1.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv2.x,uv5.x,uv5.z),clamp(uv2.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv3.x,uv5.x,uv5.z),clamp(uv3.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv4.x,uv5.x,uv5.z),clamp(uv4.y,uv5.y,uv5.w))))*0.25f;\n" + "in float4 uv1 : TEXCOORD1){\n" + "float4 texcol = tex2D(samp0,uv0.xy);\n" "float4 EncodedDepth = frac((texcol.r * (16777215.0f/16777216.0f)) * float4(1.0f,255.0f,255.0f*255.0f,255.0f*255.0f*255.0f));\n" "texcol = float4((EncodedDepth.rgb * (16777216.0f/16777215.0f)),1.0f);\n" "ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n" "}\n",C_COLORMATRIX); s_DepthMatrixProgram[1] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); - //9 sample + //4 sample SSAA sprintf(pprog, "uniform sampler samp0 : register(s0);\n" "uniform float4 cColMatrix[5] : register(c%d);\n" "void main(\n" @@ -246,10 +236,8 @@ void PixelShaderCache::Init() "in float4 uv0 : TEXCOORD0,\n" "in float4 uv1 : TEXCOORD1,\n" "in float4 uv2 : TEXCOORD2,\n" - "in float4 uv3 : TEXCOORD3,\n" - "in float4 uv4 : TEXCOORD4,\n" - "in float4 uv5 : TEXCOORD5){\n" - "float4 texcol = (tex2D(samp0,float2(clamp(uv1.x,uv5.x,uv5.z),clamp(uv1.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv1.w,uv5.x,uv5.z),clamp(uv1.z,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv2.x,uv5.x,uv5.z),clamp(uv2.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv2.w,uv5.x,uv5.z),clamp(uv2.z,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv3.x,uv5.x,uv5.z),clamp(uv3.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv3.w,uv5.x,uv5.z),clamp(uv3.z,uv5.y,uv5.w))) + 
tex2D(samp0,float2(clamp(uv4.x,uv5.x,uv5.z),clamp(uv4.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv4.w,uv5.x,uv5.z),clamp(uv4.z,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv0.x,uv5.x,uv5.z),clamp(uv0.y,uv5.y,uv5.w))))/9;\n" + "in float4 uv3 : TEXCOORD3){\n" + "float4 texcol = (tex2D(samp0,uv1.xy) + tex2D(samp0,uv1.wz) + tex2D(samp0,uv2.xy) + tex2D(samp0,uv2.wz))*0.25f;\n" "float4 EncodedDepth = frac((texcol.r * (16777215.0f/16777216.0f)) * float4(1.0f,255.0f,255.0f*255.0f,255.0f*255.0f*255.0f));\n" "texcol = float4((EncodedDepth.rgb * (16777216.0f/16777215.0f)),1.0f);\n" "ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n" @@ -285,7 +273,7 @@ void PixelShaderCache::Clear() void PixelShaderCache::Shutdown() { - for(int i = 0;i<3;i++) + for(int i = 0;i < MAX_SSAA_SHADERS; i++) { if (s_ColorMatrixProgram[i]) s_ColorMatrixProgram[i]->Release(); s_ColorMatrixProgram[i] = NULL; diff --git a/Source/Plugins/Plugin_VideoDX9/Src/Render.cpp b/Source/Plugins/Plugin_VideoDX9/Src/Render.cpp index 9d23773f80..7eee57410e 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/Render.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/Render.cpp @@ -543,14 +543,17 @@ void Renderer::RenderToXFB(u32 xfbAddr, u32 fbWidth, u32 fbHeight, const EFBRect if(!fbWidth || !fbHeight) return; VideoFifo_CheckEFBAccess(); - VideoFifo_CheckSwapRequestAt(xfbAddr, fbWidth, fbHeight); - FBManager.CopyToXFB(xfbAddr, fbWidth, fbHeight, sourceRc); + VideoFifo_CheckSwapRequestAt(xfbAddr, fbWidth, fbHeight); XFBWrited = true; // XXX: Without the VI, how would we know what kind of field this is? So // just use progressive. 
- if (!g_ActiveConfig.bUseXFB) + if (g_ActiveConfig.bUseXFB) { - Renderer::Swap(xfbAddr, FIELD_PROGRESSIVE, fbWidth, fbHeight); + FBManager.CopyToXFB(xfbAddr, fbWidth, fbHeight, sourceRc); + } + else + { + Renderer::Swap(xfbAddr, FIELD_PROGRESSIVE, fbWidth, fbHeight,sourceRc); Common::AtomicStoreRelease(s_swapRequested, FALSE); } } @@ -953,7 +956,7 @@ void Renderer::SetBlendMode(bool forceUpdate) -void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight) +void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight,const EFBRectangle& rc) { if (g_bSkipCurrentFrame || (!XFBWrited && !g_ActiveConfig.bUseRealXFB) || !fbWidth || !fbHeight) { @@ -967,7 +970,7 @@ void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight) if (field == FIELD_LOWER) xfbAddr -= fbWidth * 2; u32 xfbCount = 0; const XFBSource** xfbSourceList = FBManager.GetXFBSource(xfbAddr, fbWidth, fbHeight, xfbCount); - if (!xfbSourceList || xfbCount == 0) + if ((!xfbSourceList || xfbCount == 0) && g_ActiveConfig.bUseXFB && !g_ActiveConfig.bUseRealXFB) { g_VideoInitialize.pCopiedToXFB(false); return; @@ -1015,57 +1018,65 @@ void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight) D3D::ChangeSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_LINEAR); D3D::ChangeSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR); - const XFBSource* xfbSource; - - // draw each xfb source - for (u32 i = 0; i < xfbCount; ++i) + if(g_ActiveConfig.bUseXFB) { - xfbSource = xfbSourceList[i]; - MathUtil::Rectangle sourceRc; - - sourceRc.left = 0; - sourceRc.top = 0; - sourceRc.right = xfbSource->texWidth; - sourceRc.bottom = xfbSource->texHeight; + const XFBSource* xfbSource; - MathUtil::Rectangle drawRc; - - if (g_ActiveConfig.bUseXFB && !g_ActiveConfig.bUseRealXFB) + // draw each xfb source + for (u32 i = 0; i < xfbCount; ++i) { - // use virtual xfb with offset - int xfbHeight = xfbSource->srcHeight; - int xfbWidth = xfbSource->srcWidth; - int hOffset = 
((s32)xfbSource->srcAddr - (s32)xfbAddr) / ((s32)fbWidth * 2); - - drawRc.bottom = 1.0f - 2.0f * ((hOffset) / (float)fbHeight); - drawRc.top = 1.0f - 2.0f * ((hOffset + xfbHeight) / (float)fbHeight); - drawRc.left = -(xfbWidth / (float)fbWidth); - drawRc.right = (xfbWidth / (float)fbWidth); + xfbSource = xfbSourceList[i]; + MathUtil::Rectangle sourceRc; + sourceRc.left = 0; + sourceRc.top = 0; + sourceRc.right = xfbSource->texWidth; + sourceRc.bottom = xfbSource->texHeight; - if (!g_ActiveConfig.bAutoScale) + MathUtil::Rectangle drawRc; + + if (g_ActiveConfig.bUseXFB && !g_ActiveConfig.bUseRealXFB) { - // scale draw area for a 1 to 1 pixel mapping with the draw target - float vScale = (float)fbHeight / (float)s_backbuffer_height; - float hScale = (float)fbWidth / (float)s_backbuffer_width; + // use virtual xfb with offset + int xfbHeight = xfbSource->srcHeight; + int xfbWidth = xfbSource->srcWidth; + int hOffset = ((s32)xfbSource->srcAddr - (s32)xfbAddr) / ((s32)fbWidth * 2); - drawRc.top *= vScale; - drawRc.bottom *= vScale; - drawRc.left *= hScale; - drawRc.right *= hScale; + drawRc.bottom = 1.0f - 2.0f * ((hOffset) / (float)fbHeight); + drawRc.top = 1.0f - 2.0f * ((hOffset + xfbHeight) / (float)fbHeight); + drawRc.left = -(xfbWidth / (float)fbWidth); + drawRc.right = (xfbWidth / (float)fbWidth); + + + if (!g_ActiveConfig.bAutoScale) + { + // scale draw area for a 1 to 1 pixel mapping with the draw target + float vScale = (float)fbHeight / (float)s_backbuffer_height; + float hScale = (float)fbWidth / (float)s_backbuffer_width; + + drawRc.top *= vScale; + drawRc.bottom *= vScale; + drawRc.left *= hScale; + drawRc.right *= hScale; + } + } + else + { + drawRc.top = -1; + drawRc.bottom = 1; + drawRc.left = -1; + drawRc.right = 1; } - } - else - { - drawRc.top = -1; - drawRc.bottom = 1; - drawRc.left = -1; - drawRc.right = 1; - } - 
D3D::drawShadedTexSubQuad(xfbSource->texture,&sourceRc,xfbSource->texWidth,xfbSource->texHeight,&drawRc,Width,Height,PixelShaderCache::GetColorCopyProgram(0),VertexShaderCache::GetSimpleVertexShader(0)); + D3D::drawShadedTexSubQuad(xfbSource->texture,&sourceRc,xfbSource->texWidth,xfbSource->texHeight,&drawRc,Width,Height,PixelShaderCache::GetColorCopyProgram(0),VertexShaderCache::GetSimpleVertexShader(0)); + } + } + else + { + TargetRectangle targetRc = Renderer::ConvertEFBRectangle(rc); + LPDIRECT3DTEXTURE9 read_texture = FBManager.GetEFBColorTexture(rc); + D3D::drawShadedTexQuad(read_texture,targetRc.AsRECT(),Renderer::GetFullTargetWidth(),Renderer::GetFullTargetHeight(),Width,Height,PixelShaderCache::GetColorCopyProgram(g_Config.iMultisampleMode),VertexShaderCache::GetSimpleVertexShader(g_Config.iMultisampleMode)); } - D3D::RefreshSamplerState(0, D3DSAMP_MINFILTER); D3D::RefreshSamplerState(0, D3DSAMP_MAGFILTER); vp.X = 0; @@ -1203,7 +1214,7 @@ void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight) yScale = (float)(dst_rect.bottom - dst_rect.top) / (float)s_XFB_height; } - float SupersampleCoeficient = s_LastAA + 1; + float SupersampleCoeficient = s_LastAA + 1; switch(s_LastEFBScale) { case 0: @@ -1373,7 +1384,8 @@ void Renderer::SetSamplerState(int stage, int texindex) D3D::SetSamplerState(stage, D3DSAMP_ADDRESSU, d3dClamps[tm0.wrap_s]); D3D::SetSamplerState(stage, D3DSAMP_ADDRESSV, d3dClamps[tm0.wrap_t]); - float lodbias = tm0.lod_bias / 32.0f; + //float SuperSampleCoeficient = (s_LastAA < 3)? 
s_LastAA + 1 : s_LastAA - 1;// uncoment this changes to conserve detail when incresing ssaa level + float lodbias = (tm0.lod_bias / 32.0f);// + (s_LastAA)?(log(SuperSampleCoeficient) / log(2.0f)):0; D3D::SetSamplerState(stage,D3DSAMP_MIPMAPLODBIAS,*(DWORD*)&lodbias); D3D::SetSamplerState(stage,D3DSAMP_MAXMIPLEVEL,tm1.min_lod>>4); } diff --git a/Source/Plugins/Plugin_VideoDX9/Src/TextureCache.cpp b/Source/Plugins/Plugin_VideoDX9/Src/TextureCache.cpp index 49ca8f3cdf..63e0e995e3 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/TextureCache.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/TextureCache.cpp @@ -551,20 +551,29 @@ void TextureCache::CopyRenderTargetToTexture(u32 address, bool bFromZBuffer, boo sourcerect.right = targetSource.right; sourcerect.top = targetSource.top; - if(bScaleByHalf) + + if(bFromZBuffer) + { + if(bScaleByHalf || g_ActiveConfig.iMultisampleMode) + { + D3D::ChangeSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_LINEAR); + D3D::ChangeSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR); + } + else + { + D3D::ChangeSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_POINT); + D3D::ChangeSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_POINT); + } + } + else { D3D::ChangeSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_LINEAR); D3D::ChangeSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR); } - else - { - D3D::ChangeSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_POINT); - D3D::ChangeSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_POINT); - } D3DFORMAT bformat = FBManager.GetEFBDepthRTSurfaceFormat(); - int SSAAMode = ( g_ActiveConfig.iMultisampleMode > 3 )? 
0 : g_ActiveConfig.iMultisampleMode; + int SSAAMode = g_ActiveConfig.iMultisampleMode; D3D::drawShadedTexQuad( read_texture, &sourcerect, diff --git a/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.cpp b/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.cpp index e062fb270a..60bd7b2f18 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.cpp @@ -38,15 +38,16 @@ VertexShaderCache::VSCache VertexShaderCache::vshaders; const VertexShaderCache::VSCacheEntry *VertexShaderCache::last_entry; static float GC_ALIGNED16(lastVSconstants[C_FOGPARAMS + 8][4]); +#define MAX_SSAA_SHADERS 3 -static LPDIRECT3DVERTEXSHADER9 SimpleVertexShader[3]; +static LPDIRECT3DVERTEXSHADER9 SimpleVertexShader[MAX_SSAA_SHADERS]; static LPDIRECT3DVERTEXSHADER9 ClearVertexShader; LinearDiskCache g_vs_disk_cache; LPDIRECT3DVERTEXSHADER9 VertexShaderCache::GetSimpleVertexShader(int level) { - return SimpleVertexShader[level % 3]; + return SimpleVertexShader[level % MAX_SSAA_SHADERS]; } LPDIRECT3DVERTEXSHADER9 VertexShaderCache::GetClearVertexShader() @@ -163,22 +164,14 @@ void VertexShaderCache::Init() "{\n" "float4 vPosition : POSITION;\n" "float4 vTexCoord : TEXCOORD0;\n" - "float4 vTexCoord1 : TEXCOORD1;\n" - "float4 vTexCoord2 : TEXCOORD2;\n" - "float4 vTexCoord3 : TEXCOORD3;\n" - "float4 vTexCoord4 : TEXCOORD4;\n" - "float4 vTexCoord5 : TEXCOORD5;\n" + "float4 vTexCoord1 : TEXCOORD1;\n" "};\n" "VSOUTPUT main(float4 inPosition : POSITION,float2 inTEX0 : TEXCOORD0,float2 inTEX1 : TEXCOORD1,float4 inTEX2 : TEXCOORD2)\n" "{\n" "VSOUTPUT OUT;" "OUT.vPosition = inPosition;\n" "OUT.vTexCoord = inTEX0.xyyx;\n" - "OUT.vTexCoord1 = inTEX0.xyyx + (float4(-0.5f,-0.5f,-0.5f,-0.5f) * inTEX1.xyyx);\n" - "OUT.vTexCoord2 = inTEX0.xyyx + (float4(-0.5f, 0.5f, 0.5f,-0.5f) * inTEX1.xyyx);\n" - "OUT.vTexCoord3 = inTEX0.xyyx + (float4( 0.5f,-0.5f,-0.5f, 0.5f) * inTEX1.xyyx);\n" - "OUT.vTexCoord4 = inTEX0.xyyx + (float4( 0.5f, 0.5f, 
0.5f, 0.5f) * inTEX1.xyyx);\n" - "OUT.vTexCoord5 = inTEX2;\n" + "OUT.vTexCoord1 = inTEX2;\n" "return OUT;\n" "}\n"); SimpleVertexShader[1] = D3D::CompileAndCreateVertexShader(vProg, (int)strlen(vProg)); @@ -189,20 +182,16 @@ void VertexShaderCache::Init() "float4 vTexCoord : TEXCOORD0;\n" "float4 vTexCoord1 : TEXCOORD1;\n" "float4 vTexCoord2 : TEXCOORD2;\n" - "float4 vTexCoord3 : TEXCOORD3;\n" - "float4 vTexCoord4 : TEXCOORD4;\n" - "float4 vTexCoord5 : TEXCOORD5;\n" + "float4 vTexCoord3 : TEXCOORD3;\n" "};\n" "VSOUTPUT main(float4 inPosition : POSITION,float2 inTEX0 : TEXCOORD0,float2 inTEX1 : TEXCOORD1,float4 inTEX2 : TEXCOORD2)\n" "{\n" "VSOUTPUT OUT;" "OUT.vPosition = inPosition;\n" "OUT.vTexCoord = inTEX0.xyyx;\n" - "OUT.vTexCoord1 = inTEX0.xyyx + (float4(-1.0f,-1.0f,-1.0f, 1.0f) * inTEX1.xyyx);\n" - "OUT.vTexCoord2 = inTEX0.xyyx + (float4(-1.0f, 0.0f, 0.0f, 1.0f) * inTEX1.xyyx);\n" - "OUT.vTexCoord3 = inTEX0.xyyx + (float4(-1.0f, 1.0f, 1.0f, 1.0f) * inTEX1.xyyx);\n" - "OUT.vTexCoord4 = inTEX0.xyyx + (float4( 0.0f, 1.0f,-1.0f, 0.0f) * inTEX1.xyyx);\n" - "OUT.vTexCoord5 = inTEX2;\n" + "OUT.vTexCoord1 = inTEX0.xyyx + (float4(-1.0f,-0.5f, 1.0f,-0.5f) * inTEX1.xyyx);\n" + "OUT.vTexCoord2 = inTEX0.xyyx + (float4( 1.0f, 0.5f,-1.0f, 0.5f) * inTEX1.xyyx);\n" + "OUT.vTexCoord3 = inTEX2;\n" "return OUT;\n" "}\n"); SimpleVertexShader[2] = D3D::CompileAndCreateVertexShader(vProg, (int)strlen(vProg)); @@ -236,7 +225,7 @@ void VertexShaderCache::Clear() void VertexShaderCache::Shutdown() { - for (int i = 0; i < 3; i++) + for (int i = 0; i < MAX_SSAA_SHADERS; i++) { if (SimpleVertexShader[i]) SimpleVertexShader[i]->Release(); diff --git a/Source/Plugins/Plugin_VideoDX9/Src/main.cpp b/Source/Plugins/Plugin_VideoDX9/Src/main.cpp index d460b27154..b99dd1fbec 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/main.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/main.cpp @@ -347,7 +347,8 @@ void VideoFifo_CheckSwapRequest() { if (Common::AtomicLoadAcquire(s_swapRequested)) { - 
Renderer::Swap(s_beginFieldArgs.xfbAddr, s_beginFieldArgs.field, s_beginFieldArgs.fbWidth, s_beginFieldArgs.fbHeight); + EFBRectangle rc; + Renderer::Swap(s_beginFieldArgs.xfbAddr, s_beginFieldArgs.field, s_beginFieldArgs.fbWidth, s_beginFieldArgs.fbHeight,rc); Common::AtomicStoreRelease(s_swapRequested, FALSE); } } diff --git a/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp b/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp index ce0c3132e3..0962004496 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp @@ -824,19 +824,22 @@ void Renderer::RenderToXFB(u32 xfbAddr, u32 fbWidth, u32 fbHeight, const EFBRect s_skipSwap = g_bSkipCurrentFrame; VideoFifo_CheckEFBAccess(); VideoFifo_CheckSwapRequestAt(xfbAddr, fbWidth, fbHeight); - g_framebufferManager.CopyToXFB(xfbAddr, fbWidth, fbHeight, sourceRc); XFBWrited = true; // XXX: Without the VI, how would we know what kind of field this is? So // just use progressive. - if (!g_ActiveConfig.bUseXFB) + if (g_ActiveConfig.bUseXFB) { - Renderer::Swap(xfbAddr, FIELD_PROGRESSIVE, fbWidth, fbHeight); + g_framebufferManager.CopyToXFB(xfbAddr, fbWidth, fbHeight, sourceRc); + } + else + { + Renderer::Swap(xfbAddr, FIELD_PROGRESSIVE, fbWidth, fbHeight,sourceRc); Common::AtomicStoreRelease(s_swapRequested, FALSE); } } // This function has the final picture. We adjust the aspect ratio here. 
-void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight) +void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight,const EFBRectangle& Rc) { if (g_bSkipCurrentFrame || (!XFBWrited && !g_ActiveConfig.bUseRealXFB) || !fbWidth || !fbHeight) { @@ -846,7 +849,7 @@ void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight) if (field == FIELD_LOWER) xfbAddr -= fbWidth * 2; u32 xfbCount = 0; const XFBSource** xfbSourceList = g_framebufferManager.GetXFBSource(xfbAddr, fbWidth, fbHeight, xfbCount); - if (!xfbSourceList) + if ((!xfbSourceList || xfbCount == 0) && g_ActiveConfig.bUseXFB && !g_ActiveConfig.bUseRealXFB) { g_VideoInitialize.pCopiedToXFB(false); WARN_LOG(VIDEO, "Failed to get video for this frame"); @@ -892,90 +895,119 @@ void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight) const XFBSource* xfbSource = NULL; - // draw each xfb source - for (u32 i = 0; i < xfbCount; ++i) + if(g_ActiveConfig.bUseXFB) { - xfbSource = xfbSourceList[i]; - - TargetRectangle sourceRc; - - if (g_ActiveConfig.bAutoScale || g_ActiveConfig.bUseXFB) + // draw each xfb source + for (u32 i = 0; i < xfbCount; ++i) { - sourceRc = xfbSource->sourceRc; - } - else - { - sourceRc.left = 0; - sourceRc.top = xfbSource->texHeight; - sourceRc.right = xfbSource->texWidth; - sourceRc.bottom = 0; - } + xfbSource = xfbSourceList[i]; - MathUtil::Rectangle drawRc; + TargetRectangle sourceRc; - if (g_ActiveConfig.bUseXFB && !g_ActiveConfig.bUseRealXFB) - { - // use virtual xfb with offset - int xfbHeight = xfbSource->srcHeight; - int xfbWidth = xfbSource->srcWidth; - int hOffset = ((s32)xfbSource->srcAddr - (s32)xfbAddr) / ((s32)fbWidth * 2); - - drawRc.top = 1.0f - (2.0f * (hOffset) / (float)fbHeight); - drawRc.bottom = 1.0f - (2.0f * (hOffset + xfbHeight) / (float)fbHeight); - drawRc.left = -(xfbWidth / (float)fbWidth); - drawRc.right = (xfbWidth / (float)fbWidth); - - if (!g_ActiveConfig.bAutoScale) + if 
(g_ActiveConfig.bAutoScale || g_ActiveConfig.bUseXFB) { - // scale draw area for a 1 to 1 pixel mapping with the draw target - float vScale = (float)fbHeight / (float)back_rc.GetHeight(); - float hScale = (float)fbWidth / (float)back_rc.GetWidth(); - - drawRc.top *= vScale; - drawRc.bottom *= vScale; - drawRc.left *= hScale; - drawRc.right *= hScale; + sourceRc = xfbSource->sourceRc; + } + else + { + sourceRc.left = 0; + sourceRc.top = xfbSource->texHeight; + sourceRc.right = xfbSource->texWidth; + sourceRc.bottom = 0; } - } - else - { - drawRc.top = 1; - drawRc.bottom = -1; - drawRc.left = -1; - drawRc.right = 1; - } - - // Tell the OSD Menu about the current internal resolution - OSDInternalW = xfbSource->sourceRc.GetWidth(); OSDInternalH = xfbSource->sourceRc.GetHeight(); - // Texture map xfbSource->texture onto the main buffer - glBindTexture(GL_TEXTURE_RECTANGLE_ARB, xfbSource->texture); + MathUtil::Rectangle drawRc; - // We must call ApplyShader here even if no post proc is selected - it takes - // care of disabling it in that case. It returns false in case of no post processing. 
+ if (g_ActiveConfig.bUseXFB && !g_ActiveConfig.bUseRealXFB) + { + // use virtual xfb with offset + int xfbHeight = xfbSource->srcHeight; + int xfbWidth = xfbSource->srcWidth; + int hOffset = ((s32)xfbSource->srcAddr - (s32)xfbAddr) / ((s32)fbWidth * 2); + + drawRc.top = 1.0f - (2.0f * (hOffset) / (float)fbHeight); + drawRc.bottom = 1.0f - (2.0f * (hOffset + xfbHeight) / (float)fbHeight); + drawRc.left = -(xfbWidth / (float)fbWidth); + drawRc.right = (xfbWidth / (float)fbWidth); + + if (!g_ActiveConfig.bAutoScale) + { + // scale draw area for a 1 to 1 pixel mapping with the draw target + float vScale = (float)fbHeight / (float)back_rc.GetHeight(); + float hScale = (float)fbWidth / (float)back_rc.GetWidth(); + + drawRc.top *= vScale; + drawRc.bottom *= vScale; + drawRc.left *= hScale; + drawRc.right *= hScale; + } + } + else + { + drawRc.top = 1; + drawRc.bottom = -1; + drawRc.left = -1; + drawRc.right = 1; + } + + // Tell the OSD Menu about the current internal resolution + OSDInternalW = xfbSource->sourceRc.GetWidth(); OSDInternalH = xfbSource->sourceRc.GetHeight(); + + // Texture map xfbSource->texture onto the main buffer + glBindTexture(GL_TEXTURE_RECTANGLE_ARB, xfbSource->texture); + + // We must call ApplyShader here even if no post proc is selected - it takes + // care of disabling it in that case. It returns false in case of no post processing. 
+ if (applyShader) + { + glBegin(GL_QUADS); + glTexCoord2f(sourceRc.left, sourceRc.bottom); glMultiTexCoord2fARB(GL_TEXTURE1, 0, 0); glVertex2f(drawRc.left, drawRc.bottom); + glTexCoord2f(sourceRc.left, sourceRc.top); glMultiTexCoord2fARB(GL_TEXTURE1, 0, 1); glVertex2f(drawRc.left, drawRc.top); + glTexCoord2f(sourceRc.right, sourceRc.top); glMultiTexCoord2fARB(GL_TEXTURE1, 1, 1); glVertex2f(drawRc.right, drawRc.top); + glTexCoord2f(sourceRc.right, sourceRc.bottom); glMultiTexCoord2fARB(GL_TEXTURE1, 1, 0); glVertex2f(drawRc.right, drawRc.bottom); + glEnd(); + PixelShaderCache::DisableShader(); + } + else + { + glBegin(GL_QUADS); + glTexCoord2f(sourceRc.left, sourceRc.bottom); glVertex2f(drawRc.left, drawRc.bottom); + glTexCoord2f(sourceRc.left, sourceRc.top); glVertex2f(drawRc.left, drawRc.top); + glTexCoord2f(sourceRc.right, sourceRc.top); glVertex2f(drawRc.right, drawRc.top); + glTexCoord2f(sourceRc.right, sourceRc.bottom); glVertex2f(drawRc.right, drawRc.bottom); + glEnd(); + } + + GL_REPORT_ERRORD(); + } + } + else + { + TargetRectangle targetRc = Renderer::ConvertEFBRectangle(Rc); + GLuint read_texture = g_framebufferManager.ResolveAndGetRenderTarget(Rc); + glBindTexture(GL_TEXTURE_RECTANGLE_ARB, read_texture); if (applyShader) { glBegin(GL_QUADS); - glTexCoord2f(sourceRc.left, sourceRc.bottom); glMultiTexCoord2fARB(GL_TEXTURE1, 0, 0); glVertex2f(drawRc.left, drawRc.bottom); - glTexCoord2f(sourceRc.left, sourceRc.top); glMultiTexCoord2fARB(GL_TEXTURE1, 0, 1); glVertex2f(drawRc.left, drawRc.top); - glTexCoord2f(sourceRc.right, sourceRc.top); glMultiTexCoord2fARB(GL_TEXTURE1, 1, 1); glVertex2f(drawRc.right, drawRc.top); - glTexCoord2f(sourceRc.right, sourceRc.bottom); glMultiTexCoord2fARB(GL_TEXTURE1, 1, 0); glVertex2f(drawRc.right, drawRc.bottom); + glTexCoord2f(targetRc.left, targetRc.bottom); glMultiTexCoord2fARB(GL_TEXTURE1, 0, 0); glVertex2f(-1, -1); + glTexCoord2f(targetRc.left, targetRc.top); glMultiTexCoord2fARB(GL_TEXTURE1, 0, 1); glVertex2f(-1, 1); + 
glTexCoord2f(targetRc.right, targetRc.top); glMultiTexCoord2fARB(GL_TEXTURE1, 1, 1); glVertex2f( 1, 1); + glTexCoord2f(targetRc.right, targetRc.bottom); glMultiTexCoord2fARB(GL_TEXTURE1, 1, 0); glVertex2f( 1, -1); glEnd(); PixelShaderCache::DisableShader(); } else { glBegin(GL_QUADS); - glTexCoord2f(sourceRc.left, sourceRc.bottom); glVertex2f(drawRc.left, drawRc.bottom); - glTexCoord2f(sourceRc.left, sourceRc.top); glVertex2f(drawRc.left, drawRc.top); - glTexCoord2f(sourceRc.right, sourceRc.top); glVertex2f(drawRc.right, drawRc.top); - glTexCoord2f(sourceRc.right, sourceRc.bottom); glVertex2f(drawRc.right, drawRc.bottom); + glTexCoord2f(targetRc.left, targetRc.bottom); glVertex2f(-1, -1); + glTexCoord2f(targetRc.left, targetRc.top); glVertex2f(-1, 1); + glTexCoord2f(targetRc.right, targetRc.top); glVertex2f( 1, 1); + glTexCoord2f(targetRc.right, targetRc.bottom); glVertex2f( 1, -1); glEnd(); } - - GL_REPORT_ERRORD(); + + } - glBindTexture(GL_TEXTURE_RECTANGLE_ARB, 0); TextureMngr::DisableStage(0); diff --git a/Source/Plugins/Plugin_VideoOGL/Src/main.cpp b/Source/Plugins/Plugin_VideoOGL/Src/main.cpp index e291a8803e..3f0d18b023 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/main.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/main.cpp @@ -370,7 +370,8 @@ void VideoFifo_CheckSwapRequest() { if (Common::AtomicLoadAcquire(s_swapRequested)) { - Renderer::Swap(s_beginFieldArgs.xfbAddr, s_beginFieldArgs.field, s_beginFieldArgs.fbWidth, s_beginFieldArgs.fbHeight); + EFBRectangle rc; + Renderer::Swap(s_beginFieldArgs.xfbAddr, s_beginFieldArgs.field, s_beginFieldArgs.fbWidth, s_beginFieldArgs.fbHeight,rc); Common::AtomicStoreRelease(s_swapRequested, FALSE); } }