diff --git a/Source/Core/VideoBackends/OGL/BoundingBox.cpp b/Source/Core/VideoBackends/OGL/BoundingBox.cpp index 6bce10754e..44ba0ab118 100644 --- a/Source/Core/VideoBackends/OGL/BoundingBox.cpp +++ b/Source/Core/VideoBackends/OGL/BoundingBox.cpp @@ -2,20 +2,45 @@ // Licensed under GPLv2+ // Refer to the license.txt file included. +#include +#include #include #include "Common/GL/GLUtil.h" #include "VideoBackends/OGL/BoundingBox.h" +#include "VideoBackends/OGL/FramebufferManager.h" #include "VideoCommon/DriverDetails.h" #include "VideoCommon/VideoConfig.h" static GLuint s_bbox_buffer_id; +static GLuint s_pbo; + +static std::array s_stencil_bounds; +static bool s_stencil_updated; +static bool s_stencil_cleared; + +static int s_target_width; +static int s_target_height; namespace OGL { -void BoundingBox::Init() +void BoundingBox::SetTargetSizeChanged(int target_width, int target_height) +{ + if (g_ActiveConfig.backend_info.bSupportsFragmentStoresAndAtomics) + return; + + s_target_width = target_width; + s_target_height = target_height; + s_stencil_updated = false; + + glBindBuffer(GL_PIXEL_PACK_BUFFER, s_pbo); + glBufferData(GL_PIXEL_PACK_BUFFER, s_target_width * s_target_height, nullptr, GL_STREAM_READ); + glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); +} + +void BoundingBox::Init(int target_width, int target_height) { if (g_ActiveConfig.backend_info.bSupportsFragmentStoresAndAtomics) { @@ -25,6 +50,12 @@ void BoundingBox::Init() glBufferData(GL_SHADER_STORAGE_BUFFER, 4 * sizeof(s32), initial_values, GL_DYNAMIC_DRAW); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, s_bbox_buffer_id); } + else + { + s_stencil_bounds = {{0, 0, 0, 0}}; + glGenBuffers(1, &s_pbo); + SetTargetSizeChanged(target_width, target_height); + } } void BoundingBox::Shutdown() @@ -33,40 +64,107 @@ void BoundingBox::Shutdown() { glDeleteBuffers(1, &s_bbox_buffer_id); } + else + { + glDeleteBuffers(1, &s_pbo); + } } void BoundingBox::Set(int index, int value) { - glBindBuffer(GL_SHADER_STORAGE_BUFFER, s_bbox_buffer_id); - glBufferSubData(GL_SHADER_STORAGE_BUFFER, index * sizeof(int), sizeof(int), &value); - glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); + if (g_ActiveConfig.backend_info.bSupportsFragmentStoresAndAtomics) + { + glBindBuffer(GL_SHADER_STORAGE_BUFFER, s_bbox_buffer_id); + glBufferSubData(GL_SHADER_STORAGE_BUFFER, index * sizeof(int), sizeof(int), &value); + glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); + } + else + { + s_stencil_bounds[index] = value; + + if (!s_stencil_cleared) + { + // Assumes that the EFB framebuffer is currently bound + glClearStencil(0); + glClear(GL_STENCIL_BUFFER_BIT); + s_stencil_updated = false; + s_stencil_cleared = true; + } + } } int BoundingBox::Get(int index) { - int data = 0; - glBindBuffer(GL_SHADER_STORAGE_BUFFER, s_bbox_buffer_id); - - if (!DriverDetails::HasBug(DriverDetails::BUG_SLOW_GETBUFFERSUBDATA)) + if (g_ActiveConfig.backend_info.bSupportsFragmentStoresAndAtomics) { - // Using glMapBufferRange to read back the contents of the SSBO is extremely slow - // on nVidia drivers. This is more noticeable at higher internal resolutions. - // Using glGetBufferSubData instead does not seem to exhibit this slowdown. - glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, index * sizeof(int), sizeof(int), &data); + int data = 0; + glBindBuffer(GL_SHADER_STORAGE_BUFFER, s_bbox_buffer_id); + if (!DriverDetails::HasBug(DriverDetails::BUG_SLOW_GETBUFFERSUBDATA)) + { + // Using glMapBufferRange to read back the contents of the SSBO is extremely slow + // on nVidia drivers. This is more noticeable at higher internal resolutions. + // Using glGetBufferSubData instead does not seem to exhibit this slowdown. + glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, index * sizeof(int), sizeof(int), &data); + } + else + { + // Using glMapBufferRange is faster on AMD cards by a measurable margin. + void* ptr = glMapBufferRange(GL_SHADER_STORAGE_BUFFER, index * sizeof(int), sizeof(int), + GL_MAP_READ_BIT); + if (ptr) + { + memcpy(&data, ptr, sizeof(int)); + glUnmapBuffer(GL_SHADER_STORAGE_BUFFER); + } + } + glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); + return data; } else { - // Using glMapBufferRange is faster on AMD cards by a measurable margin. - void* ptr = glMapBufferRange(GL_SHADER_STORAGE_BUFFER, index * sizeof(int), sizeof(int), - GL_MAP_READ_BIT); - if (ptr) + if (s_stencil_updated) { - memcpy(&data, ptr, sizeof(int)); - glUnmapBuffer(GL_SHADER_STORAGE_BUFFER); - } - } + s_stencil_updated = false; - glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); - return data; + FramebufferManager::ResolveEFBStencilTexture(); + glBindFramebuffer(GL_READ_FRAMEBUFFER, FramebufferManager::GetResolvedFramebuffer()); + glBindBuffer(GL_PIXEL_PACK_BUFFER, s_pbo); + glPixelStorei(GL_PACK_ALIGNMENT, 1); + glReadPixels(0, 0, s_target_width, s_target_height, GL_STENCIL_INDEX, GL_UNSIGNED_BYTE, 0); + glBindFramebuffer(GL_READ_FRAMEBUFFER, FramebufferManager::GetEFBFramebuffer()); + + // Eke every bit of performance out of the compiler that we can + std::array bounds = s_stencil_bounds; + + u8* data = static_cast(glMapBufferRange( + GL_PIXEL_PACK_BUFFER, 0, s_target_height * s_target_width, GL_MAP_READ_BIT)); + + for (int row = 0; row < s_target_height; row++) + { + for (int col = 0; col < s_target_width; col++) + { + if (data[row * s_target_width + col] == 0) + continue; + bounds[0] = std::min(bounds[0], col); + bounds[1] = std::max(bounds[1], col); + bounds[2] = std::min(bounds[2], row); + bounds[3] = std::max(bounds[3], row); + } + } + + s_stencil_bounds = bounds; + + glUnmapBuffer(GL_PIXEL_PACK_BUFFER); + glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); + } + + return s_stencil_bounds[index]; + } +} + +void BoundingBox::StencilWasUpdated() +{ + s_stencil_updated = true; + s_stencil_cleared = false; } }; diff --git a/Source/Core/VideoBackends/OGL/BoundingBox.h b/Source/Core/VideoBackends/OGL/BoundingBox.h index 0aedff54df..033ea56cd5 100644 --- a/Source/Core/VideoBackends/OGL/BoundingBox.h +++ b/Source/Core/VideoBackends/OGL/BoundingBox.h @@ -9,9 +9,16 @@ namespace OGL class BoundingBox { public: - static void Init(); + static void Init(int target_width, int target_height); static void Shutdown(); + static void SetTargetSizeChanged(int target_width, int target_height); + + // When SSBO isn't available, the bounding box is calculated directly from the + // stencil buffer. When the stencil buffer is changed, this function needs to + // be called to invalidate the cached bounding box data. + static void StencilWasUpdated(); + static void Set(int index, int value); static int Get(int index); }; diff --git a/Source/Core/VideoBackends/OGL/FramebufferManager.cpp b/Source/Core/VideoBackends/OGL/FramebufferManager.cpp index 001cbb59fc..13b6759a01 100644 --- a/Source/Core/VideoBackends/OGL/FramebufferManager.cpp +++ b/Source/Core/VideoBackends/OGL/FramebufferManager.cpp @@ -152,12 +152,13 @@ FramebufferManager::FramebufferManager(int targetWidth, int targetHeight, int ms m_resolvedColorTexture = CreateTexture(resolvedType, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE); m_resolvedDepthTexture = - CreateTexture(resolvedType, GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT); + CreateTexture(resolvedType, GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV); // Bind resolved textures to resolved framebuffer. glGenFramebuffers(m_EFBLayers, m_resolvedFramebuffer.data()); BindLayeredTexture(m_resolvedColorTexture, m_resolvedFramebuffer, GL_COLOR_ATTACHMENT0, resolvedType); BindLayeredTexture(m_resolvedDepthTexture, m_resolvedFramebuffer, GL_DEPTH_ATTACHMENT, resolvedType); + BindLayeredTexture(m_resolvedDepthTexture, m_resolvedFramebuffer, GL_STENCIL_ATTACHMENT, resolvedType); } m_efbColor = CreateTexture(m_textureType, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE); @@ -172,13 +173,15 @@ FramebufferManager::FramebufferManager(int targetWidth, int targetHeight, int ms glGenFramebuffers(m_EFBLayers, m_efbFramebuffer.data()); BindLayeredTexture(m_efbColor, m_efbFramebuffer, GL_COLOR_ATTACHMENT0, m_textureType); BindLayeredTexture(m_efbDepth, m_efbFramebuffer, GL_DEPTH_ATTACHMENT, m_textureType); + BindLayeredTexture(m_efbDepth, m_efbFramebuffer, GL_STENCIL_ATTACHMENT, m_textureType); // EFB framebuffer is currently bound, make sure to clear it before use. glViewport(0, 0, m_targetWidth, m_targetHeight); glScissor(0, 0, m_targetWidth, m_targetHeight); glClearColor(0.f, 0.f, 0.f, 0.f); glClearDepthf(1.0f); - glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); + glClearStencil(0); + glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); // reinterpret pixel format const char* vs = m_EFBLayers > 1 ? "void main(void) {\n" @@ -478,6 +481,24 @@ GLuint FramebufferManager::GetEFBDepthTexture(const EFBRectangle& sourceRc) } } +void FramebufferManager::ResolveEFBStencilTexture() +{ + if (m_msaaSamples <= 1) + return; + + // Resolve. + for (unsigned int i = 0; i < m_EFBLayers; i++) + { + glBindFramebuffer(GL_READ_FRAMEBUFFER, m_efbFramebuffer[i]); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_resolvedFramebuffer[i]); + glBlitFramebuffer(0, 0, m_targetWidth, m_targetHeight, 0, 0, m_targetWidth, m_targetHeight, + GL_STENCIL_BUFFER_BIT, GL_NEAREST); + } + + // Return to EFB. + glBindFramebuffer(GL_FRAMEBUFFER, m_efbFramebuffer[0]); +} + void FramebufferManager::CopyToRealXFB(u32 xfbAddr, u32 fbStride, u32 fbHeight, const EFBRectangle& sourceRc, float Gamma) { @@ -493,6 +514,13 @@ void FramebufferManager::CopyToRealXFB(u32 xfbAddr, u32 fbStride, u32 fbHeight, sourceRc.GetWidth(), fbStride, fbHeight); } +GLuint FramebufferManager::GetResolvedFramebuffer() +{ + if (m_msaaSamples <= 1) + return m_efbFramebuffer[0]; + return m_resolvedFramebuffer[0]; +} + void FramebufferManager::SetFramebuffer(GLuint fb) { glBindFramebuffer(GL_FRAMEBUFFER, fb != 0 ? fb : GetEFBFramebuffer()); diff --git a/Source/Core/VideoBackends/OGL/FramebufferManager.h b/Source/Core/VideoBackends/OGL/FramebufferManager.h index 54dd93936b..93f8e5bad3 100644 --- a/Source/Core/VideoBackends/OGL/FramebufferManager.h +++ b/Source/Core/VideoBackends/OGL/FramebufferManager.h @@ -70,6 +70,7 @@ public: // the EFB to a resolved texture first. static GLuint GetEFBColorTexture(const EFBRectangle& sourceRc); static GLuint GetEFBDepthTexture(const EFBRectangle& sourceRc); + static void ResolveEFBStencilTexture(); static GLuint GetEFBFramebuffer(unsigned int layer = 0) { @@ -77,7 +78,7 @@ public: } static GLuint GetXFBFramebuffer() { return m_xfbFramebuffer; } // Resolved framebuffer is only used in MSAA mode. - static GLuint GetResolvedFramebuffer() { return m_resolvedFramebuffer[0]; } + static GLuint GetResolvedFramebuffer(); static void SetFramebuffer(GLuint fb); static void FramebufferTexture(GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level); diff --git a/Source/Core/VideoBackends/OGL/Render.cpp b/Source/Core/VideoBackends/OGL/Render.cpp index 341e6b3686..d2124c6c5c 100644 --- a/Source/Core/VideoBackends/OGL/Render.cpp +++ b/Source/Core/VideoBackends/OGL/Render.cpp @@ -411,7 +411,8 @@ Renderer::Renderer() g_Config.backend_info.bSupportsPrimitiveRestart = !DriverDetails::HasBug(DriverDetails::BUG_PRIMITIVE_RESTART) && ((GLExtensions::Version() >= 310) || GLExtensions::Supports("GL_NV_primitive_restart")); - g_Config.backend_info.bSupportsBBox = g_Config.backend_info.bSupportsFragmentStoresAndAtomics = + g_Config.backend_info.bSupportsBBox = true; + g_Config.backend_info.bSupportsFragmentStoresAndAtomics = GLExtensions::Supports("GL_ARB_shader_storage_buffer_object"); g_Config.backend_info.bSupportsGSInstancing = GLExtensions::Supports("GL_ARB_gpu_shader5"); g_Config.backend_info.bSupportsSSAA = GLExtensions::Supports("GL_ARB_gpu_shader5") && @@ -497,7 +498,6 @@ Renderer::Renderer() g_Config.backend_info.bSupportsGSInstancing = g_Config.backend_info.bSupportsGeometryShaders && g_ogl_config.SupportedESPointSize > 0; g_Config.backend_info.bSupportsSSAA = g_ogl_config.bSupportsAEP; - g_Config.backend_info.bSupportsBBox = true; g_Config.backend_info.bSupportsFragmentStoresAndAtomics = true; g_ogl_config.bSupportsMSAA = true; g_ogl_config.bSupports2DTextureStorage = true; @@ -519,7 +519,6 @@ Renderer::Renderer() g_Config.backend_info.bSupportsGSInstancing = g_ogl_config.SupportedESPointSize > 0; g_Config.backend_info.bSupportsPaletteConversion = true; g_Config.backend_info.bSupportsSSAA = true; - g_Config.backend_info.bSupportsBBox = true; g_Config.backend_info.bSupportsFragmentStoresAndAtomics = true; g_ogl_config.bSupportsCopySubImage = true; g_ogl_config.bSupportsGLBaseVertex = true; @@ -657,10 +656,13 @@ Renderer::Renderer() // options while running g_Config.bRunning = true; - glStencilFunc(GL_ALWAYS, 0, 0); - glBlendFunc(GL_ONE, GL_ONE); + // The stencil is used for bounding box emulation when SSBOs are not available + glDisable(GL_STENCIL_TEST); + glStencilFunc(GL_ALWAYS, 1, 0xFF); + glStencilOp(GL_KEEP, GL_KEEP, GL_REPLACE); - glViewport(0, 0, GetTargetWidth(), GetTargetHeight()); // Reset The Current Viewport + // Reset The Current Viewport + glViewport(0, 0, GetTargetWidth(), GetTargetHeight()); if (g_ActiveConfig.backend_info.bSupportsClipControl) glClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE); @@ -677,10 +679,9 @@ Renderer::Renderer() glPixelStorei(GL_UNPACK_ALIGNMENT, 4); // 4-byte pixel alignment - glDisable(GL_STENCIL_TEST); glEnable(GL_SCISSOR_TEST); - glScissor(0, 0, GetTargetWidth(), GetTargetHeight()); + glBlendFunc(GL_ONE, GL_ONE); glBlendColor(0, 0, 0, 0.5f); glClearDepthf(1.0f); @@ -1364,6 +1365,7 @@ void Renderer::SwapImpl(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, g_framebuffer_manager.reset(); g_framebuffer_manager = std::make_unique(m_target_width, m_target_height, s_MSAASamples); + BoundingBox::SetTargetSizeChanged(m_target_width, m_target_height); } } diff --git a/Source/Core/VideoBackends/OGL/VertexManager.cpp b/Source/Core/VideoBackends/OGL/VertexManager.cpp index bd9878b692..d8d9249965 100644 --- a/Source/Core/VideoBackends/OGL/VertexManager.cpp +++ b/Source/Core/VideoBackends/OGL/VertexManager.cpp @@ -14,9 +14,11 @@ #include "Common/GL/GLExtensions/GLExtensions.h" #include "Common/StringUtil.h" +#include "VideoBackends/OGL/BoundingBox.h" #include "VideoBackends/OGL/ProgramShaderCache.h" #include "VideoBackends/OGL/Render.h" #include "VideoBackends/OGL/StreamBuffer.h" +#include "VideoCommon/BoundingBox.h" #include "VideoCommon/IndexGenerator.h" #include "VideoCommon/Statistics.h" @@ -156,8 +158,19 @@ void VertexManager::vFlush() // setup the pointers nativeVertexFmt->SetupVertexPointers(); + if (!g_Config.backend_info.bSupportsFragmentStoresAndAtomics && ::BoundingBox::active) + { + glEnable(GL_STENCIL_TEST); + } + Draw(stride); + if (!g_Config.backend_info.bSupportsFragmentStoresAndAtomics && ::BoundingBox::active) + { + OGL::BoundingBox::StencilWasUpdated(); + glDisable(GL_STENCIL_TEST); + } + #if defined(_DEBUG) || defined(DEBUGFAST) if (g_ActiveConfig.iLog & CONF_SAVESHADERS) { @@ -177,7 +190,6 @@ void VertexManager::vFlush() } #endif g_Config.iSaveTargetId++; - ClearEFBCache(); } diff --git a/Source/Core/VideoBackends/OGL/main.cpp b/Source/Core/VideoBackends/OGL/main.cpp index 3be16a6cbf..c1cf73f413 100644 --- a/Source/Core/VideoBackends/OGL/main.cpp +++ b/Source/Core/VideoBackends/OGL/main.cpp @@ -212,7 +212,7 @@ void VideoBackend::Video_Prepare() g_sampler_cache = std::make_unique(); static_cast(g_renderer.get())->Init(); TextureConverter::Init(); - BoundingBox::Init(); + BoundingBox::Init(g_renderer->GetTargetWidth(), g_renderer->GetTargetHeight()); } void VideoBackend::Shutdown()