From a5b4ac6faa11201ef90ff2a9881af8b0146f9c1f Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Sat, 21 Feb 2015 16:57:41 -0600 Subject: [PATCH 1/2] [GLExtensions] Add support for NV_occlusion_query_samples. --- .../OGL/GLExtensions/GLExtensions.cpp | 18 ++++++++++++++++++ .../OGL/GLExtensions/GLExtensions.h | 1 + .../GLExtensions/NV_occlusion_query_samples.h | 13 +++++++++++++ Source/Core/VideoBackends/OGL/OGL.vcxproj | 1 + .../Core/VideoBackends/OGL/OGL.vcxproj.filters | 3 +++ 5 files changed, 36 insertions(+) create mode 100644 Source/Core/VideoBackends/OGL/GLExtensions/NV_occlusion_query_samples.h diff --git a/Source/Core/VideoBackends/OGL/GLExtensions/GLExtensions.cpp b/Source/Core/VideoBackends/OGL/GLExtensions/GLExtensions.cpp index 78f59514d9..fd6dc2485a 100644 --- a/Source/Core/VideoBackends/OGL/GLExtensions/GLExtensions.cpp +++ b/Source/Core/VideoBackends/OGL/GLExtensions/GLExtensions.cpp @@ -776,6 +776,15 @@ PFNGLPUSHDEBUGGROUPPROC glPushDebugGroup; PFNGLBUFFERSTORAGEPROC glBufferStorage; PFNGLNAMEDBUFFERSTORAGEEXTPROC glNamedBufferStorageEXT; +// GL_NV_occlusion_query_samples +PFNGLGENOCCLUSIONQUERIESNVPROC glGenOcclusionQueriesNV; +PFNGLDELETEOCCLUSIONQUERIESNVPROC glDeleteOcclusionQueriesNV; +PFNGLISOCCLUSIONQUERYNVPROC glIsOcclusionQueryNV; +PFNGLBEGINOCCLUSIONQUERYNVPROC glBeginOcclusionQueryNV; +PFNGLENDOCCLUSIONQUERYNVPROC glEndOcclusionQueryNV; +PFNGLGETOCCLUSIONQUERYIVNVPROC glGetOcclusionQueryivNV; +PFNGLGETOCCLUSIONQUERYUIVNVPROC glGetOcclusionQueryuivNV; + // Creates a GLFunc object that requires a feature #define GLFUNC_REQUIRES(x, y) { (void**)&x, #x, y } // Creates a GLFunc object with a different function suffix @@ -1254,6 +1263,15 @@ const GLFunc gl_function_array[] = // EXT_geometry_shader GLFUNC_SUFFIX(glFramebufferTexture, EXT, "GL_EXT_geometry_shader !VERSION_3_2"), + // NV_occlusion_query_samples + GLFUNC_REQUIRES(glGenOcclusionQueriesNV, "GL_NV_occlusion_query_samples"), + GLFUNC_REQUIRES(glDeleteOcclusionQueriesNV, 
"GL_NV_occlusion_query_samples"), + GLFUNC_REQUIRES(glIsOcclusionQueryNV, "GL_NV_occlusion_query_samples"), + GLFUNC_REQUIRES(glBeginOcclusionQueryNV, "GL_NV_occlusion_query_samples"), + GLFUNC_REQUIRES(glEndOcclusionQueryNV, "GL_NV_occlusion_query_samples"), + GLFUNC_REQUIRES(glGetOcclusionQueryivNV, "GL_NV_occlusion_query_samples"), + GLFUNC_REQUIRES(glGetOcclusionQueryuivNV, "GL_NV_occlusion_query_samples"), + // gl_1_1 // OpenGL 1.1 is at the end due to a bug in Android's EGL stack. // eglGetProcAddress can only return a finite amount of function pointers diff --git a/Source/Core/VideoBackends/OGL/GLExtensions/GLExtensions.h b/Source/Core/VideoBackends/OGL/GLExtensions/GLExtensions.h index ccc835eae6..7645fcd8c3 100644 --- a/Source/Core/VideoBackends/OGL/GLExtensions/GLExtensions.h +++ b/Source/Core/VideoBackends/OGL/GLExtensions/GLExtensions.h @@ -31,6 +31,7 @@ #include "VideoBackends/OGL/GLExtensions/gl_3_1.h" #include "VideoBackends/OGL/GLExtensions/gl_3_2.h" #include "VideoBackends/OGL/GLExtensions/KHR_debug.h" +#include "VideoBackends/OGL/GLExtensions/NV_occlusion_query_samples.h" #include "VideoBackends/OGL/GLExtensions/NV_primitive_restart.h" namespace GLExtensions diff --git a/Source/Core/VideoBackends/OGL/GLExtensions/NV_occlusion_query_samples.h b/Source/Core/VideoBackends/OGL/GLExtensions/NV_occlusion_query_samples.h new file mode 100644 index 0000000000..10020e230b --- /dev/null +++ b/Source/Core/VideoBackends/OGL/GLExtensions/NV_occlusion_query_samples.h @@ -0,0 +1,13 @@ +// Copyright 2013 Dolphin Emulator Project +// Licensed under GPLv2 +// Refer to the license.txt file included. 
+ +#include "VideoBackends/OGL/GLExtensions/gl_common.h" + +extern PFNGLGENOCCLUSIONQUERIESNVPROC glGenOcclusionQueriesNV; +extern PFNGLDELETEOCCLUSIONQUERIESNVPROC glDeleteOcclusionQueriesNV; +extern PFNGLISOCCLUSIONQUERYNVPROC glIsOcclusionQueryNV; +extern PFNGLBEGINOCCLUSIONQUERYNVPROC glBeginOcclusionQueryNV; +extern PFNGLENDOCCLUSIONQUERYNVPROC glEndOcclusionQueryNV; +extern PFNGLGETOCCLUSIONQUERYIVNVPROC glGetOcclusionQueryivNV; +extern PFNGLGETOCCLUSIONQUERYUIVNVPROC glGetOcclusionQueryuivNV; diff --git a/Source/Core/VideoBackends/OGL/OGL.vcxproj b/Source/Core/VideoBackends/OGL/OGL.vcxproj index 1b60169220..8e8bbdcf2b 100644 --- a/Source/Core/VideoBackends/OGL/OGL.vcxproj +++ b/Source/Core/VideoBackends/OGL/OGL.vcxproj @@ -83,6 +83,7 @@ + diff --git a/Source/Core/VideoBackends/OGL/OGL.vcxproj.filters b/Source/Core/VideoBackends/OGL/OGL.vcxproj.filters index 8e65e8e2f5..9483aecbca 100644 --- a/Source/Core/VideoBackends/OGL/OGL.vcxproj.filters +++ b/Source/Core/VideoBackends/OGL/OGL.vcxproj.filters @@ -187,6 +187,9 @@ GLExtensions + + GLExtensions + GLExtensions From e9ac4d53a6b50e61375ffc1994d7d278e4c94e7a Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Sat, 21 Feb 2015 16:58:53 -0600 Subject: [PATCH 2/2] Implement full occlusion queries for the Nexus 9. GLES3 spec is worthless and only returns a boolean result for occlusion queries. This is fine for simple cellular games but we need more than a boolean result. Thankfully Nvidia exposes GL_NV_occlusion_query_samples under an OpenGL ES extension, which allows us to get full samples rendered. The only device this change affects is the Nexus 9, since it is an Nvidia K1 crippled to only support OpenGL ES. No other OpenGL ES device that I know of supports this extension. 
--- Source/Core/VideoBackends/OGL/PerfQuery.cpp | 255 ++++++++++++++------ Source/Core/VideoBackends/OGL/PerfQuery.h | 52 +++- Source/Core/VideoBackends/OGL/main.cpp | 2 +- 3 files changed, 232 insertions(+), 77 deletions(-) diff --git a/Source/Core/VideoBackends/OGL/PerfQuery.cpp b/Source/Core/VideoBackends/OGL/PerfQuery.cpp index 178ef7108e..372cd17cf8 100644 --- a/Source/Core/VideoBackends/OGL/PerfQuery.cpp +++ b/Source/Core/VideoBackends/OGL/PerfQuery.cpp @@ -9,54 +9,32 @@ namespace OGL { +PerfQueryBase* GetPerfQuery() +{ + if (GLInterface->GetMode() == GLInterfaceMode::MODE_OPENGLES3 && + GLExtensions::Supports("GL_NV_occlusion_query_samples")) + return new PerfQueryGLESNV(); + else if (GLInterface->GetMode() == GLInterfaceMode::MODE_OPENGLES3) + return new PerfQueryGL(GL_ANY_SAMPLES_PASSED); + else + return new PerfQueryGL(GL_SAMPLES_PASSED); +} PerfQuery::PerfQuery() : m_query_read_pos() , m_query_count() { - for (ActiveQuery& query : m_query_buffer) - glGenQueries(1, &query.query_id); - ResetQuery(); } -PerfQuery::~PerfQuery() -{ - for (ActiveQuery& query : m_query_buffer) - glDeleteQueries(1, &query.query_id); -} - void PerfQuery::EnableQuery(PerfQueryGroup type) { - // Is this sane? - if (m_query_count > m_query_buffer.size() / 2) - WeakFlush(); - - if (m_query_buffer.size() == m_query_count) - { - FlushOne(); - //ERROR_LOG(VIDEO, "Flushed query buffer early!"); - } - - // start query - if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP) - { - auto& entry = m_query_buffer[(m_query_read_pos + m_query_count) % m_query_buffer.size()]; - - glBeginQuery(GLInterface->GetMode() == GLInterfaceMode::MODE_OPENGL ? GL_SAMPLES_PASSED : GL_ANY_SAMPLES_PASSED, entry.query_id); - entry.query_type = type; - - ++m_query_count; - } + m_query->EnableQuery(type); } void PerfQuery::DisableQuery(PerfQueryGroup type) { - // stop query - if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP) - { - glEndQuery(GLInterface->GetMode() == GLInterfaceMode::MODE_OPENGL ? 
GL_SAMPLES_PASSED : GL_ANY_SAMPLES_PASSED); - } + m_query->DisableQuery(type); } bool PerfQuery::IsFlushed() const @@ -64,45 +42,10 @@ bool PerfQuery::IsFlushed() const return 0 == m_query_count; } -void PerfQuery::FlushOne() -{ - auto& entry = m_query_buffer[m_query_read_pos]; - - GLuint result = 0; - glGetQueryObjectuiv(entry.query_id, GL_QUERY_RESULT, &result); - - // NOTE: Reported pixel metrics should be referenced to native resolution - m_results[entry.query_type] += (u64)result * EFB_WIDTH / g_renderer->GetTargetWidth() * EFB_HEIGHT / g_renderer->GetTargetHeight(); - - m_query_read_pos = (m_query_read_pos + 1) % m_query_buffer.size(); - --m_query_count; -} - // TODO: could selectively flush things, but I don't think that will do much void PerfQuery::FlushResults() { - while (!IsFlushed()) - FlushOne(); -} - -void PerfQuery::WeakFlush() -{ - while (!IsFlushed()) - { - auto& entry = m_query_buffer[m_query_read_pos]; - - GLuint result = GL_FALSE; - glGetQueryObjectuiv(entry.query_id, GL_QUERY_RESULT_AVAILABLE, &result); - - if (GL_TRUE == result) - { - FlushOne(); - } - else - { - break; - } - } + m_query->FlushResults(); } void PerfQuery::ResetQuery() @@ -135,4 +78,176 @@ u32 PerfQuery::GetQueryResult(PerfQueryType type) return result / 4; } +// Implementations +PerfQueryGL::PerfQueryGL(GLenum query_type) + : m_query_type(query_type) +{ + for (ActiveQuery& query : m_query_buffer) + glGenQueries(1, &query.query_id); +} + +PerfQueryGL::~PerfQueryGL() +{ + for (ActiveQuery& query : m_query_buffer) + glDeleteQueries(1, &query.query_id); +} + +void PerfQueryGL::EnableQuery(PerfQueryGroup type) +{ + // Is this sane? 
+ if (m_query_count > m_query_buffer.size() / 2) + WeakFlush(); + + if (m_query_buffer.size() == m_query_count) + { + FlushOne(); + //ERROR_LOG(VIDEO, "Flushed query buffer early!"); + } + + // start query + if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP) + { + auto& entry = m_query_buffer[(m_query_read_pos + m_query_count) % m_query_buffer.size()]; + + glBeginQuery(m_query_type, entry.query_id); + entry.query_type = type; + + ++m_query_count; + } +} +void PerfQueryGL::DisableQuery(PerfQueryGroup type) +{ + // stop query + if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP) + { + glEndQuery(m_query_type); + } +} + +void PerfQueryGL::WeakFlush() +{ + while (!IsFlushed()) + { + auto& entry = m_query_buffer[m_query_read_pos]; + + GLuint result = GL_FALSE; + glGetQueryObjectuiv(entry.query_id, GL_QUERY_RESULT_AVAILABLE, &result); + + if (GL_TRUE == result) + { + FlushOne(); + } + else + { + break; + } + } +} + +void PerfQueryGL::FlushOne() +{ + auto& entry = m_query_buffer[m_query_read_pos]; + + GLuint result = 0; + glGetQueryObjectuiv(entry.query_id, GL_QUERY_RESULT, &result); + + // NOTE: Reported pixel metrics should be referenced to native resolution + m_results[entry.query_type] += (u64)result * EFB_WIDTH / g_renderer->GetTargetWidth() * EFB_HEIGHT / g_renderer->GetTargetHeight(); + + m_query_read_pos = (m_query_read_pos + 1) % m_query_buffer.size(); + --m_query_count; +} + +// TODO: could selectively flush things, but I don't think that will do much +void PerfQueryGL::FlushResults() +{ + while (!IsFlushed()) + FlushOne(); +} + +PerfQueryGLESNV::PerfQueryGLESNV() +{ + for (ActiveQuery& query : m_query_buffer) + glGenOcclusionQueriesNV(1, &query.query_id); +} + +PerfQueryGLESNV::~PerfQueryGLESNV() +{ + for (ActiveQuery& query : m_query_buffer) + glDeleteOcclusionQueriesNV(1, &query.query_id); +} + +void PerfQueryGLESNV::EnableQuery(PerfQueryGroup type) +{ + // Is this sane? 
+ if (m_query_count > m_query_buffer.size() / 2) + WeakFlush(); + + if (m_query_buffer.size() == m_query_count) + { + FlushOne(); + //ERROR_LOG(VIDEO, "Flushed query buffer early!"); + } + + // start query + if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP) + { + auto& entry = m_query_buffer[(m_query_read_pos + m_query_count) % m_query_buffer.size()]; + + glBeginOcclusionQueryNV(entry.query_id); + entry.query_type = type; + + ++m_query_count; + } +} +void PerfQueryGLESNV::DisableQuery(PerfQueryGroup type) +{ + // stop query + if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP) + { + glEndOcclusionQueryNV(); + } +} + +void PerfQueryGLESNV::WeakFlush() +{ + while (!IsFlushed()) + { + auto& entry = m_query_buffer[m_query_read_pos]; + + GLuint result = GL_FALSE; + glGetOcclusionQueryuivNV(entry.query_id, GL_PIXEL_COUNT_AVAILABLE_NV, &result); + + if (GL_TRUE == result) + { + FlushOne(); + } + else + { + break; + } + } +} + +void PerfQueryGLESNV::FlushOne() +{ + auto& entry = m_query_buffer[m_query_read_pos]; + + GLuint result = 0; + glGetOcclusionQueryuivNV(entry.query_id, GL_OCCLUSION_TEST_RESULT_HP, &result); + + // NOTE: Reported pixel metrics should be referenced to native resolution + m_results[entry.query_type] += (u64)result * EFB_WIDTH / g_renderer->GetTargetWidth() * EFB_HEIGHT / g_renderer->GetTargetHeight(); + + m_query_read_pos = (m_query_read_pos + 1) % m_query_buffer.size(); + --m_query_count; +} + +// TODO: could selectively flush things, but I don't think that will do much +void PerfQueryGLESNV::FlushResults() +{ + while (!IsFlushed()) + FlushOne(); +} + } // namespace diff --git a/Source/Core/VideoBackends/OGL/PerfQuery.h b/Source/Core/VideoBackends/OGL/PerfQuery.h index ae1e5ddf59..aaf6863eac 100644 --- a/Source/Core/VideoBackends/OGL/PerfQuery.h +++ b/Source/Core/VideoBackends/OGL/PerfQuery.h @@ -1,18 +1,20 @@ #pragma once #include +#include #include "VideoBackends/OGL/GLExtensions/GLExtensions.h" #include "VideoCommon/PerfQueryBase.h" namespace 
OGL { +PerfQueryBase* GetPerfQuery(); class PerfQuery : public PerfQueryBase { public: PerfQuery(); - ~PerfQuery(); + ~PerfQuery() {} void EnableQuery(PerfQueryGroup type) override; void DisableQuery(PerfQueryGroup type) override; @@ -21,7 +23,7 @@ public: void FlushResults() override; bool IsFlushed() const override; -private: +protected: struct ActiveQuery { GLuint query_id; @@ -31,10 +33,6 @@ private: // when testing in SMS: 64 was too small, 128 was ok static const u32 PERF_QUERY_BUFFER_SIZE = 512; - void WeakFlush(); - // Only use when non-empty - void FlushOne(); - // This contains gl query objects with unretrieved results. std::array m_query_buffer; u32 m_query_read_pos; @@ -42,6 +40,48 @@ private: // TODO: sloppy volatile u32 m_query_count; volatile u32 m_results[PQG_NUM_MEMBERS]; + +private: + // Implementation + std::unique_ptr m_query; }; +// Implementations +class PerfQueryGL : public PerfQuery +{ +public: + PerfQueryGL(GLenum query_type); + ~PerfQueryGL(); + + void EnableQuery(PerfQueryGroup type) override; + void DisableQuery(PerfQueryGroup type) override; + void FlushResults() override; + +private: + + void WeakFlush(); + // Only use when non-empty + void FlushOne(); + + GLenum m_query_type; +}; + +class PerfQueryGLESNV : public PerfQuery +{ +public: + PerfQueryGLESNV(); + ~PerfQueryGLESNV(); + + void EnableQuery(PerfQueryGroup type) override; + void DisableQuery(PerfQueryGroup type) override; + void FlushResults() override; + +private: + + void WeakFlush(); + // Only use when non-empty + void FlushOne(); +}; + + } // namespace diff --git a/Source/Core/VideoBackends/OGL/main.cpp b/Source/Core/VideoBackends/OGL/main.cpp index 83ab7593e7..9ab73a6e12 100644 --- a/Source/Core/VideoBackends/OGL/main.cpp +++ b/Source/Core/VideoBackends/OGL/main.cpp @@ -199,7 +199,7 @@ void VideoBackend::Video_Prepare() BPInit(); g_vertex_manager = new VertexManager; - g_perf_query = new PerfQuery; + g_perf_query = GetPerfQuery(); Fifo_Init(); // must be done before 
OpcodeDecoder_Init() OpcodeDecoder_Init(); IndexGenerator::Init();