From 27f48fbc065cd57f73e4fef3f26ced2c5635db13 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sat, 12 Oct 2019 01:05:05 +0300 Subject: [PATCH] gl: Rewrite image transfer operations to support image subregions - Working exclusively with full sized images is very expensive --- rpcs3/Emu/RSX/GL/GLGSRender.cpp | 15 +++-- rpcs3/Emu/RSX/GL/GLHelpers.h | 105 +++++++++++++++++------------- rpcs3/Emu/RSX/GL/GLOverlays.h | 4 +- rpcs3/Emu/RSX/GL/GLProcTable.h | 2 + rpcs3/Emu/RSX/GL/GLTexture.cpp | 21 ++++-- rpcs3/Emu/RSX/GL/GLTexture.h | 4 ++ rpcs3/Emu/RSX/GL/GLTextureCache.h | 14 ++-- 7 files changed, 103 insertions(+), 62 deletions(-) diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 84665ac916..82ba8c1630 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -756,19 +756,19 @@ void GLGSRender::on_init_thread() //1D auto tex1D = std::make_unique<gl::texture>(GL_TEXTURE_1D, 1, 1, 1, 1, GL_RGBA8); - tex1D->copy_from(pixeldata.data(), gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8); + tex1D->copy_from(pixeldata.data(), gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8, {}); //2D auto tex2D = std::make_unique<gl::texture>(GL_TEXTURE_2D, 1, 1, 1, 1, GL_RGBA8); - tex2D->copy_from(pixeldata.data(), gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8); + tex2D->copy_from(pixeldata.data(), gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8, {}); //3D auto tex3D = std::make_unique<gl::texture>(GL_TEXTURE_3D, 1, 1, 1, 1, GL_RGBA8); - tex3D->copy_from(pixeldata.data(), gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8); + tex3D->copy_from(pixeldata.data(), gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8, {}); //CUBE auto texCUBE = std::make_unique<gl::texture>(GL_TEXTURE_CUBE_MAP, 1, 1, 1, 1, GL_RGBA8); - texCUBE->copy_from(pixeldata.data(), gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8); + texCUBE->copy_from(pixeldata.data(), gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8, {}); 
m_null_textures[GL_TEXTURE_1D] = std::move(tex1D); m_null_textures[GL_TEXTURE_2D] = std::move(tex2D); @@ -966,8 +966,15 @@ void GLGSRender::on_init_thread() void GLGSRender::on_exit() { + // Globals + // TODO: Move these gl::destroy_compute_tasks(); + if (gl::g_typeless_transfer_buffer) + { + gl::g_typeless_transfer_buffer.remove(); + } + zcull_ctrl.release(); m_prog_buffer.clear(); diff --git a/rpcs3/Emu/RSX/GL/GLHelpers.h b/rpcs3/Emu/RSX/GL/GLHelpers.h index 1f516ea94b..8d51ae3f8e 100644 --- a/rpcs3/Emu/RSX/GL/GLHelpers.h +++ b/rpcs3/Emu/RSX/GL/GLHelpers.h @@ -702,7 +702,8 @@ namespace gl array = GL_ARRAY_BUFFER, element_array = GL_ELEMENT_ARRAY_BUFFER, uniform = GL_UNIFORM_BUFFER, - texture = GL_TEXTURE_BUFFER + texture = GL_TEXTURE_BUFFER, + ssbo = GL_SHADER_STORAGE_BUFFER }; enum class access @@ -925,6 +926,11 @@ namespace gl { glBindBufferRange((GLenum)current_target(), index, id(), offset, size); } + + void bind_range(target target_, u32 index, u32 offset, u32 size) const + { + glBindBufferRange((GLenum)target_, index, id(), offset, size); + } }; class ring_buffer : public buffer @@ -1764,9 +1770,15 @@ namespace gl return m_aspect_flags; } - sizei size2D() const + sizeu size2D() const { - return{ (int)m_width, (int)m_height }; + return{ m_width, m_height }; + } + + size3u size3D() const + { + const auto depth = (m_target == target::textureCUBE) ? 
6 : m_depth; + return{ m_width, m_height, depth }; } texture::internal_format get_internal_format() const @@ -1779,7 +1791,7 @@ namespace gl return m_component_layout; } - void copy_from(const void* src, texture::format format, texture::type type, class pixel_unpack_settings pixel_settings) + void copy_from(const void* src, texture::format format, texture::type type, const coord3u region, const pixel_unpack_settings& pixel_settings) { pixel_settings.apply(); @@ -1787,33 +1799,34 @@ namespace gl { case GL_TEXTURE_1D: { - DSA_CALL(TextureSubImage1D, m_id, GL_TEXTURE_1D, 0, 0, m_width, (GLenum)format, (GLenum)type, src); + DSA_CALL(TextureSubImage1D, m_id, GL_TEXTURE_1D, 0, region.x, region.width, (GLenum)format, (GLenum)type, src); break; } case GL_TEXTURE_2D: { - DSA_CALL(TextureSubImage2D, m_id, GL_TEXTURE_2D, 0, 0, 0, m_width, m_height, (GLenum)format, (GLenum)type, src); + DSA_CALL(TextureSubImage2D, m_id, GL_TEXTURE_2D, 0, region.x, region.y, region.width, region.height, (GLenum)format, (GLenum)type, src); break; } case GL_TEXTURE_3D: { - DSA_CALL(TextureSubImage3D, m_id, GL_TEXTURE_3D, 0, 0, 0, 0, m_width, m_height, m_depth, (GLenum)format, (GLenum)type, src); + DSA_CALL(TextureSubImage3D, m_id, GL_TEXTURE_3D, 0, region.x, region.y, region.z, region.width, region.height, region.depth, (GLenum)format, (GLenum)type, src); break; } case GL_TEXTURE_CUBE_MAP: { - if (::gl::get_driver_caps().ARB_dsa_supported) + if (get_driver_caps().ARB_dsa_supported) { - glTextureSubImage3D(m_id, 0, 0, 0, 0, m_width, m_height, 6, (GLenum)format, (GLenum)type, src); + glTextureSubImage3D(m_id, 0, region.x, region.y, region.z, region.width, region.height, region.depth, (GLenum)format, (GLenum)type, src); } else { LOG_WARNING(RSX, "Cubemap upload via texture::copy_from is halfplemented!"); u8* ptr = (u8*)src; - for (int face = 0; face < 6; ++face) + const auto end = std::min(6u, region.z + region.depth); + for (unsigned face = region.z; face < end; ++face) { - 
glTextureSubImage2DEXT(m_id, GL_TEXTURE_CUBE_MAP_POSITIVE_X + face, 0, 0, 0, m_width, m_height, (GLenum)format, (GLenum)type, ptr); - ptr += (m_width * m_height * 4); //TODO + glTextureSubImage2DEXT(m_id, GL_TEXTURE_CUBE_MAP_POSITIVE_X + face, 0, region.x, region.y, region.width, region.height, (GLenum)format, (GLenum)type, ptr); + ptr += (region.width * region.height * 4); //TODO } } break; @@ -1821,6 +1834,12 @@ namespace gl } } + void copy_from(const void* src, texture::format format, texture::type type, const pixel_unpack_settings& pixel_settings) + { + const coord3u region = { {}, size3D() }; + copy_from(src, format, type, region, pixel_settings); + } + void copy_from(buffer &buf, u32 gl_format_type, u32 offset, u32 length) { if (get_target() != target::textureBuffer) @@ -1834,45 +1853,41 @@ namespace gl copy_from(*view.value(), view.format(), view.offset(), view.range()); } - void copy_from(const buffer& buf, texture::format format, texture::type type, class pixel_unpack_settings pixel_settings) - { - buffer::save_binding_state save_buffer(buffer::target::pixel_unpack, buf); - copy_from(nullptr, format, type, pixel_settings); - } - - void copy_from(void* src, texture::format format, texture::type type) - { - copy_from(src, format, type, pixel_unpack_settings()); - } - - void copy_from(const buffer& buf, texture::format format, texture::type type) - { - copy_from(buf, format, type, pixel_unpack_settings()); - } - - void copy_to(void* dst, texture::format format, texture::type type, class pixel_pack_settings pixel_settings) const + void copy_to(void* dst, texture::format format, texture::type type, const coord3u& region, const pixel_pack_settings& pixel_settings) const { pixel_settings.apply(); - if (gl::get_driver_caps().ARB_dsa_supported) - glGetTextureImage(m_id, 0, (GLenum)format, (GLenum)type, m_width * m_height * 16, dst); + const auto& caps = get_driver_caps(); + + if (!region.x && !region.y && !region.z && + region.width == m_width && region.height == 
m_height && region.depth == m_depth) + { + if (caps.ARB_dsa_supported) + glGetTextureImage(m_id, 0, (GLenum)format, (GLenum)type, INT32_MAX, dst); + else + glGetTextureImageEXT(m_id, (GLenum)m_target, 0, (GLenum)format, (GLenum)type, dst); + } + else if (caps.ARB_dsa_supported) + { + glGetTextureSubImage(m_id, 0, region.x, region.y, region.z, region.width, region.height, region.depth, + (GLenum)format, (GLenum)type, INT32_MAX, dst); + } else - glGetTextureImageEXT(m_id, (GLenum)m_target, 0, (GLenum)format, (GLenum)type, dst); + { + // Worst case scenario. For some reason, EXT_dsa does not have glGetTextureSubImage + const auto target_ = static_cast<GLenum>(m_target); + texture tmp{ target_, region.width, region.height, region.depth, 1, (GLenum)m_internal_format }; + glCopyImageSubData(m_id, target_, 0, region.x, region.y, region.z, tmp.id(), target_, 0, 0, 0, 0, + region.width, region.height, region.depth); + + const coord3u region2 = { {0, 0, 0}, region.size }; + tmp.copy_to(dst, format, type, region2, pixel_settings); + } } - void copy_to(const buffer& buf, texture::format format, texture::type type, class pixel_pack_settings pixel_settings) const + void copy_to(void* dst, texture::format format, texture::type type, const pixel_pack_settings& pixel_settings) const { - buffer::save_binding_state save_buffer(buffer::target::pixel_pack, buf); - copy_to(nullptr, format, type, pixel_settings); - } - - void copy_to(void* dst, texture::format format, texture::type type) const - { - copy_to(dst, format, type, pixel_pack_settings()); - } - - void copy_to(const buffer& buf, texture::format format, texture::type type) const - { - copy_to(buf, format, type, pixel_pack_settings()); + const coord3u region = { {}, size3D() }; + copy_to(dst, format, type, region, pixel_settings); } }; diff --git a/rpcs3/Emu/RSX/GL/GLOverlays.h b/rpcs3/Emu/RSX/GL/GLOverlays.h index 92126defe3..b24c404956 100644 --- a/rpcs3/Emu/RSX/GL/GLOverlays.h +++ b/rpcs3/Emu/RSX/GL/GLOverlays.h @@ -480,7 +480,7 @@ 
namespace gl gl::texture_view* load_simple_image(rsx::overlays::image_info* desc, bool temp_resource, u32 owner_uid) { auto tex = std::make_unique<gl::texture>(GL_TEXTURE_2D, desc->w, desc->h, 1, 1, GL_RGBA8); - tex->copy_from(desc->data, gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8); + tex->copy_from(desc->data, gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8, {}); GLenum remap[] = { GL_RED, GL_ALPHA, GL_BLUE, GL_GREEN }; auto view = std::make_unique<gl::texture_view>(tex.get(), remap); @@ -551,7 +551,7 @@ namespace gl //Create font file auto tex = std::make_unique<gl::texture>(GL_TEXTURE_2D, (int)font->width, (int)font->height, 1, 1, GL_R8); - tex->copy_from(font->glyph_data.data(), gl::texture::format::r, gl::texture::type::ubyte); + tex->copy_from(font->glyph_data.data(), gl::texture::format::r, gl::texture::type::ubyte, {}); GLenum remap[] = { GL_RED, GL_RED, GL_RED, GL_RED }; auto view = std::make_unique<gl::texture_view>(tex.get(), remap); diff --git a/rpcs3/Emu/RSX/GL/GLProcTable.h b/rpcs3/Emu/RSX/GL/GLProcTable.h index b9ab358fae..1a78810ddb 100644 --- a/rpcs3/Emu/RSX/GL/GLProcTable.h +++ b/rpcs3/Emu/RSX/GL/GLProcTable.h @@ -166,6 +166,7 @@ OPENGL_PROC(PFNGLPRIMITIVERESTARTINDEXPROC, PrimitiveRestartIndex); OPENGL_PROC(PFNGLGETINTEGER64VPROC, GetInteger64v); OPENGL_PROC(PFNGLGETSTRINGIPROC, GetStringi); +OPENGL_PROC(PFNGLGETINTEGERI_VPROC, GetIntegeri_v); OPENGL_PROC(PFNGLCHECKFRAMEBUFFERSTATUSPROC, CheckFramebufferStatus); @@ -178,6 +179,7 @@ OPENGL_PROC(PFNGLMULTIDRAWARRAYSPROC, MultiDrawArrays); OPENGL_PROC(PFNGLGETTEXTUREIMAGEEXTPROC, GetTextureImageEXT); OPENGL_PROC(PFNGLGETTEXTUREIMAGEPROC, GetTextureImage); +OPENGL_PROC(PFNGLGETTEXTURESUBIMAGEPROC, GetTextureSubImage); OPENGL_PROC(PFNGLTEXTURESUBIMAGE1DEXTPROC, TextureSubImage1DEXT); OPENGL_PROC(PFNGLTEXTURESUBIMAGE1DPROC, TextureSubImage1D); OPENGL_PROC(PFNGLTEXTURESUBIMAGE2DEXTPROC, TextureSubImage2DEXT); diff --git a/rpcs3/Emu/RSX/GL/GLTexture.cpp b/rpcs3/Emu/RSX/GL/GLTexture.cpp index b652484907..f97d01ca1f 100644 --- 
a/rpcs3/Emu/RSX/GL/GLTexture.cpp +++ b/rpcs3/Emu/RSX/GL/GLTexture.cpp @@ -7,7 +7,7 @@ namespace gl { - static buffer g_typeless_transfer_buffer; + buffer g_typeless_transfer_buffer; GLenum get_target(rsx::texture_dimension_extended type) { @@ -775,10 +775,10 @@ namespace gl } } - void copy_typeless(texture * dst, const texture * src) + void copy_typeless(texture * dst, const texture * src, const coord3u& dst_region, const coord3u& src_region) { - GLsizeiptr src_mem = src->pitch() * src->height(); - GLsizeiptr dst_mem = dst->pitch() * dst->height(); + const u32 src_mem = src->pitch() * src_region.height; + const u32 dst_mem = dst->pitch() * dst_region.height; auto max_mem = std::max(src_mem, dst_mem); if (!g_typeless_transfer_buffer || max_mem > g_typeless_transfer_buffer.size()) @@ -797,13 +797,13 @@ namespace gl if (LIKELY(caps.ARB_compute_shader_supported)) { // Raw copy - src->copy_to(nullptr, (texture::format)pack_info.format, (texture::type)pack_info.type); + src->copy_to(nullptr, (texture::format)pack_info.format, (texture::type)pack_info.type, src_region, {}); } else { pixel_pack_settings pack_settings{}; pack_settings.swap_bytes(pack_info.swap_bytes); - src->copy_to(nullptr, (texture::format)pack_info.format, (texture::type)pack_info.type, pack_settings); + src->copy_to(nullptr, (texture::format)pack_info.format, (texture::type)pack_info.type, src_region, pack_settings); } glBindBuffer(GL_PIXEL_PACK_BUFFER, GL_NONE); @@ -850,7 +850,14 @@ namespace gl } g_typeless_transfer_buffer.bind(buffer::target::pixel_unpack); - dst->copy_from(nullptr, (texture::format)unpack_info.format, (texture::type)unpack_info.type, unpack_settings); + dst->copy_from(nullptr, (texture::format)unpack_info.format, (texture::type)unpack_info.type, dst_region, unpack_settings); glBindBuffer(GL_PIXEL_UNPACK_BUFFER, GL_NONE); } + + void copy_typeless(texture* dst, const texture* src) + { + const coord3u src_area = { {}, src->size3D() }; + const coord3u dst_area = { {}, dst->size3D() }; + 
copy_typeless(dst, src, dst_area, src_area); + } } diff --git a/rpcs3/Emu/RSX/GL/GLTexture.h b/rpcs3/Emu/RSX/GL/GLTexture.h index cba49bc068..d6a9224ee3 100644 --- a/rpcs3/Emu/RSX/GL/GLTexture.h +++ b/rpcs3/Emu/RSX/GL/GLTexture.h @@ -32,7 +32,9 @@ namespace gl viewable_image* create_texture(u32 gcm_format, u16 width, u16 height, u16 depth, u16 mipmaps, rsx::texture_dimension_extended type); bool formats_are_bitcast_compatible(GLenum format1, GLenum format2); + void copy_typeless(texture* dst, const texture* src, const coord3u& dst_region, const coord3u& src_region); void copy_typeless(texture* dst, const texture* src); + /** * is_swizzled - determines whether input bytes are in morton order * subresources_layout - descriptor of the mipmap levels in memory @@ -110,4 +112,6 @@ namespace gl void apply_defaults(GLenum default_filter = GL_NEAREST); }; + + extern buffer g_typeless_transfer_buffer; } diff --git a/rpcs3/Emu/RSX/GL/GLTextureCache.h b/rpcs3/Emu/RSX/GL/GLTextureCache.h index 739c4af054..98c8c655db 100644 --- a/rpcs3/Emu/RSX/GL/GLTextureCache.h +++ b/rpcs3/Emu/RSX/GL/GLTextureCache.h @@ -26,7 +26,7 @@ namespace gl class blitter; extern GLenum get_sized_internal_format(u32); - extern void copy_typeless(texture*, const texture*); + extern void copy_typeless(texture*, const texture*, const coord3u&, const coord3u&); extern blitter *g_hw_blitter; class cached_texture_section; @@ -632,11 +632,17 @@ namespace gl tmp = std::make_unique<texture>(GL_TEXTURE_2D, convert_w, slice.src->height(), 1, 1, (GLenum)dst_image->get_internal_format()); src_image = tmp.get(); - gl::copy_typeless(src_image, slice.src); // Compute src region in dst format layout - src_x = u16(src_x * src_bpp) / dst_bpp; - src_w = u16(src_w * src_bpp) / dst_bpp; + const u16 src_w2 = u16(src_w * src_bpp) / dst_bpp; + const u16 src_x2 = u16(src_x * src_bpp) / dst_bpp; + + const coord3u src_region = { { src_x, src_y, 0 }, { src_w, src_h, 
1 } }; + gl::copy_typeless(src_image, slice.src, dst_region, src_region); + + src_x = src_x2; + src_w = src_w2; } if (src_w == slice.dst_w && src_h == slice.dst_h)