gl: Rewrite image transfer operations to support image subregions

- Working exclusively with full-sized images is very expensive
This commit is contained in:
kd-11 2019-10-12 01:05:05 +03:00 committed by kd-11
parent d9a9766e41
commit 27f48fbc06
7 changed files with 103 additions and 62 deletions

View File

@ -756,19 +756,19 @@ void GLGSRender::on_init_thread()
//1D
auto tex1D = std::make_unique<gl::texture>(GL_TEXTURE_1D, 1, 1, 1, 1, GL_RGBA8);
tex1D->copy_from(pixeldata.data(), gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8);
tex1D->copy_from(pixeldata.data(), gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8, {});
//2D
auto tex2D = std::make_unique<gl::texture>(GL_TEXTURE_2D, 1, 1, 1, 1, GL_RGBA8);
tex2D->copy_from(pixeldata.data(), gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8);
tex2D->copy_from(pixeldata.data(), gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8, {});
//3D
auto tex3D = std::make_unique<gl::texture>(GL_TEXTURE_3D, 1, 1, 1, 1, GL_RGBA8);
tex3D->copy_from(pixeldata.data(), gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8);
tex3D->copy_from(pixeldata.data(), gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8, {});
//CUBE
auto texCUBE = std::make_unique<gl::texture>(GL_TEXTURE_CUBE_MAP, 1, 1, 1, 1, GL_RGBA8);
texCUBE->copy_from(pixeldata.data(), gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8);
texCUBE->copy_from(pixeldata.data(), gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8, {});
m_null_textures[GL_TEXTURE_1D] = std::move(tex1D);
m_null_textures[GL_TEXTURE_2D] = std::move(tex2D);
@ -966,8 +966,15 @@ void GLGSRender::on_init_thread()
void GLGSRender::on_exit()
{
// Globals
// TODO: Move these
gl::destroy_compute_tasks();
if (gl::g_typeless_transfer_buffer)
{
gl::g_typeless_transfer_buffer.remove();
}
zcull_ctrl.release();
m_prog_buffer.clear();

View File

@ -702,7 +702,8 @@ namespace gl
array = GL_ARRAY_BUFFER,
element_array = GL_ELEMENT_ARRAY_BUFFER,
uniform = GL_UNIFORM_BUFFER,
texture = GL_TEXTURE_BUFFER
texture = GL_TEXTURE_BUFFER,
ssbo = GL_SHADER_STORAGE_BUFFER
};
enum class access
@ -925,6 +926,11 @@ namespace gl
{
glBindBufferRange((GLenum)current_target(), index, id(), offset, size);
}
// Binds the range described by `offset` and `size` of this buffer object to the
// indexed binding point `index` of the explicitly supplied `target_`.
// The arguments are forwarded to glBindBufferRange unchanged.
void bind_range(target target_, u32 index, u32 offset, u32 size) const
{
	const auto gl_target = static_cast<GLenum>(target_);
	glBindBufferRange(gl_target, index, id(), offset, size);
}
};
class ring_buffer : public buffer
@ -1764,9 +1770,15 @@ namespace gl
return m_aspect_flags;
}
sizei size2D() const
sizeu size2D() const
{
return{ (int)m_width, (int)m_height };
return{ m_width, m_height };
}
// Returns the image extent as { width, height, depth }.
// For cubemap targets the depth component is reported as 6 rather than m_depth.
size3u size3D() const
{
	if (m_target == target::textureCUBE)
	{
		return{ m_width, m_height, 6 };
	}

	return{ m_width, m_height, m_depth };
}
texture::internal_format get_internal_format() const
@ -1779,7 +1791,7 @@ namespace gl
return m_component_layout;
}
void copy_from(const void* src, texture::format format, texture::type type, class pixel_unpack_settings pixel_settings)
void copy_from(const void* src, texture::format format, texture::type type, const coord3u region, const pixel_unpack_settings& pixel_settings)
{
pixel_settings.apply();
@ -1787,33 +1799,34 @@ namespace gl
{
case GL_TEXTURE_1D:
{
DSA_CALL(TextureSubImage1D, m_id, GL_TEXTURE_1D, 0, 0, m_width, (GLenum)format, (GLenum)type, src);
DSA_CALL(TextureSubImage1D, m_id, GL_TEXTURE_1D, 0, region.x, region.width, (GLenum)format, (GLenum)type, src);
break;
}
case GL_TEXTURE_2D:
{
DSA_CALL(TextureSubImage2D, m_id, GL_TEXTURE_2D, 0, 0, 0, m_width, m_height, (GLenum)format, (GLenum)type, src);
DSA_CALL(TextureSubImage2D, m_id, GL_TEXTURE_2D, 0, region.x, region.y, region.width, region.height, (GLenum)format, (GLenum)type, src);
break;
}
case GL_TEXTURE_3D:
{
DSA_CALL(TextureSubImage3D, m_id, GL_TEXTURE_3D, 0, 0, 0, 0, m_width, m_height, m_depth, (GLenum)format, (GLenum)type, src);
DSA_CALL(TextureSubImage3D, m_id, GL_TEXTURE_3D, 0, region.x, region.y, region.z, region.width, region.height, region.depth, (GLenum)format, (GLenum)type, src);
break;
}
case GL_TEXTURE_CUBE_MAP:
{
if (::gl::get_driver_caps().ARB_dsa_supported)
if (get_driver_caps().ARB_dsa_supported)
{
glTextureSubImage3D(m_id, 0, 0, 0, 0, m_width, m_height, 6, (GLenum)format, (GLenum)type, src);
glTextureSubImage3D(m_id, 0, region.x, region.y, region.z, region.width, region.height, region.depth, (GLenum)format, (GLenum)type, src);
}
else
{
LOG_WARNING(RSX, "Cubemap upload via texture::copy_from is halfplemented!");
u8* ptr = (u8*)src;
for (int face = 0; face < 6; ++face)
const auto end = std::min(6u, region.z + region.depth);
for (unsigned face = region.z; face < end; ++face)
{
glTextureSubImage2DEXT(m_id, GL_TEXTURE_CUBE_MAP_POSITIVE_X + face, 0, 0, 0, m_width, m_height, (GLenum)format, (GLenum)type, ptr);
ptr += (m_width * m_height * 4); //TODO
glTextureSubImage2DEXT(m_id, GL_TEXTURE_CUBE_MAP_POSITIVE_X + face, 0, region.x, region.y, region.width, region.height, (GLenum)format, (GLenum)type, ptr);
ptr += (region.width * region.height * 4); //TODO
}
}
break;
@ -1821,6 +1834,12 @@ namespace gl
}
}
void copy_from(const void* src, texture::format format, texture::type type, const pixel_unpack_settings& pixel_settings)
{
const coord3u region = { {}, size3D() };
copy_from(src, format, type, region, pixel_settings);
}
void copy_from(buffer &buf, u32 gl_format_type, u32 offset, u32 length)
{
if (get_target() != target::textureBuffer)
@ -1834,45 +1853,41 @@ namespace gl
copy_from(*view.value(), view.format(), view.offset(), view.range());
}
void copy_from(const buffer& buf, texture::format format, texture::type type, class pixel_unpack_settings pixel_settings)
{
buffer::save_binding_state save_buffer(buffer::target::pixel_unpack, buf);
copy_from(nullptr, format, type, pixel_settings);
}
void copy_from(void* src, texture::format format, texture::type type)
{
copy_from(src, format, type, pixel_unpack_settings());
}
void copy_from(const buffer& buf, texture::format format, texture::type type)
{
copy_from(buf, format, type, pixel_unpack_settings());
}
void copy_to(void* dst, texture::format format, texture::type type, class pixel_pack_settings pixel_settings) const
void copy_to(void* dst, texture::format format, texture::type type, const coord3u& region, const pixel_pack_settings& pixel_settings) const
{
pixel_settings.apply();
if (gl::get_driver_caps().ARB_dsa_supported)
glGetTextureImage(m_id, 0, (GLenum)format, (GLenum)type, m_width * m_height * 16, dst);
const auto& caps = get_driver_caps();
if (!region.x && !region.y && !region.z &&
region.width == m_width && region.height == m_height && region.depth == m_depth)
{
if (caps.ARB_dsa_supported)
glGetTextureImage(m_id, 0, (GLenum)format, (GLenum)type, INT32_MAX, dst);
else
glGetTextureImageEXT(m_id, (GLenum)m_target, 0, (GLenum)format, (GLenum)type, dst);
}
else if (caps.ARB_dsa_supported)
{
glGetTextureSubImage(m_id, 0, region.x, region.y, region.z, region.width, region.height, region.depth,
(GLenum)format, (GLenum)type, INT32_MAX, dst);
}
else
glGetTextureImageEXT(m_id, (GLenum)m_target, 0, (GLenum)format, (GLenum)type, dst);
{
// Worst case scenario. For some reason, EXT_dsa does not have glGetTextureSubImage
const auto target_ = static_cast<GLenum>(m_target);
texture tmp{ target_, region.width, region.height, region.depth, 1, (GLenum)m_internal_format };
glCopyImageSubData(m_id, target_, 0, region.x, region.y, region.z, tmp.id(), target_, 0, 0, 0, 0,
region.width, region.height, region.depth);
const coord3u region2 = { {0, 0, 0}, region.size };
tmp.copy_to(dst, format, type, region2, pixel_settings);
}
}
void copy_to(const buffer& buf, texture::format format, texture::type type, class pixel_pack_settings pixel_settings) const
void copy_to(void* dst, texture::format format, texture::type type, const pixel_pack_settings& pixel_settings) const
{
buffer::save_binding_state save_buffer(buffer::target::pixel_pack, buf);
copy_to(nullptr, format, type, pixel_settings);
}
void copy_to(void* dst, texture::format format, texture::type type) const
{
copy_to(dst, format, type, pixel_pack_settings());
}
void copy_to(const buffer& buf, texture::format format, texture::type type) const
{
copy_to(buf, format, type, pixel_pack_settings());
const coord3u region = { {}, size3D() };
copy_to(dst, format, type, region, pixel_settings);
}
};

View File

@ -480,7 +480,7 @@ namespace gl
gl::texture_view* load_simple_image(rsx::overlays::image_info* desc, bool temp_resource, u32 owner_uid)
{
auto tex = std::make_unique<gl::texture>(GL_TEXTURE_2D, desc->w, desc->h, 1, 1, GL_RGBA8);
tex->copy_from(desc->data, gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8);
tex->copy_from(desc->data, gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8, {});
GLenum remap[] = { GL_RED, GL_ALPHA, GL_BLUE, GL_GREEN };
auto view = std::make_unique<gl::texture_view>(tex.get(), remap);
@ -551,7 +551,7 @@ namespace gl
//Create font file
auto tex = std::make_unique<gl::texture>(GL_TEXTURE_2D, (int)font->width, (int)font->height, 1, 1, GL_R8);
tex->copy_from(font->glyph_data.data(), gl::texture::format::r, gl::texture::type::ubyte);
tex->copy_from(font->glyph_data.data(), gl::texture::format::r, gl::texture::type::ubyte, {});
GLenum remap[] = { GL_RED, GL_RED, GL_RED, GL_RED };
auto view = std::make_unique<gl::texture_view>(tex.get(), remap);

View File

@ -166,6 +166,7 @@ OPENGL_PROC(PFNGLPRIMITIVERESTARTINDEXPROC, PrimitiveRestartIndex);
OPENGL_PROC(PFNGLGETINTEGER64VPROC, GetInteger64v);
OPENGL_PROC(PFNGLGETSTRINGIPROC, GetStringi);
OPENGL_PROC(PFNGLGETINTEGERI_VPROC, GetIntegeri_v);
OPENGL_PROC(PFNGLCHECKFRAMEBUFFERSTATUSPROC, CheckFramebufferStatus);
@ -178,6 +179,7 @@ OPENGL_PROC(PFNGLMULTIDRAWARRAYSPROC, MultiDrawArrays);
OPENGL_PROC(PFNGLGETTEXTUREIMAGEEXTPROC, GetTextureImageEXT);
OPENGL_PROC(PFNGLGETTEXTUREIMAGEPROC, GetTextureImage);
OPENGL_PROC(PFNGLGETTEXTURESUBIMAGEPROC, GetTextureSubImage);
OPENGL_PROC(PFNGLTEXTURESUBIMAGE1DEXTPROC, TextureSubImage1DEXT);
OPENGL_PROC(PFNGLTEXTURESUBIMAGE1DPROC, TextureSubImage1D);
OPENGL_PROC(PFNGLTEXTURESUBIMAGE2DEXTPROC, TextureSubImage2DEXT);

View File

@ -7,7 +7,7 @@
namespace gl
{
static buffer g_typeless_transfer_buffer;
buffer g_typeless_transfer_buffer;
GLenum get_target(rsx::texture_dimension_extended type)
{
@ -775,10 +775,10 @@ namespace gl
}
}
void copy_typeless(texture * dst, const texture * src)
void copy_typeless(texture * dst, const texture * src, const coord3u& dst_region, const coord3u& src_region)
{
GLsizeiptr src_mem = src->pitch() * src->height();
GLsizeiptr dst_mem = dst->pitch() * dst->height();
const u32 src_mem = src->pitch() * src_region.height;
const u32 dst_mem = dst->pitch() * dst_region.height;
auto max_mem = std::max(src_mem, dst_mem);
if (!g_typeless_transfer_buffer || max_mem > g_typeless_transfer_buffer.size())
@ -797,13 +797,13 @@ namespace gl
if (LIKELY(caps.ARB_compute_shader_supported))
{
// Raw copy
src->copy_to(nullptr, (texture::format)pack_info.format, (texture::type)pack_info.type);
src->copy_to(nullptr, (texture::format)pack_info.format, (texture::type)pack_info.type, src_region, {});
}
else
{
pixel_pack_settings pack_settings{};
pack_settings.swap_bytes(pack_info.swap_bytes);
src->copy_to(nullptr, (texture::format)pack_info.format, (texture::type)pack_info.type, pack_settings);
src->copy_to(nullptr, (texture::format)pack_info.format, (texture::type)pack_info.type, src_region, pack_settings);
}
glBindBuffer(GL_PIXEL_PACK_BUFFER, GL_NONE);
@ -850,7 +850,14 @@ namespace gl
}
g_typeless_transfer_buffer.bind(buffer::target::pixel_unpack);
dst->copy_from(nullptr, (texture::format)unpack_info.format, (texture::type)unpack_info.type, unpack_settings);
dst->copy_from(nullptr, (texture::format)unpack_info.format, (texture::type)unpack_info.type, dst_region, unpack_settings);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, GL_NONE);
}
// Full-image convenience overload: forwards to the region-based copy_typeless,
// using each texture's size3D() extent with a default-constructed origin.
// Both pointers are dereferenced unconditionally, so neither may be null.
void copy_typeless(texture* dst, const texture* src)
{
	const coord3u dst_whole = { {}, dst->size3D() };
	const coord3u src_whole = { {}, src->size3D() };
	copy_typeless(dst, src, dst_whole, src_whole);
}
}

View File

@ -32,7 +32,9 @@ namespace gl
viewable_image* create_texture(u32 gcm_format, u16 width, u16 height, u16 depth, u16 mipmaps, rsx::texture_dimension_extended type);
bool formats_are_bitcast_compatible(GLenum format1, GLenum format2);
void copy_typeless(texture* dst, const texture* src, const coord3u& dst_region, const coord3u& src_region);
void copy_typeless(texture* dst, const texture* src);
/**
* is_swizzled - determines whether input bytes are in morton order
* subresources_layout - descriptor of the mipmap levels in memory
@ -110,4 +112,6 @@ namespace gl
void apply_defaults(GLenum default_filter = GL_NEAREST);
};
extern buffer g_typeless_transfer_buffer;
}

View File

@ -26,7 +26,7 @@ namespace gl
class blitter;
extern GLenum get_sized_internal_format(u32);
extern void copy_typeless(texture*, const texture*);
extern void copy_typeless(texture*, const texture*, const coord3u&, const coord3u&);
extern blitter *g_hw_blitter;
class cached_texture_section;
@ -632,11 +632,17 @@ namespace gl
tmp = std::make_unique<texture>(GL_TEXTURE_2D, convert_w, slice.src->height(), 1, 1, (GLenum)dst_image->get_internal_format());
src_image = tmp.get();
gl::copy_typeless(src_image, slice.src);
// Compute src region in dst format layout
src_x = u16(src_x * src_bpp) / dst_bpp;
src_w = u16(src_w * src_bpp) / dst_bpp;
const u16 src_w2 = u16(src_w * src_bpp) / dst_bpp;
const u16 src_x2 = u16(src_x * src_bpp) / dst_bpp;
const coord3u src_region = { { src_x, src_y, 0 }, { src_w, src_h, 1 } };
const coord3u dst_region = { { src_x2, src_y, 0 }, { src_w2, src_h, 1 } };
gl::copy_typeless(src_image, slice.src, dst_region, src_region);
src_x = src_x2;
src_w = src_w2;
}
if (src_w == slice.dst_w && src_h == slice.dst_h)