gl: Use the GPU to scale textures; use ARB_sampler_object

Improve scaling and separate sampler state from texture state

gl: Unify all texture cache objects under one structure, separated by use case
gl: Texture cache fixes

- Acquire lock when finding matching textures
- Account for swizzled surfaces when deciding whether to cpu memcpy
- Handle swizzled images on the GPU
This commit is contained in:
kd-11 2017-03-29 22:27:29 +03:00
parent d94986ff0d
commit 6d6d0e4e36
11 changed files with 880 additions and 370 deletions

View File

@ -408,6 +408,7 @@ void GLGSRender::end()
std::chrono::time_point<steady_clock> textures_start = steady_clock::now();
//Setup textures
//Setting unused texture to 0 is not needed, but makes program validation happy if we choose to enforce it
for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
{
int location;
@ -422,6 +423,7 @@ void GLGSRender::end()
{
m_gl_textures[i].set_target(get_gl_target_for_texture(rsx::method_registers.fragment_textures[i]));
__glcheck m_gl_texture_cache.upload_texture(i, rsx::method_registers.fragment_textures[i], m_gl_textures[i], m_rtts);
__glcheck m_gl_sampler_states[i].apply(rsx::method_registers.fragment_textures[i]);
}
}
@ -572,6 +574,12 @@ void GLGSRender::on_init_thread()
if (g_cfg_rsx_overlay)
m_text_printer.init();
for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
{
m_gl_sampler_states[i].create();
m_gl_sampler_states[i].bind(i);
}
m_gl_texture_cache.initialize(this);
}
@ -606,6 +614,11 @@ void GLGSRender::on_exit()
tex.remove();
}
for (auto &sampler : m_gl_sampler_states)
{
sampler.remove();
}
m_attrib_ring_buffer->remove();
m_transform_constants_buffer->remove();
m_fragment_constants_buffer->remove();
@ -879,7 +892,7 @@ void GLGSRender::flip(int buffer)
gl::screen.clear(gl::buffers::color_depth_stencil);
__glcheck flip_fbo->blit(gl::screen, screen_area, areai(aspect_ratio).flipped_vertical());
__glcheck flip_fbo->blit(gl::screen, screen_area, areai(aspect_ratio).flipped_vertical(), gl::buffers::color, gl::filter::linear);
if (g_cfg_rsx_overlay)
{
@ -960,7 +973,7 @@ void GLGSRender::do_local_task()
}
}
work_item& GLGSRender::post_flush_request(u32 address, gl::texture_cache::cached_rtt_section *section)
work_item& GLGSRender::post_flush_request(u32 address, gl::texture_cache::cached_texture_section *section)
{
std::lock_guard<std::mutex> lock(queue_guard);
@ -979,3 +992,8 @@ void GLGSRender::synchronize_buffers()
flush_draw_buffers = false;
}
}
//Blit-engine hook: forwards a scaled image transfer to the GL texture cache together with the
//render target store, and hands the cache's verdict back to the caller unchanged.
bool GLGSRender::scaled_image_from_memory(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate)
{
	const bool uploaded = m_gl_texture_cache.upload_scaled_image(src, dst, interpolate, m_rtts);
	return uploaded;
}

View File

@ -9,6 +9,7 @@
#include "define_new_memleakdetect.h"
#include "GLProgramBuffer.h"
#include "GLTextOut.h"
#include "../rsx_cache.h"
#pragma comment(lib, "opengl32.lib")
@ -18,7 +19,7 @@ struct work_item
std::mutex guard_mutex;
u32 address_to_flush = 0;
gl::texture_cache::cached_rtt_section *section_to_flush = nullptr;
gl::texture_cache::cached_texture_section *section_to_flush = nullptr;
volatile bool processed = false;
volatile bool result = false;
@ -57,6 +58,7 @@ private:
rsx::gl::texture m_gl_textures[rsx::limits::fragment_textures_count];
rsx::gl::texture m_gl_vertex_textures[rsx::limits::vertex_textures_count];
gl::sampler_state m_gl_sampler_states[rsx::limits::fragment_textures_count];
gl::glsl::program *m_program;
@ -129,7 +131,9 @@ public:
void set_viewport();
void synchronize_buffers();
work_item& post_flush_request(u32 address, gl::texture_cache::cached_rtt_section *section);
work_item& post_flush_request(u32 address, gl::texture_cache::cached_texture_section *section);
bool scaled_image_from_memory(rsx::blit_src_info& src_info, rsx::blit_dst_info& dst_info, bool interpolate) override;
protected:
void begin() override;

View File

@ -172,6 +172,13 @@ OPENGL_PROC(PFNGLMULTIDRAWARRAYSPROC, MultiDrawArrays);
OPENGL_PROC(PFNGLGETTEXTUREIMAGEEXTPROC, GetTextureImageEXT);
//Sampler Objects
OPENGL_PROC(PFNGLGENSAMPLERSPROC, GenSamplers);
OPENGL_PROC(PFNGLDELETESAMPLERSPROC, DeleteSamplers);
OPENGL_PROC(PFNGLBINDSAMPLERPROC, BindSampler);
OPENGL_PROC(PFNGLSAMPLERPARAMETERIPROC, SamplerParameteri);
OPENGL_PROC(PFNGLSAMPLERPARAMETERFVPROC, SamplerParameterfv);
//Texture Buffers
OPENGL_PROC(PFNGLTEXBUFFERPROC, TexBuffer);
OPENGL_PROC(PFNGLTEXTUREBUFFERRANGEEXTPROC, TextureBufferRangeEXT);

View File

@ -116,7 +116,9 @@ namespace gl
}
// For an address within the texture, extract this sub-section's rect origin
std::tuple<bool, u16, u16> get_texture_subresource(u32 offset)
// Checks whether we need to scale the subresource if it is not handled in shader
// NOTE1: When surface->real_pitch < rsx_pitch, the surface is assumed to have been scaled to fill the rsx_region
std::tuple<bool, u16, u16> get_texture_subresource(u32 offset, bool scale_to_fit)
{
if (!offset)
{
@ -132,9 +134,14 @@ namespace gl
if (!surface_pixel_size)
surface_pixel_size = native_pitch / surface_width;
u32 pixel_offset = (offset / surface_pixel_size);
u32 y = (pixel_offset / surface_width);
u32 x = (pixel_offset % surface_width);
const u32 y = (offset / rsx_pitch);
u32 x = (offset % rsx_pitch) / surface_pixel_size;
if (scale_to_fit)
{
const f32 x_scale = (f32)rsx_pitch / native_pitch;
x = (u32)((f32)x / x_scale);
}
return std::make_tuple(true, (u16)x, (u16)y);
}
@ -291,18 +298,19 @@ struct surface_subresource
bool is_bound = false;
bool is_depth_surface = false;
bool is_clipped = false;
surface_subresource() {}
surface_subresource(gl::render_target *src, u16 X, u16 Y, u16 W, u16 H, bool _Bound, bool _Depth)
: surface(src), x(X), y(Y), w(W), h(H), is_bound(_Bound), is_depth_surface(_Depth)
surface_subresource(gl::render_target *src, u16 X, u16 Y, u16 W, u16 H, bool _Bound, bool _Depth, bool _Clipped = false)
: surface(src), x(X), y(Y), w(W), h(H), is_bound(_Bound), is_depth_surface(_Depth), is_clipped(_Clipped)
{}
};
class gl_render_targets : public rsx::surface_store<gl_render_target_traits>
{
private:
bool surface_overlaps(gl::render_target *surface, u32 surface_address, u32 texaddr, u16 *x, u16 *y)
bool surface_overlaps(gl::render_target *surface, u32 surface_address, u32 texaddr, u16 *x, u16 *y, bool scale_to_fit)
{
bool is_subslice = false;
u16 x_offset = 0;
@ -314,7 +322,7 @@ private:
u32 offset = texaddr - surface_address;
if (offset >= 0)
{
std::tie(is_subslice, x_offset, y_offset) = surface->get_texture_subresource(offset);
std::tie(is_subslice, x_offset, y_offset) = surface->get_texture_subresource(offset, scale_to_fit);
if (is_subslice)
{
*x = x_offset;
@ -354,7 +362,7 @@ private:
}
public:
surface_subresource get_surface_subresource_if_applicable(u32 texaddr, u16 requested_width, u16 requested_height, u16 requested_pitch)
surface_subresource get_surface_subresource_if_applicable(u32 texaddr, u16 requested_width, u16 requested_height, u16 requested_pitch, bool scale_to_fit =false, bool crop=false)
{
gl::render_target *surface = nullptr;
bool is_subslice = false;
@ -366,21 +374,35 @@ public:
u32 this_address = std::get<0>(tex_info);
surface = std::get<1>(tex_info).get();
if (surface_overlaps(surface, this_address, texaddr, &x_offset, &y_offset))
if (surface_overlaps(surface, this_address, texaddr, &x_offset, &y_offset, scale_to_fit))
{
if (surface->get_rsx_pitch() != requested_pitch)
continue;
auto dims = surface->get_dimensions();
if (scale_to_fit)
{
f32 pitch_scaling = (f32)requested_pitch / surface->get_native_pitch();
requested_width /= pitch_scaling;
}
if (fits(surface, dims, x_offset, y_offset, requested_width, requested_height))
return{ surface, x_offset, y_offset, requested_width, requested_height, is_bound(this_address, false), false };
else
{
if (crop) //Forcefully fit the requested region by clipping and scaling
{
u16 remaining_width = dims.first - x_offset;
u16 remaining_height = dims.second - y_offset;
return{ surface, x_offset, y_offset, remaining_width, remaining_height, is_bound(this_address, false), false, true };
}
if (dims.first >= requested_width && dims.second >= requested_height)
{
LOG_WARNING(RSX, "Overlapping surface exceeds bounds; returning full surface region");
return{ surface, 0, 0, requested_width, requested_height, is_bound(this_address, false), false };
return{ surface, 0, 0, requested_width, requested_height, is_bound(this_address, false), false, true };
}
}
}
@ -392,21 +414,35 @@ public:
u32 this_address = std::get<0>(tex_info);
surface = std::get<1>(tex_info).get();
if (surface_overlaps(surface, this_address, texaddr, &x_offset, &y_offset))
if (surface_overlaps(surface, this_address, texaddr, &x_offset, &y_offset, scale_to_fit))
{
if (surface->get_rsx_pitch() != requested_pitch)
continue;
auto dims = surface->get_dimensions();
if (scale_to_fit)
{
f32 pitch_scaling = (f32)requested_pitch / surface->get_native_pitch();
requested_width /= pitch_scaling;
}
if (fits(surface, dims, x_offset, y_offset, requested_width, requested_height))
return{ surface, x_offset, y_offset, requested_width, requested_height, is_bound(this_address, true), true };
else
{
if (crop) //Forcefully fit the requested region by clipping and scaling
{
u16 remaining_width = dims.first - x_offset;
u16 remaining_height = dims.second - y_offset;
return{ surface, x_offset, y_offset, remaining_width, remaining_height, is_bound(this_address, true), true, true };
}
if (dims.first >= requested_width && dims.second >= requested_height)
{
LOG_WARNING(RSX, "Overlapping depth surface exceeds bounds; returning full surface region");
return{ surface, 0, 0, requested_width, requested_height, is_bound(this_address, true), true };
return{ surface, 0, 0, requested_width, requested_height, is_bound(this_address, true), true, true };
}
}
}

View File

@ -67,6 +67,116 @@ namespace gl
}
fmt::throw_exception("Compressed or unknown texture format 0x%x" HERE, texture_format);
}
//Translates an RSX texture wrap mode into the corresponding GL wrap enum.
//Unrecognized values are logged and mapped to GL_REPEAT.
GLenum wrap_mode(rsx::texture_wrap_mode wrap)
{
	using mode = rsx::texture_wrap_mode;

	switch (wrap)
	{
	case mode::wrap:
		return GL_REPEAT;
	case mode::mirror:
		return GL_MIRRORED_REPEAT;
	case mode::clamp_to_edge:
	case mode::clamp: //shares the GL mode used for clamp_to_edge
		return GL_CLAMP_TO_EDGE;
	case mode::border:
		return GL_CLAMP_TO_BORDER;
	case mode::mirror_once_clamp_to_edge:
		return GL_MIRROR_CLAMP_TO_EDGE_EXT;
	case mode::mirror_once_border:
		return GL_MIRROR_CLAMP_TO_BORDER_EXT;
	case mode::mirror_once_clamp:
		return GL_MIRROR_CLAMP_EXT;
	}

	//Not one of the enumerators handled above
	LOG_ERROR(RSX, "Texture wrap error: bad wrap (%d)", (u32)wrap);
	return GL_REPEAT;
}
//Converts an RSX max-anisotropy level into the float value handed to GL.
//Unrecognized values are logged and treated as 1.0 (x1).
float max_aniso(rsx::texture_max_anisotropy aniso)
{
	using level = rsx::texture_max_anisotropy;

	switch (aniso)
	{
	case level::x1:
		return 1.0f;
	case level::x2:
		return 2.0f;
	case level::x4:
		return 4.0f;
	case level::x6:
		return 6.0f;
	case level::x8:
		return 8.0f;
	case level::x10:
		return 10.0f;
	case level::x12:
		return 12.0f;
	case level::x16:
		return 16.0f;
	}

	//Not one of the enumerators handled above
	LOG_ERROR(RSX, "Texture anisotropy error: bad max aniso (%d)", (u32)aniso);
	return 1.0f;
}
//Maps an RSX minification filter onto the GL minification filter enum.
//Throws through fmt::throw_exception when the value is not a handled enumerator.
int tex_min_filter(rsx::texture_minify_filter min_filter)
{
	using filter = rsx::texture_minify_filter;

	switch (min_filter)
	{
	case filter::nearest:
		return GL_NEAREST;
	case filter::linear:
		return GL_LINEAR;
	case filter::nearest_nearest:
		return GL_NEAREST_MIPMAP_NEAREST;
	case filter::linear_nearest:
		return GL_LINEAR_MIPMAP_NEAREST;
	case filter::nearest_linear:
		return GL_NEAREST_MIPMAP_LINEAR;
	case filter::linear_linear:
	case filter::convolution_min: //shares the GL filter used for linear_linear
		return GL_LINEAR_MIPMAP_LINEAR;
	}

	fmt::throw_exception("Unknow min filter" HERE);
}
//Maps an RSX magnification filter onto the GL magnification filter enum.
//Throws through fmt::throw_exception when the value is not a handled enumerator.
int tex_mag_filter(rsx::texture_magnify_filter mag_filter)
{
	using filter = rsx::texture_magnify_filter;

	switch (mag_filter)
	{
	case filter::nearest:
		return GL_NEAREST;
	case filter::linear:
	case filter::convolution_mag: //shares the GL filter used for linear
		return GL_LINEAR;
	}

	fmt::throw_exception("Unknow mag filter" HERE);
}
//Apply sampler state settings
//Copies wrap modes, border color, min/mag filtering, LOD range/bias and max anisotropy from the
//given fragment texture description onto the GL sampler object held by this instance.
void sampler_state::apply(rsx::fragment_texture& tex)
{
	//NOTE(review): border_color() is scaled as one 0-255 intensity and replicated to all four channels.
	//If the register holds a packed per-channel color this needs a per-channel decode - confirm.
	const f32 border_color = (f32)tex.border_color() / 255;
	const f32 border_color_array[] = { border_color, border_color, border_color, border_color };

	glSamplerParameteri(samplerHandle, GL_TEXTURE_WRAP_S, wrap_mode(tex.wrap_s()));
	glSamplerParameteri(samplerHandle, GL_TEXTURE_WRAP_T, wrap_mode(tex.wrap_t()));
	glSamplerParameteri(samplerHandle, GL_TEXTURE_WRAP_R, wrap_mode(tex.wrap_r()));
	glSamplerParameterfv(samplerHandle, GL_TEXTURE_BORDER_COLOR, border_color_array);

	GLint min_filter = tex_min_filter(tex.min_filter());
	f32 lod_bias = 0.f;
	GLint min_lod = 0;
	GLint max_lod = 0;

	if (tex.get_exact_mipmap_count() <= 1)
	{
		//No mip chain to sample from: replace mipmapped filters with their non-mipmapped
		//counterpart and leave LOD bias/range at zero
		switch (min_filter)
		{
		case GL_NEAREST:
		case GL_LINEAR:
			break;
		case GL_NEAREST_MIPMAP_NEAREST:
		case GL_NEAREST_MIPMAP_LINEAR:
			min_filter = GL_NEAREST; break;
		case GL_LINEAR_MIPMAP_NEAREST:
		case GL_LINEAR_MIPMAP_LINEAR:
			min_filter = GL_LINEAR; break;
		default:
			LOG_ERROR(RSX, "No mipmap fallback defined for rsx_min_filter = 0x%X", (u32)tex.min_filter());
			min_filter = GL_NEAREST;
		}
	}
	else
	{
		lod_bias = tex.bias();
		min_lod = (tex.min_lod() >> 8);
		max_lod = (tex.max_lod() >> 8);
	}

	glSamplerParameteri(samplerHandle, GL_TEXTURE_MIN_FILTER, min_filter);
	glSamplerParameteri(samplerHandle, GL_TEXTURE_MIN_LOD, min_lod);
	glSamplerParameteri(samplerHandle, GL_TEXTURE_MAX_LOD, max_lod);

	//LOD bias is a float parameter; sending it through glSamplerParameteri would truncate any
	//fractional bias to an integer. The float-vector entry point accepts a pointer to a single value.
	glSamplerParameterfv(samplerHandle, GL_TEXTURE_LOD_BIAS, &lod_bias);

	glSamplerParameteri(samplerHandle, GL_TEXTURE_MAG_FILTER, tex_mag_filter(tex.mag_filter()));

	//Max anisotropy is likewise a float parameter, so it goes through the float entry point as well
	const f32 anisotropy = ::gl::max_aniso(tex.max_aniso());
	glSamplerParameterfv(samplerHandle, GL_TEXTURE_MAX_ANISOTROPY_EXT, &anisotropy);
}
}
namespace
@ -182,32 +292,6 @@ namespace rsx
{
namespace gl
{
int gl_tex_min_filter(rsx::texture_minify_filter min_filter)
{
switch (min_filter)
{
case rsx::texture_minify_filter::nearest: return GL_NEAREST;
case rsx::texture_minify_filter::linear: return GL_LINEAR;
case rsx::texture_minify_filter::nearest_nearest: return GL_NEAREST_MIPMAP_NEAREST;
case rsx::texture_minify_filter::linear_nearest: return GL_LINEAR_MIPMAP_NEAREST;
case rsx::texture_minify_filter::nearest_linear: return GL_NEAREST_MIPMAP_LINEAR;
case rsx::texture_minify_filter::linear_linear: return GL_LINEAR_MIPMAP_LINEAR;
case rsx::texture_minify_filter::convolution_min: return GL_LINEAR_MIPMAP_LINEAR;
}
fmt::throw_exception("Unknow min filter" HERE);
}
int gl_tex_mag_filter(rsx::texture_magnify_filter mag_filter)
{
switch (mag_filter)
{
case rsx::texture_magnify_filter::nearest: return GL_NEAREST;
case rsx::texture_magnify_filter::linear: return GL_LINEAR;
case rsx::texture_magnify_filter::convolution_mag: return GL_LINEAR;
}
fmt::throw_exception("Unknow mag filter" HERE);
}
static const int gl_tex_zfunc[] =
{
GL_NEVER,
@ -230,42 +314,6 @@ namespace rsx
glGenTextures(1, &m_id);
}
int texture::gl_wrap(rsx::texture_wrap_mode wrap)
{
switch (wrap)
{
case rsx::texture_wrap_mode::wrap: return GL_REPEAT;
case rsx::texture_wrap_mode::mirror: return GL_MIRRORED_REPEAT;
case rsx::texture_wrap_mode::clamp_to_edge: return GL_CLAMP_TO_EDGE;
case rsx::texture_wrap_mode::border: return GL_CLAMP_TO_BORDER;
case rsx::texture_wrap_mode::clamp: return GL_CLAMP_TO_EDGE;
case rsx::texture_wrap_mode::mirror_once_clamp_to_edge: return GL_MIRROR_CLAMP_TO_EDGE_EXT;
case rsx::texture_wrap_mode::mirror_once_border: return GL_MIRROR_CLAMP_TO_BORDER_EXT;
case rsx::texture_wrap_mode::mirror_once_clamp: return GL_MIRROR_CLAMP_EXT;
}
LOG_ERROR(RSX, "Texture wrap error: bad wrap (%d)", (u32)wrap);
return GL_REPEAT;
}
float texture::max_aniso(rsx::texture_max_anisotropy aniso)
{
switch (aniso)
{
case rsx::texture_max_anisotropy::x1: return 1.0f;
case rsx::texture_max_anisotropy::x2: return 2.0f;
case rsx::texture_max_anisotropy::x4: return 4.0f;
case rsx::texture_max_anisotropy::x6: return 6.0f;
case rsx::texture_max_anisotropy::x8: return 8.0f;
case rsx::texture_max_anisotropy::x10: return 10.0f;
case rsx::texture_max_anisotropy::x12: return 12.0f;
case rsx::texture_max_anisotropy::x16: return 16.0f;
}
LOG_ERROR(RSX, "Texture anisotropy error: bad max aniso (%d)", (u32)aniso);
return 1.0f;
}
u16 texture::get_pitch_modifier(u32 format)
{
switch (format)
@ -535,49 +583,7 @@ namespace rsx
__glcheck glTexParameteri(m_target, GL_TEXTURE_SWIZZLE_G, remap_values[2]);
__glcheck glTexParameteri(m_target, GL_TEXTURE_SWIZZLE_B, remap_values[3]);
__glcheck glTexParameteri(m_target, GL_TEXTURE_WRAP_S, gl_wrap(tex.wrap_s()));
__glcheck glTexParameteri(m_target, GL_TEXTURE_WRAP_T, gl_wrap(tex.wrap_t()));
__glcheck glTexParameteri(m_target, GL_TEXTURE_WRAP_R, gl_wrap(tex.wrap_r()));
if (tex.get_exact_mipmap_count() <= 1 || m_target == GL_TEXTURE_RECTANGLE)
{
GLint min_filter = gl_tex_min_filter(tex.min_filter());
if (min_filter != GL_LINEAR && min_filter != GL_NEAREST)
{
LOG_WARNING(RSX, "Texture %d, target 0x%x, requesting mipmap filtering without any mipmaps set!", m_id, m_target);
switch (min_filter)
{
case GL_NEAREST_MIPMAP_NEAREST:
case GL_NEAREST_MIPMAP_LINEAR:
min_filter = GL_NEAREST; break;
case GL_LINEAR_MIPMAP_NEAREST:
case GL_LINEAR_MIPMAP_LINEAR:
min_filter = GL_LINEAR; break;
default:
LOG_ERROR(RSX, "No mipmap fallback defined for rsx_min_filter = 0x%X", (u32)tex.min_filter());
min_filter = GL_NEAREST;
}
}
__glcheck glTexParameteri(m_target, GL_TEXTURE_MIN_FILTER, min_filter);
__glcheck glTexParameterf(m_target, GL_TEXTURE_LOD_BIAS, 0.);
__glcheck glTexParameteri(m_target, GL_TEXTURE_MIN_LOD, 0);
__glcheck glTexParameteri(m_target, GL_TEXTURE_MAX_LOD, 0);
}
else
{
__glcheck glTexParameteri(m_target, GL_TEXTURE_MIN_FILTER, gl_tex_min_filter(tex.min_filter()));
__glcheck glTexParameterf(m_target, GL_TEXTURE_LOD_BIAS, tex.bias());
__glcheck glTexParameteri(m_target, GL_TEXTURE_MIN_LOD, (tex.min_lod() >> 8));
__glcheck glTexParameteri(m_target, GL_TEXTURE_MAX_LOD, (tex.max_lod() >> 8));
}
__glcheck glTexParameteri(m_target, GL_TEXTURE_MAG_FILTER, gl_tex_mag_filter(tex.mag_filter()));
__glcheck glTexParameterf(m_target, GL_TEXTURE_MAX_ANISOTROPY_EXT, max_aniso(tex.max_aniso()));
//The rest of sampler state is now handled by sampler state objects
}
void texture::init(int index, rsx::vertex_texture& tex)

View File

@ -5,7 +5,42 @@ namespace rsx
{
class vertex_texture;
class fragment_texture;
}
namespace gl
{
GLenum get_sized_internal_format(u32 gcm_format);
std::tuple<GLenum, GLenum> get_format_type(u32 texture_format);
GLenum wrap_mode(rsx::texture_wrap_mode wrap);
float max_aniso(rsx::texture_max_anisotropy aniso);
//Wrapper around a single OpenGL sampler object name.
//create() must run before bind()/apply(); remove() releases the name again.
class sampler_state
{
	//GL sampler object name; 0 while no sampler has been created or after remove()
	GLuint samplerHandle = 0;

public:
	//Generates a new sampler object name and stores it in this instance
	void create()
	{
		glGenSamplers(1, &samplerHandle);
	}

	//Deletes the sampler object and forgets its name so that a later bind() or a second
	//remove() cannot act on a stale (possibly reused) name
	void remove()
	{
		glDeleteSamplers(1, &samplerHandle);
		samplerHandle = 0;
	}

	//Binds the sampler object to texture image unit 'index'
	void bind(int index)
	{
		glBindSampler(index, samplerHandle);
	}

	//Transfers the sampling settings of 'tex' onto this sampler object (defined out of line)
	void apply(rsx::fragment_texture& tex);
};
}
namespace rsx
{
namespace gl
{
class texture
@ -16,28 +51,6 @@ namespace rsx
public:
void create();
int gl_wrap(rsx::texture_wrap_mode in);
float max_aniso(rsx::texture_max_anisotropy aniso);
inline static u8 convert_4_to_8(u8 v)
{
// Swizzle bits: 00001234 -> 12341234
return (v << 4) | (v);
}
inline static u8 convert_5_to_8(u8 v)
{
// Swizzle bits: 00012345 -> 12345123
return (v << 3) | (v >> 2);
}
inline static u8 convert_6_to_8(u8 v)
{
// Swizzle bits: 00123456 -> 12345612
return (v << 2) | (v >> 4);
}
void init(int index, rsx::fragment_texture& tex);
void init(int index, rsx::vertex_texture& tex);
@ -64,9 +77,3 @@ namespace rsx
};
}
}
namespace gl
{
GLenum get_sized_internal_format(u32 gcm_format);
std::tuple<GLenum, GLenum> get_format_type(u32 texture_format);
}

View File

@ -9,25 +9,25 @@ namespace gl
{
bool texture_cache::flush_section(u32 address)
{
if (address < rtt_cache_range.first ||
address >= rtt_cache_range.second)
if (address < no_access_range.first ||
address >= no_access_range.second)
return false;
bool post_task = false;
cached_rtt_section* section_to_post = nullptr;
cached_texture_section* section_to_post = nullptr;
{
std::lock_guard<std::mutex> lock(m_section_mutex);
for (cached_rtt_section &rtt : m_rtt_cache)
for (cached_texture_section &tex : no_access_memory_sections)
{
if (rtt.is_dirty()) continue;
if (tex.is_dirty()) continue;
if (rtt.is_locked() && rtt.overlaps(address))
if (tex.is_locked() && tex.overlaps(address))
{
if (rtt.is_flushed())
if (tex.is_flushed())
{
LOG_WARNING(RSX, "Section matches range, but marked as already flushed!, 0x%X+0x%X", rtt.get_section_base(), rtt.get_section_size());
LOG_WARNING(RSX, "Section matches range, but marked as already flushed!, 0x%X+0x%X", tex.get_section_base(), tex.get_section_size());
continue;
}
@ -36,11 +36,11 @@ namespace gl
if (std::this_thread::get_id() != m_renderer_thread)
{
post_task = true;
section_to_post = &rtt;
section_to_post = &tex;
break;
}
rtt.flush();
tex.flush();
return true;
}
}

File diff suppressed because it is too large Load Diff

View File

@ -2,6 +2,7 @@
#include <rsx_decompiler.h>
#include "Utilities/VirtualMemory.h"
#include "Emu/Memory/vm.h"
#include "gcm_enums.h"
namespace rsx
{
@ -37,14 +38,18 @@ namespace rsx
u16 offset_y;
u16 width;
u16 height;
u16 slice;
u16 slice_h;
u16 pitch;
void *pixels;
u32 rsx_address;
};
struct blit_dst_info
{
blit_engine::transfer_destination_format format;
u16 offset_x;
u16 offset_y;
u16 width;
u16 height;
u16 pitch;
@ -52,8 +57,11 @@ namespace rsx
u16 clip_y;
u16 clip_width;
u16 clip_height;
bool swizzled;
void *pixels;
u32 rsx_address;
};
class shaders_cache
@ -107,26 +115,9 @@ namespace rsx
bool locked = false;
bool dirty = false;
bool region_overlaps(u32 base1, u32 limit1, u32 base2, u32 limit2)
inline bool region_overlaps(u32 base1, u32 limit1, u32 base2, u32 limit2)
{
//Check for memory area overlap. unlock page(s) if needed and add this index to array.
//Axis separation test
const u32 &block_start = base1;
const u32 block_end = limit1;
if (limit2 < block_start) return false;
if (base2 > block_end) return false;
u32 min_separation = (limit2 - base2) + (limit1 - base1);
u32 range_limit = (block_end > limit2) ? block_end : limit2;
u32 range_base = (block_start < base2) ? block_start : base2;
u32 actual_separation = (range_limit - range_base);
if (actual_separation < min_separation)
return true;
return false;
return (base1 < limit2 && base2 < limit1);
}
public:
@ -171,6 +162,19 @@ namespace rsx
return (locked_address_base <= address && (address - locked_address_base) < locked_address_range);
}
/**
* Check if range overlaps with this section.
* ignore_protection_range - if true, the test should not check against the aligned protection range, instead
* tests against actual range of contents in memory
*/
bool overlaps(std::pair<u32, u32> range, bool ignore_protection_range)
{
if (!ignore_protection_range)
return region_overlaps(locked_address_base, locked_address_base + locked_address_range, range.first, range.first + range.second);
else
return region_overlaps(cpu_address_base, cpu_address_base + cpu_address_range, range.first, range.first + range.second);
}
/**
* Check if the page containing the address tramples this section. Also compares a former trampled page range to compare
* If true, returns the range <min, max> with updated invalid range

View File

@ -573,29 +573,41 @@ namespace rsx
}
}
blit_src_info src_info;
blit_dst_info dst_info;
if (dst_dma == CELL_GCM_CONTEXT_DMA_MEMORY_FRAME_BUFFER)
{
//For now, only use this for actual scaled images, there are use cases that should not go through 3d engine, e.g program ucode transfer
//TODO: Figure out more instances where we can use this without problems
src_info.format = src_color_format;
src_info.width = in_w;
src_info.height = in_h;
src_info.pitch = in_pitch;
src_info.slice = slice_h;
src_info.pixels = pixels_src;
blit_src_info src_info;
blit_dst_info dst_info;
dst_info.format = dst_color_format;
dst_info.width = convert_w;
dst_info.height = convert_h;
dst_info.clip_x = clip_x;
dst_info.clip_y = clip_y;
dst_info.clip_width = clip_w;
dst_info.clip_height = clip_h;
dst_info.pitch = in_pitch;
dst_info.pixels = pixels_dst;
dst_info.swizzled = (method_registers.blit_engine_context_surface() == blit_engine::context_surface::swizzle2d);
src_info.format = src_color_format;
src_info.width = in_w;
src_info.height = in_h;
src_info.pitch = in_pitch;
src_info.slice_h = slice_h;
src_info.offset_x = in_x;
src_info.offset_y = in_y;
src_info.pixels = pixels_src;
src_info.rsx_address = get_address(src_offset, src_dma);
if (rsx->scaled_image_from_memory(src_info, dst_info, in_inter == blit_engine::transfer_interpolator::foh))
return;
dst_info.format = dst_color_format;
dst_info.width = convert_w;
dst_info.height = convert_h;
dst_info.clip_x = clip_x;
dst_info.clip_y = clip_y;
dst_info.clip_width = clip_w;
dst_info.clip_height = clip_h;
dst_info.offset_x = out_x;
dst_info.offset_y = out_y;
dst_info.pitch = out_pitch;
dst_info.pixels = pixels_dst;
dst_info.rsx_address = get_address(dst_offset, dst_dma);
dst_info.swizzled = (method_registers.blit_engine_context_surface() == blit_engine::context_surface::swizzle2d);
if (rsx->scaled_image_from_memory(src_info, dst_info, in_inter == blit_engine::transfer_interpolator::foh))
return;
}
if (method_registers.blit_engine_context_surface() != blit_engine::context_surface::swizzle2d)
{

View File

@ -30,12 +30,15 @@ namespace rsx
//Copies a clip_w x clip_h rectangle whose top-left corner is (clip_x, clip_y) in src to the start of dst.
//bpp is used as the byte size of one pixel; src_pitch and dst_pitch are the byte strides between rows.
void clip_image(u8 *dst, const u8 *src, int clip_x, int clip_y, int clip_w, int clip_h, int bpp, int src_pitch, int dst_pitch)
{
	//Running row pointers; the source stays const so no cast is required
	const u8 *pixels_src = src + clip_y * src_pitch + clip_x * bpp;
	u8 *pixels_dst = dst;
	const u32 row_length = clip_w * bpp;

	for (int y = 0; y < clip_h; ++y)
	{
		//One copy per row. memmove is kept because nothing here rules out overlapping buffers
		std::memmove(pixels_dst, pixels_src, row_length);
		pixels_src += src_pitch;
		pixels_dst += dst_pitch;
	}
}