Implemented writing swizzled textures

Optimized protected_region::combine
Implemented conditional async mode for nv3089::image_in & nv0039::buffer_notify
This commit is contained in:
DHrpcs3 2016-03-15 23:00:05 +03:00
parent 53e3833aa1
commit 82a32fcb5a
6 changed files with 173 additions and 32 deletions

View File

@ -57,10 +57,11 @@ public:
m_end = m_begin + value;
return *this;
}
// Grows this range in place so that it also covers `other`: the lower bound
// becomes the smaller of the two begins and the upper bound the larger of
// the two ends.
void extend(const range& other)
{
	m_begin = std::min(m_begin, other.m_begin);
	// The upper bound has to use max; taking the minimum here would shrink
	// the range (or collapse it to other.m_end) instead of extending it.
	m_end = std::max(m_end, other.m_end);
}
constexpr bool valid() const

View File

@ -1059,7 +1059,6 @@ bool nv3089_image_in(u32 arg, GLGSRender* renderer)
return false;
}
u32 in_bpp = src_color_format == CELL_GCM_TRANSFER_SCALE_FORMAT_R5G6B5 ? 2 : 4; // bytes per pixel
u32 out_bpp = dst_color_format == CELL_GCM_TRANSFER_SURFACE_FORMAT_R5G6B5 ? 2 : 4;
@ -1204,6 +1203,15 @@ bool nv3089_image_in(u32 arg, GLGSRender* renderer)
dst_info.swizzled = context_surface == CELL_GCM_CONTEXT_SWIZZLE2D;
if (dst_info.swizzled)
{
u8 sw_width_log2 = rsx::method_registers[NV309E_SET_FORMAT] >> 16;
u8 sw_height_log2 = rsx::method_registers[NV309E_SET_FORMAT] >> 24;
dst_info.log2_width = sw_width_log2 ? sw_width_log2 : 1;
dst_info.log2_height = sw_height_log2 ? sw_height_log2 : 1;
}
switch (dst_color_format)
{
case CELL_GCM_TRANSFER_SURFACE_FORMAT_R5G6B5:
@ -1443,6 +1451,7 @@ gl::texture_info surface_info(rsx::thread &rsx, rsx::surface_color_format format
{
gl::texture_info info{};
info.format = gl::get_texture_format(surface_format_to_texture_format(format));
info.format.flags &= gl::texture_flags::allow_swizzle;
rsx::tiled_region region = rsx.get_tiled_address(offset, location);
@ -1510,24 +1519,29 @@ void GLGSRender::init_buffers(bool skip_reading)
m_surface.width = clip_width * m_surface.width_mult + clip_x;
m_surface.height = clip_height * m_surface.height_mult + clip_y;
bool swizzled_surface = m_surface.type == CELL_GCM_SURFACE_SWIZZLE;
rsx::for_each_active_color_surface([&](int index)
{
u32 offset = rsx::method_registers[mr_color_offset[index]];
u32 location = rsx::method_registers[mr_color_dma[index]];
u32 pitch = rsx::method_registers[mr_color_pitch[index]];
bool swizzled = m_surface.type == CELL_GCM_SURFACE_SWIZZLE;
gl::texture_info info = surface_info(*this, m_surface.color_format, offset, location, m_surface.width, m_surface.height, pitch);
info.swizzled = swizzled;
info.swizzled = swizzled_surface;
if (swizzled_surface)
{
info.log2_width = m_surface.log2width;
info.log2_height = m_surface.log2height;
}
cached_color_buffers[index] = &texture_cache.entry(info, skip_reading ? gl::cache_buffers::none : gl::cache_buffers::local);
draw_fbo.color[index] = cached_color_buffers[index]->view();
});
{
u32 offset = rsx::method_registers[NV4097_SET_SURFACE_ZETA_OFFSET];
u32 location = rsx::method_registers[NV4097_SET_CONTEXT_DMA_ZETA];
u32 pitch = rsx::method_registers[NV4097_SET_SURFACE_PITCH_Z] & ~63;
int bpp;
@ -1542,13 +1556,16 @@ void GLGSRender::init_buffers(bool skip_reading)
break;
}
if (pitch && pitch < bpp * m_surface.width)
if (swizzled_surface || (pitch && pitch < bpp * m_surface.width))
{
__glcheck draw_fbo.depth_stencil = null_texture;
cached_depth_buffer = nullptr;
}
else
{
u32 offset = rsx::method_registers[NV4097_SET_SURFACE_ZETA_OFFSET];
u32 location = rsx::method_registers[NV4097_SET_CONTEXT_DMA_ZETA];
if (!pitch)
{
pitch = m_surface.width * bpp;

View File

@ -333,19 +333,59 @@ namespace gl
}
else
{
if (info.swizzled && (info.format.flags & texture_flags::allow_swizzle) != texture_flags::none)
{
//TODO
LOG_ERROR(RSX, "writing swizzled texture[0x%x] to host buffer", info.start_address);
}
gl::pixel_pack_settings{}
.row_length(info.pitch / info.format.bpp)
.aligment(1)
.swap_bytes((info.format.flags & gl::texture_flags::swap_bytes) != gl::texture_flags::none)
.apply();
__glcheck glGetTexImage((GLenum)info.target, 0, (GLenum)info.format.format, (GLenum)info.format.type, vm::base_priv(info.start_address));
if (info.swizzled && (info.format.flags & texture_flags::allow_swizzle) != texture_flags::none)
{
std::unique_ptr<u8[]> linear_pixels(new u8[info.size()]);
__glcheck glGetTexImage((GLenum)info.target, 0, (GLenum)info.format.format, (GLenum)info.format.type, linear_pixels.get());
u16 sw_width = 1 << info.log2_width;
u16 sw_height = 1 << info.log2_height;
// Check and pad texture out if we are given non square texture for swizzle to be correct
if (sw_width != info.width || sw_height != info.height)
{
std::unique_ptr<u8[]> sw_temp(new u8[info.format.bpp * sw_width * sw_height]);
switch (info.format.bpp)
{
case 1:
rsx::pad_texture<u8>(linear_pixels.get(), sw_temp.get(), info.width, info.height, sw_width, sw_height);
break;
case 2:
rsx::pad_texture<u16>(linear_pixels.get(), sw_temp.get(), info.width, info.height, sw_width, sw_height);
break;
case 4:
rsx::pad_texture<u32>(linear_pixels.get(), sw_temp.get(), info.width, info.height, sw_width, sw_height);
break;
}
linear_pixels = std::move(sw_temp);
}
switch (info.format.bpp)
{
case 1:
rsx::convert_linear_swizzle<u8>(linear_pixels.get(), vm::base_priv(info.start_address), sw_width, sw_height, false);
break;
case 2:
rsx::convert_linear_swizzle<u16>(linear_pixels.get(), vm::base_priv(info.start_address), sw_width, sw_height, false);
break;
case 4:
rsx::convert_linear_swizzle<u32>(linear_pixels.get(), vm::base_priv(info.start_address), sw_width, sw_height, false);
break;
}
}
else
{
__glcheck glGetTexImage((GLenum)info.target, 0, (GLenum)info.format.format, (GLenum)info.format.type, vm::base_priv(info.start_address));
}
}
ignore(gl::cache_buffers::all);
@ -590,20 +630,93 @@ namespace gl
//TODO
}
// Forwards a protection change for the addresses covered by `range` to
// vm::page_protect. The last three arguments passed are: 0, the subset of
// {vm::page_readable, vm::page_writable} that is NOT present in `protect`,
// and `protect` itself.
// NOTE(review): which of those arguments is the "set" mask and which the
// "clear" mask depends on vm::page_protect's declaration — confirm there.
void set_page_protection(range<u32> range, u8 protect)
{
	const auto rw_mask = vm::page_readable | vm::page_writable;
	const auto complement_flags = ~protect & rw_mask;

	vm::page_protect(range.begin(), range.size(), 0, complement_flags, protect);
}
// Merges `region` into this protected region.
// Steps, in order: both regions are unprotected, every texture of `region` is
// re-parented to this object and inserted into m_textures, the combined
// cache_access requirement is translated into vm page flags, page protection
// is re-applied to the sub-ranges selected below, and finally this region's
// range is extended to cover `region`.
// Throws (EXCEPTION with an empty message) if a texture of `region` is
// already present in m_textures.
// NOTE(review): the sub-range selection compares begin() values only and uses
// unsigned subtraction for the gap, so it presumably assumes the two regions
// do not overlap — confirm against the callers.
void protected_region::combine(protected_region& region)
{
region.unprotect();
unprotect();
// Start from what `region` reports and OR in each texture's own requirement.
cache_access new_protection = region.requires_protection();
for (auto &texture : region.m_textures)
{
texture.second.parent(this);
// emplace() reports failure when an equal key already exists.
if (!m_textures.emplace(texture).second)
{
throw EXCEPTION("");
}
new_protection |= texture.second.requires_protection();
}
// Translate the cache_access bits into the vm page flag bits.
u8 new_protection_flags = 0;
if ((new_protection & cache_access::read) != cache_access::none)
{
new_protection_flags |= vm::page_readable;
}
if ((new_protection & cache_access::write) != cache_access::none)
{
new_protection_flags |= vm::page_writable;
}
// Case 1: neither region's recorded flags match the new ones — apply them
// from the lower region's begin to the other region's end.
if (m_current_protection != new_protection_flags && region.m_current_protection != new_protection_flags)
{
if (begin() < region.begin())
{
set_page_protection({ begin(), region.end() }, new_protection_flags);
}
else
{
set_page_protection({ region.begin(), end() }, new_protection_flags);
}
}
// Case 2: only this region's recorded flags differ (region's already match).
else if (m_current_protection != new_protection_flags)
{
if (begin() < region.begin())
{
set_page_protection({ begin(), region.begin() }, new_protection_flags);
}
else
{
set_page_protection({ region.end(), end() }, new_protection_flags);
}
}
// Case 3: only `region`'s recorded flags differ (this region's already match).
else if (region.m_current_protection != new_protection_flags)
{
if (begin() < region.begin())
{
set_page_protection({ end(), region.end() }, new_protection_flags);
}
else
{
set_page_protection({ region.begin(), begin() }, new_protection_flags);
}
}
// Case 4: both recorded flags already match — only the gap between the two
// regions (if any) is handled.
else
{
if (begin() < region.begin())
{
// u32 subtraction: a zero result means the regions are adjacent.
if (u32 diff = region.begin() - end())
{
set_page_protection({ end(), end() + diff }, new_protection_flags);
}
}
else
{
if (u32 diff = begin() - region.end())
{
set_page_protection({ region.end(), region.end() + diff }, new_protection_flags);
}
// NOTE(review): this unconditional call has no counterpart in the mirrored
// branch above and covers `region` plus the gap again — confirm whether it
// is intentional or a leftover.
set_page_protection({ region.begin(), begin() }, new_protection_flags);
}
}
// Record the flags now in effect and grow this region's range over `region`.
m_current_protection = new_protection_flags;
extend(region);
}
@ -667,7 +780,7 @@ namespace gl
if (!aligned_size)
{
aligned_range.begin(info.start_address & ~(vm::page_size - 1));
aligned_range.size(align(info.size() + info.start_address - aligned_range.begin(), vm::page_size));
aligned_range.size(align(info.start_address - aligned_range.begin() + info.size(), vm::page_size));
}
else
{

View File

@ -70,6 +70,8 @@ namespace gl
bool swizzled;
float lod_bias;
u32 start_address;
u32 log2_width;
u32 log2_height;
u32 size() const
{
@ -126,7 +128,7 @@ namespace gl
{
private:
std::unordered_map<texture_info, cached_texture, fnv_1a_hasher, bitwise_equals> m_textures;
u32 m_current_protection = 0;
u8 m_current_protection = 0;
public:
cache_access requires_protection() const;

View File

@ -186,6 +186,8 @@ void rsx::gl_texture::bind(gl::texture_cache& cache, rsx::texture& tex)
info.lod_bias = tex.bias();
}
gl::texture_flags flags = gl::texture_flags::none;
if (is_compressed)
{
info.format.type = gl::texture::type::ubyte;
@ -226,15 +228,18 @@ void rsx::gl_texture::bind(gl::texture_cache& cache, rsx::texture& tex)
throw EXCEPTION("unimplemented texture format 0x%x", format);
}
info.swizzled = info.swizzled && (found->second.flags & gl::texture_flags::allow_swizzle) != gl::texture_flags::none;
info.format = found->second;
info.pitch = std::max(info.width * info.format.bpp, tex.pitch());
flags = found->second.flags;
info.format.flags &= gl::texture_flags::allow_swizzle;
remap = info.format.remap.data();
}
__glcheck cache.entry(info, gl::cache_buffers::local).bind(tex.index());
if ((info.format.flags & gl::texture_flags::allow_remap) != gl::texture_flags::none)
if ((flags & gl::texture_flags::allow_remap) != gl::texture_flags::none)
{
u8 remap_a = tex.remap() & 0x3;
u8 remap_r = (tex.remap() >> 2) & 0x3;

View File

@ -29,15 +29,22 @@ namespace rsx
}
};
// Runs `function`, either on a detached std::thread or on the calling thread.
// In the `call_async` branch operations_in_progress is incremented and the
// callable is moved into the thread, which constructs a scoped_operation
// before invoking it; in the else branch a scoped_operation is constructed
// and the callable is invoked inline.
// NOTE(review): this span shows two signatures and an extra `}).detach();`,
// which looks like pre- and post-change lines of a diff interleaved — confirm
// which lines are live before relying on it.
// NOTE(review): the inline branch constructs scoped_operation without its own
// ++operations_in_progress — verify against scoped_operation's definition
// that the counter stays balanced.
force_inline void async_operation(std::function<void()> function)
force_inline void async_operation(bool call_async, std::function<void()> function)
{
++operations_in_progress;
std::thread([function = std::move(function)]()
if (call_async)
{
++operations_in_progress;
std::thread([function = std::move(function)]()
{
scoped_operation operation;
function();
}).detach();
}
else
{
scoped_operation operation;
function();
}).detach();
}
}
std::vector<std::shared_ptr<thread_ctrl>> threads_storage;
@ -493,7 +500,7 @@ namespace rsx
sw_height_log2 = 1;
}
async_operation([=]
async_operation(need_clip || need_convert || (in_w + in_h) > 128, [=]
{
u8* pixels_src = src_region.tile ? src_region.ptr + src_region.base : src_region.ptr;
u8* pixels_dst = vm::ps3::_ptr<u8>(dst_address + out_offset);
@ -592,10 +599,8 @@ namespace rsx
u16 sw_width = 1 << sw_width_log2;
u16 sw_height = 1 << sw_height_log2;
temp2.reset(new u8[out_bpp * sw_width * sw_height]);
u8* linear_pixels = pixels_src;
u8* swizzled_pixels = temp2.get();
u8* swizzled_pixels = pixels_dst;
std::unique_ptr<u8[]> sw_temp;
@ -632,8 +637,6 @@ namespace rsx
convert_linear_swizzle<u32>(linear_pixels, swizzled_pixels, sw_width, sw_height, false);
break;
}
std::memcpy(pixels_dst, swizzled_pixels, out_bpp * sw_width * sw_height);
}
});
}
@ -674,7 +677,7 @@ namespace rsx
u32 dst_offset = method_registers[NV0039_OFFSET_OUT];
u32 dst_dma = method_registers[NV0039_SET_CONTEXT_DMA_BUFFER_OUT];
async_operation([=]
async_operation(line_count * line_length > 64, [=]
{
u8 *dst = (u8*)vm::base(get_address(dst_offset, dst_dma));
const u8 *src = (u8*)vm::base(get_address(src_offset, src_dma));