rsx: Fix surface cache hit tests

- Avoid silly broken tests due to queue_tag being called before pitch is initialized.
- Return actual memory range covered and exclude trailing padding.
- Coordinates in src are to be calculated with src_pitch, not required_pitch.
This commit is contained in:
kd-11 2019-08-27 22:05:10 +03:00 committed by kd-11
parent cca6a19cdd
commit e334a43169
4 changed files with 25 additions and 25 deletions

View File

@ -753,36 +753,35 @@ namespace rsx
{
std::vector<surface_overlap_info> result;
std::vector<std::pair<u32, bool>> dirty;
const u32 limit = texaddr + (required_pitch * required_height);
const auto surface_internal_pitch = (required_width * required_bpp);
verify(HERE), surface_internal_pitch <= required_pitch;
const auto test_range = utils::address_range::start_length(texaddr, (required_pitch * required_height) - (required_pitch - surface_internal_pitch));
auto process_list_function = [&](std::unordered_map<u32, surface_storage_type>& data, bool is_depth)
{
for (auto &tex_info : data)
{
const auto this_address = tex_info.first;
if (this_address >= limit)
const auto range = tex_info.second->get_memory_range();
if (!range.overlaps(test_range))
continue;
auto surface = tex_info.second.get();
const auto pitch = surface->get_rsx_pitch();
if (!rsx::pitch_compatible(surface, required_pitch, required_height))
continue;
const auto texture_size = pitch * surface->get_surface_height(rsx::surface_metrics::samples);
if ((this_address + texture_size) <= texaddr)
continue;
surface_overlap_info info;
info.surface = surface;
info.base_address = this_address;
info.base_address = range.start;
info.is_depth = is_depth;
const auto normalized_surface_width = surface->get_surface_width(rsx::surface_metrics::bytes) / required_bpp;
const auto normalized_surface_height = surface->get_surface_height(rsx::surface_metrics::samples);
if (LIKELY(this_address >= texaddr))
if (LIKELY(range.start >= texaddr))
{
const auto offset = this_address - texaddr;
const auto offset = range.start - texaddr;
info.dst_y = (offset / required_pitch);
info.dst_x = (offset % required_pitch) / required_bpp;
@ -799,9 +798,10 @@ namespace rsx
}
else
{
const auto offset = texaddr - this_address;
info.src_y = (offset / required_pitch);
info.src_x = (offset % required_pitch) / required_bpp;
const auto pitch = surface->get_rsx_pitch();
const auto offset = texaddr - range.start;
info.src_y = (offset / pitch);
info.src_x = (offset % pitch) / required_bpp;
if (UNLIKELY(info.src_x >= normalized_surface_width || info.src_y >= normalized_surface_height))
{
@ -819,7 +819,7 @@ namespace rsx
// Delay this as much as possible to avoid side-effects of spamming barrier
if (surface->memory_barrier(cmd, access); !surface->test())
{
dirty.emplace_back(this_address, is_depth);
dirty.emplace_back(range.start, is_depth);
continue;
}
@ -838,14 +838,12 @@ namespace rsx
// Range test helper to quickly discard blocks
// Fortunately, render targets tend to be clustered anyway
rsx::address_range test = rsx::address_range::start_end(texaddr, limit-1);
if (test.overlaps(m_render_targets_memory_range))
if (test_range.overlaps(m_render_targets_memory_range))
{
process_list_function(m_render_targets_storage, false);
}
if (test.overlaps(m_depth_stencil_memory_range))
if (test_range.overlaps(m_depth_stencil_memory_range))
{
process_list_function(m_depth_stencil_storage, true);
}

View File

@ -357,6 +357,8 @@ namespace rsx
#else
void queue_tag(u32 address)
{
verify(HERE), native_pitch, rsx_pitch;
base_addr = address;
const u32 size_x = (native_pitch > 8)? (native_pitch - 8) : 0u;
@ -569,7 +571,8 @@ namespace rsx
// Builds the address range for this surface: it starts at base_addr and its length is
// derived from the surface height (in samples) and the pitch.
// NOTE(review): this listing appears to be a rendered diff with the +/- markers stripped,
// so the replaced return statement and its successor both show up below. Read literally,
// the first return makes the two statements after it unreachable - confirm against the
// actual header before relying on either form.
rsx::address_range get_memory_range() const
{
const u32 internal_height = get_surface_height(rsx::surface_metrics::samples);
// Length here is a full pitch for every row, the last one included.
return rsx::address_range::start_length(base_addr, internal_height * get_rsx_pitch());
// Bytes by which rsx_pitch exceeds native_pitch; subtracted once so the range
// stops short of the padding that trails the final row.
const u32 excess = (rsx_pitch - native_pitch);
return rsx::address_range::start_length(base_addr, internal_height * rsx_pitch - excess);
}
template <typename T>

View File

@ -212,14 +212,11 @@ struct gl_render_target_traits
sink->set_spp(ref->get_spp());
sink->set_native_pitch(prev.width * ref->get_bpp() * ref->samples_x);
sink->set_rsx_pitch(ref->get_rsx_pitch());
sink->set_surface_dimensions(prev.width, prev.height, ref->get_rsx_pitch());
sink->set_native_component_layout(ref->get_native_component_layout());
sink->queue_tag(address);
}
else
{
sink->set_rsx_pitch(ref->get_rsx_pitch());
}
prev.target = sink.get();
@ -236,6 +233,7 @@ struct gl_render_target_traits
}
}
sink->set_rsx_pitch(ref->get_rsx_pitch());
sink->set_old_contents_region(prev, false);
sink->last_use_tag = ref->last_use_tag;
}

View File

@ -711,6 +711,7 @@ namespace rsx
sink->sample_layout = ref->sample_layout;
sink->stencil_init_flags = ref->stencil_init_flags;
sink->native_pitch = u16(prev.width * ref->get_bpp() * ref->samples_x);
sink->rsx_pitch = ref->get_rsx_pitch();
sink->surface_width = prev.width;
sink->surface_height = prev.height;
sink->queue_tag(address);
@ -724,7 +725,6 @@ namespace rsx
prev.target = sink.get();
sink->rsx_pitch = ref->get_rsx_pitch();
if (!sink->old_contents.empty())
{
// Deal with this, likely only needs to clear
@ -738,6 +738,7 @@ namespace rsx
}
}
sink->rsx_pitch = ref->get_rsx_pitch();
sink->set_old_contents_region(prev, false);
sink->last_use_tag = ref->last_use_tag;
}