rsx: Implement report transfer deferring

- Allow deferring report flushes triggered by image_in or buffer_notify
- When the report is ready, all deferred transfers complete automatically
(see the sketch below).
- TODO: Make this configurable?
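A minimal, self-contained sketch of the intended flow, for illustration only; the names below (deferred_zcull_model, pending_report, retire) are hypothetical and simplified, not RPCS3 code. The idea: a read barrier that overlaps an unresolved report returns an interrupt code instead of stalling, the transfer destination is registered as a sink alias, and the copy happens automatically once the report retires.

#include <cstdint>
#include <vector>

// Illustrative only; names and layout are hypothetical simplifications.
enum sync_flags : uint32_t { sync_none = 0, sync_defer_copy = 1, sync_no_notify = 2 };
enum sync_result : uint32_t { result_none = 0, result_zcull_intr = 2 };

struct pending_report
{
    uint32_t sink = 0;                 // primary address the report will be written to
    std::vector<uint32_t> sink_alias;  // extra destinations added by deferred copies
    bool resolved = false;
};

struct deferred_zcull_model
{
    std::vector<pending_report> pending;

    // If the transfer source overlaps an unresolved report and deferral is allowed,
    // tell the caller to defer instead of forcing the report to resolve now
    // (the forced-resolution path is omitted in this sketch).
    sync_result read_barrier(uint32_t addr, uint32_t len, uint32_t flags) const
    {
        for (const auto& report : pending)
        {
            if (!report.resolved && report.sink >= addr && report.sink < addr + len)
                return (flags & sync_defer_copy) ? result_zcull_intr : result_none;
        }
        return result_none;
    }

    // Register the transfer destination as an alias of each overlapped report.
    // The actual copy is performed later, when the report value becomes available.
    void copy_reports_to(uint32_t start, uint32_t len, uint32_t dest)
    {
        for (auto& report : pending)
        {
            if (!report.resolved && report.sink >= start && report.sink < start + len)
                report.sink_alias.push_back((report.sink - start) + dest);
        }
    }

    // Called once the GPU result arrives: write the value to the sink and every alias.
    static void retire(pending_report& report, uint32_t value, uint8_t* memory)
    {
        *reinterpret_cast<uint32_t*>(memory + report.sink) = value;
        for (const uint32_t alias : report.sink_alias)
            *reinterpret_cast<uint32_t*>(memory + alias) = value;
        report.resolved = true;
    }
};

The point of the aliasing is that the blit engine can return immediately; the pending report write simply fans out to every registered destination when it finally retires.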
kd-11 2019-10-24 23:02:42 +03:00 committed by kd-11
parent 3e0f9dff4d
commit 2a8f2c64d2
4 changed files with 134 additions and 29 deletions

View File

@ -307,7 +307,7 @@ namespace rsx
u8* pixels_src = (u8*)vm::base(src_address);
const u32 src_size = in_pitch * (in_h - 1) + (in_w * in_bpp);
rsx->read_barrier(src_address, src_size);
rsx->read_barrier(src_address, src_size, true);
frame_capture_data::memory_block_data block_data;
block_data.data.resize(src_size);
@ -328,7 +328,7 @@ namespace rsx
u32 src_dma = method_registers.nv0039_input_location();
u32 src_addr = get_address(src_offset, src_dma);
rsx->read_barrier(src_addr, in_pitch * (line_count - 1) + line_length);
rsx->read_barrier(src_addr, in_pitch * (line_count - 1) + line_length, true);
const u8* src = vm::_ptr<u8>(src_addr);

View File

@ -296,7 +296,7 @@ namespace rsx
if (conditional_render_enabled && conditional_render_test_address)
{
// Evaluate conditional rendering test
zcull_ctrl->read_barrier(this, conditional_render_test_address, 4);
zcull_ctrl->read_barrier(this, conditional_render_test_address, 4, reports::sync_no_notify);
vm::ptr<CellGcmReportData> result = vm::cast(conditional_render_test_address);
conditional_render_test_failed = (result->value == 0);
conditional_render_test_address = 0;
@ -2348,6 +2348,11 @@ namespace rsx
vm::_ref<atomic_t<CellGcmReportData>>(sink).store({ timestamp(), value, 0});
}
u32 thread::copy_zcull_stats(u32 memory_range_start, u32 memory_range, u32 destination)
{
return zcull_ctrl->copy_reports_to(memory_range_start, memory_range, destination);
}
void thread::sync()
{
zcull_ctrl->sync(this);
@ -2384,9 +2389,10 @@ namespace rsx
return fifo_ctrl->last_cmd();
}
void thread::read_barrier(u32 memory_address, u32 memory_range)
flags32_t thread::read_barrier(u32 memory_address, u32 memory_range, bool unconditional)
{
zcull_ctrl->read_barrier(this, memory_address, memory_range);
flags32_t zcull_flags = (unconditional)? reports::sync_none : reports::sync_defer_copy;
return zcull_ctrl->read_barrier(this, memory_address, memory_range, zcull_flags);
}
void thread::notify_zcull_info_changed()
@ -2938,6 +2944,16 @@ namespace rsx
vm::_ref<atomic_t<CellGcmReportData>>(sink).store({ timestamp, value, 0});
}
void ZCULL_control::write(queued_report_write* writer, u64 timestamp, u32 value)
{
write(writer->sink, timestamp, writer->type, value);
for (auto &addr : writer->sink_alias)
{
write(addr, timestamp, writer->type, value);
}
}
void ZCULL_control::sync(::rsx::thread* ptimer)
{
if (!m_pending_writes.empty())
@ -2979,8 +2995,10 @@ namespace rsx
}
if (!writer.forwarder)
//No other queries in the chain, write result
write(writer.sink, ptimer->timestamp(), writer.type, result);
{
// No other queries in the chain, write result
write(&writer, ptimer->timestamp(), result);
}
processed++;
}
@ -2997,7 +3015,7 @@ namespace rsx
if (remaining == 1)
{
m_pending_writes.front() = m_pending_writes.back();
m_pending_writes[0] = std::move(m_pending_writes.back());
m_pending_writes.resize(1);
}
else
@ -3156,10 +3174,12 @@ namespace rsx
stat_tag_to_remove = writer.counter_tag;
//only zpass supported right now
// only zpass supported right now
if (!writer.forwarder)
//No other queries in the chain, write result
write(writer.sink, ptimer->timestamp(), writer.type, result);
{
// No other queries in the chain, write result
write(&writer, ptimer->timestamp(), result);
}
processed++;
}
@ -3172,7 +3192,7 @@ namespace rsx
auto remaining = m_pending_writes.size() - processed;
if (remaining == 1)
{
m_pending_writes.front() = m_pending_writes.back();
m_pending_writes[0] = std::move(m_pending_writes.back());
m_pending_writes.resize(1);
}
else if (remaining)
@ -3189,10 +3209,10 @@ namespace rsx
}
}
void ZCULL_control::read_barrier(::rsx::thread* ptimer, u32 memory_address, u32 memory_range)
flags32_t ZCULL_control::read_barrier(::rsx::thread* ptimer, u32 memory_address, u32 memory_range, flags32_t flags)
{
if (m_pending_writes.empty())
return;
return result_none;
const auto memory_end = memory_address + memory_range;
u32 sync_address = 0;
@ -3208,11 +3228,21 @@ namespace rsx
}
}
if (sync_address)
if (!sync_address)
return result_none;
if (!(flags & sync_defer_copy))
{
ptimer->sync_hint(FIFO_hint::hint_zcull_sync, reinterpret_cast<uintptr_t>(query));
if (!(flags & sync_no_notify))
{
ptimer->sync_hint(FIFO_hint::hint_zcull_sync, reinterpret_cast<uintptr_t>(query));
}
update(ptimer, sync_address);
return result_none;
}
return result_zcull_intr;
}
occlusion_query_info* ZCULL_control::find_query(vm::addr_t sink_address)
@ -3225,5 +3255,24 @@ namespace rsx
return nullptr;
}
u32 ZCULL_control::copy_reports_to(u32 start, u32 range, u32 dest)
{
u32 bytes_to_write = 0;
const auto memory_range = utils::address_range::start_length(start, range);
for (auto &writer : m_pending_writes)
{
if (!writer.sink)
break;
if (!writer.forwarder && memory_range.overlaps(writer.sink))
{
u32 address = (writer.sink - start) + dest;
writer.sink_alias.push_back(vm::cast(address));
}
}
return bytes_to_write;
}
}
}

View File

@ -101,6 +101,13 @@ namespace rsx
hint_zcull_sync = 2
};
enum result_flags: u8
{
result_none = 0,
result_error = 1,
result_zcull_intr = 2
};
u32 get_vertex_type_size_on_host(vertex_base_type type, u32 size);
u32 get_address(u32 offset, u32 location);
@ -345,11 +352,20 @@ namespace rsx
u32 counter_tag;
occlusion_query_info* query;
queued_report_write* forwarder;
vm::addr_t sink;
vm::addr_t sink; // Memory location of the report
std::vector<vm::addr_t> sink_alias; // Aliased memory addresses
u64 due_tsc;
};
enum sync_control
{
sync_none = 0,
sync_defer_copy = 1, // If set, return a zcull intr code instead of forcefully reading zcull data
sync_no_notify = 2 // If set, backend hint notifications will not be made
};
struct ZCULL_control
{
// Delay before a report update operation is forced to retire
@ -380,6 +396,7 @@ namespace rsx
void set_active(class ::rsx::thread* ptimer, bool state);
void write(vm::addr_t sink, u64 timestamp, u32 type, u32 value);
void write(queued_report_write* writer, u64 timestamp, u32 value);
// Read current zcull statistics into the address provided
void read_report(class ::rsx::thread* ptimer, vm::addr_t sink, u32 type);
@ -394,7 +411,7 @@ namespace rsx
void sync(class ::rsx::thread* ptimer);
// Conditionally sync any pending writes if range overlaps
void read_barrier(class ::rsx::thread* ptimer, u32 memory_address, u32 memory_range);
flags32_t read_barrier(class ::rsx::thread* ptimer, u32 memory_address, u32 memory_range, flags32_t flags);
// Call once every 'tick' to update, optional address provided to partially sync until address is processed
void update(class ::rsx::thread* ptimer, u32 sync_address = 0);
@ -408,6 +425,9 @@ namespace rsx
// Search for query synchronized at address
occlusion_query_info* find_query(vm::addr_t sink_address);
// Copies queries in range rebased from source range to destination range
u32 copy_reports_to(u32 start, u32 range, u32 dest);
// Backend methods (optional, will return everything as always visible by default)
virtual void begin_occlusion_query(occlusion_query_info* /*query*/) {}
virtual void end_occlusion_query(occlusion_query_info* /*query*/) {}
@ -687,10 +707,11 @@ namespace rsx
void clear_zcull_stats(u32 type);
void check_zcull_status(bool framebuffer_swap);
void get_zcull_stats(u32 type, vm::addr_t sink);
u32 copy_zcull_stats(u32 memory_range_start, u32 memory_range, u32 destination);
// sync
void sync();
void read_barrier(u32 memory_address, u32 memory_range);
flags32_t read_barrier(u32 memory_address, u32 memory_range, bool unconditional);
virtual void sync_hint(FIFO_hint /*hint*/, u64 /*arg*/) {}
gsl::span<const gsl::byte> get_raw_index_array(const draw_clause& draw_indexed_clause) const;

View File

@ -920,6 +920,7 @@ namespace rsx
rsx::blit_engine::transfer_destination_format dst_color_format;
u32 out_pitch = 0;
u32 out_alignment = 64;
bool is_block_transfer = false;
switch (method_registers.blit_engine_context_surface())
{
@ -930,6 +931,7 @@ namespace rsx
dst_color_format = method_registers.blit_engine_nv3062_color_format();
out_pitch = method_registers.blit_engine_output_pitch_nv3062();
out_alignment = method_registers.blit_engine_output_alignment_nv3062();
is_block_transfer = fcmp(scale_x, 1.f) && fcmp(scale_y, 1.f);
break;
}
case blit_engine::context_surface::swizzle2d:
@ -954,8 +956,7 @@ namespace rsx
if (UNLIKELY(in_x == 1 || in_y == 1))
{
const bool is_graphics_op = scale_x < 0.f || scale_y < 0.f || in_bpp != out_bpp || !rsx::fcmp(scale_x, 1.f) || !rsx::fcmp(scale_y, 1.f);
if (!is_graphics_op)
if (is_block_transfer && in_bpp == out_bpp)
{
// No scaling factor, so size in src == size in dst
// Check for texel wrapping where (offset + size) > size by 1 pixel
@ -968,6 +969,8 @@ namespace rsx
// Graphics operation, ignore subpixel correction offsets
if (in_x == 1) in_x = 0;
if (in_y == 1) in_y = 0;
is_block_transfer = false;
}
}
@ -977,12 +980,31 @@ namespace rsx
const u32 src_address = get_address(src_offset, src_dma);
const u32 dst_address = get_address(dst_offset, dst_dma);
const u32 src_line_length = (in_w * in_bpp);
if (is_block_transfer && (clip_h == 1 || (in_pitch == out_pitch && src_line_length == in_pitch)))
{
const u32 nb_lines = std::min(clip_h, in_h);
const u32 data_length = nb_lines * src_line_length;
if (const auto result = rsx->read_barrier(src_address, data_length, false);
result == rsx::result_zcull_intr)
{
if (rsx->copy_zcull_stats(src_address, data_length, dst_address) == data_length)
{
// All writes deferred
return;
}
}
}
else
{
const u32 data_length = in_pitch * (in_h - 1) + src_line_length;
rsx->read_barrier(src_address, data_length, true);
}
u8* pixels_src = vm::_ptr<u8>(src_address + in_offset);
u8* pixels_dst = vm::_ptr<u8>(dst_address + out_offset);
const auto read_address = get_address(src_offset, src_dma);
rsx->read_barrier(read_address, in_pitch * (in_h - 1) + (in_w * in_bpp));
if (dst_color_format != rsx::blit_engine::transfer_destination_format::r5g6b5 &&
dst_color_format != rsx::blit_engine::transfer_destination_format::a8r8g8b8)
{
@ -1310,15 +1332,28 @@ namespace rsx
u32 dst_offset = method_registers.nv0039_output_offset();
u32 dst_dma = method_registers.nv0039_output_location();
const bool is_block_transfer = (in_pitch == out_pitch && out_pitch == line_length);
const auto read_address = get_address(src_offset, src_dma);
rsx->read_barrier(read_address, in_pitch * (line_count - 1) + line_length);
const auto write_address = get_address(dst_offset, dst_dma);
const auto data_length = in_pitch * (line_count - 1) + line_length;
u8 *dst = vm::_ptr<u8>(get_address(dst_offset, dst_dma));
if (const auto result = rsx->read_barrier(read_address, data_length, !is_block_transfer);
result == rsx::result_zcull_intr)
{
// This transfer overlaps with the zcull data pool
if (rsx->copy_zcull_stats(read_address, data_length, write_address) == data_length)
{
// All writes deferred
return;
}
}
u8 *dst = vm::_ptr<u8>(write_address);
const u8 *src = vm::_ptr<u8>(read_address);
const bool is_overlapping = dst_dma == src_dma && [&]() -> bool
{
const u32 src_max = src_offset + (in_pitch * (line_count - 1) + line_length);
const u32 src_max = src_offset + data_length;
const u32 dst_max = dst_offset + (out_pitch * (line_count - 1) + line_length);
return (src_offset >= dst_offset && src_offset < dst_max) ||
(dst_offset >= src_offset && dst_offset < src_max);
@ -1326,7 +1361,7 @@ namespace rsx
if (is_overlapping)
{
if (in_pitch == out_pitch && out_pitch == line_length)
if (is_block_transfer)
{
std::memmove(dst, src, line_length * line_count);
}
@ -1354,7 +1389,7 @@ namespace rsx
}
else
{
if (in_pitch == out_pitch && out_pitch == line_length)
if (is_block_transfer)
{
std::memcpy(dst, src, line_length * line_count);
}