rsx: Minor optimization; avoid preparing unused vertex streams

- Also discards unused program state variables
This commit is contained in:
kd-11 2021-09-15 20:46:03 +03:00 committed by kd-11
parent e7b9513d4a
commit 3e09b97f58
11 changed files with 75 additions and 81 deletions

View File

@ -659,9 +659,6 @@ bool GLGSRender::load_program()
ensure(current_fragment_program.valid);
get_current_vertex_program(vs_sampler_state);
current_vertex_program.skip_vertex_input_check = true; //not needed for us since decoding is done server side
current_fragment_program.texture_state.unnormalized_coords = 0; //unused
}
else if (m_program)
{

View File

@ -136,7 +136,6 @@ struct GLProgramBuffer : public program_state_cache<GLTraits>
// Adds a pipeline entry for the given vertex/fragment program pair by
// delegating to get_graphics_pipeline(), passing `false` for both of its
// boolean arguments and perfectly forwarding any extra arguments.
// NOTE(review): this hunk is rendered without +/- markers and its header
// says it shrinks from 7 to 6 lines; the skip_vertex_input_check assignment
// is presumably the line this commit removes - confirm against the raw diff.
template <typename... Args>
void add_pipeline_entry(RSXVertexProgram &vp, RSXFragmentProgram &fp, void* &props, Args&& ...args)
{
vp.skip_vertex_input_check = true;
get_graphics_pipeline(vp, fp, props, false, false, std::forward<Args>(args)...);
}

View File

@ -85,6 +85,9 @@ vertex_program_utils::vertex_program_metadata vertex_program_utils::analyse_vert
instruction_range.first = std::min(current_instruction, instruction_range.first);
instruction_range.second = std::max(current_instruction, instruction_range.second);
// Whether to check if the current instruction references an input stream
bool test_input_read = false;
// Basic vec op analysis, must be done before flow analysis
switch (d1.vec_opcode)
{
@ -94,6 +97,11 @@ vertex_program_utils::vertex_program_metadata vertex_program_utils::analyse_vert
result.referenced_textures_mask |= (1 << d2.tex_num);
break;
}
default:
{
test_input_read = !!d1.input_src;
break;
}
}
bool static_jump = false;
@ -160,6 +168,26 @@ vertex_program_utils::vertex_program_metadata vertex_program_utils::analyse_vert
break;
}
default:
{
test_input_read = !!d1.input_src;
break;
}
}
if (test_input_read)
{
	// Each source operand block stores its register type in its lowest 2 bits.
	// The type is an enumerated value rather than a set of flags, so it has to be
	// isolated and compared for equality. A bitwise AND against the INPUT value on
	// the whole operand word also matches any other type that shares a bit with it
	// (and ignores which bits belong to the type field), which would flag inputs
	// as referenced when the instruction only reads constants.
	d2.HEX = instruction._u32[2];

	constexpr u32 reg_type_mask = 0x3;
	const auto src0_type = d2.src0l & reg_type_mask;
	const auto src1_type = d2.src1 & reg_type_mask;
	const auto src2_type = d3.src2l & reg_type_mask;

	if (src0_type == RSX_VP_REGISTER_TYPE_INPUT ||
		src1_type == RSX_VP_REGISTER_TYPE_INPUT ||
		src2_type == RSX_VP_REGISTER_TYPE_INPUT)
	{
		// At least one operand really reads the input stream selected by this instruction
		result.referenced_inputs_mask |= (1 << d1.input_src);
	}
}
if ((d3.end && (fast_exit || current_instruction >= instruction_range.second)) ||
@ -249,6 +277,7 @@ vertex_program_utils::vertex_program_metadata vertex_program_utils::analyse_vert
}
}
result.referenced_inputs_mask |= 1u; // VPOS is always enabled, else no rendering can happen
return result;
}
@ -270,8 +299,6 @@ bool vertex_program_compare::operator()(const RSXVertexProgram &binary1, const R
return false;
if (binary1.jump_table != binary2.jump_table)
return false;
if (!binary1.skip_vertex_input_check && !binary2.skip_vertex_input_check && binary1.rsx_vertex_inputs != binary2.rsx_vertex_inputs)
return false;
const void* instBuffer1 = binary1.data.data();
const void* instBuffer2 = binary2.data.data();
@ -457,7 +484,6 @@ usz fragment_program_storage_hash::operator()(const RSXFragmentProgram& program)
hash ^= program.ctrl;
hash ^= +program.two_sided_lighting;
hash ^= program.texture_state.texture_dimensions;
hash ^= program.texture_state.unnormalized_coords;
hash ^= program.texture_state.shadow_textures;
hash ^= program.texture_state.redirected_textures;
hash ^= program.texcoord_control_mask;

View File

@ -25,6 +25,8 @@ namespace program_hash_util
std::bitset<rsx::max_vertex_program_instructions> instruction_mask;
u32 ucode_length;
u32 referenced_textures_mask;
u16 referenced_inputs_mask;
u16 reserved;
};
static usz get_vertex_program_ucode_hash(const RSXVertexProgram &program);

View File

@ -219,31 +219,11 @@ static const std::string rsx_vp_vec_op_names[] =
"SEQ", "SFL", "SGT", "SLE", "SNE", "STR", "SSG", "NULL", "NULL", "TXL"
};
// Plain description of a single vertex input attribute.
// Two descriptions compare equal only if every field, including the
// backend-defined `flags`, is identical.
// NOTE(review): the hunk header reports a net removal of 20 lines here, so this
// struct is presumably being deleted by the commit - confirm against the raw diff.
struct rsx_vertex_input
{
u8 location; // between 0 and 15
u8 size; // between 1 and 4
u16 frequency;
bool is_modulo; // either modulo frequency or divide frequency
bool is_array; // false if "reg value"
bool int_type;
u32 flags; //Initially zero, to be optionally filled by the backend
// Field-by-field equality over all members
bool operator==(const rsx_vertex_input& other) const
{
return location == other.location && size == other.size && frequency == other.frequency && is_modulo == other.is_modulo &&
is_array == other.is_array && int_type == other.int_type && flags == other.flags;
}
};
struct RSXVertexProgram
{
std::vector<u32> data;
std::vector<rsx_vertex_input> rsx_vertex_inputs;
u32 output_mask;
bool skip_vertex_input_check;
rsx::vertex_program_texture_state texture_state;
u32 output_mask;
u32 base_address;
u32 entry;
std::bitset<rsx::max_vertex_program_instructions> instruction_mask;

View File

@ -52,20 +52,18 @@ namespace rsx
masked_transfer(slots_, src, mask);
}
// Clears bit `index` in each of the 16-bit per-texture flag masks
// (unnormalized_coords, redirected_textures, shadow_textures).
// texture_dimensions is not modified here.
void fragment_program_texture_state::clear(u32 index)
{
// All bits set except the one belonging to `index`
const u16 clear_mask = ~(static_cast<u16>(1 << index));
unnormalized_coords &= clear_mask;
redirected_textures &= clear_mask;
shadow_textures &= clear_mask;
}
void fragment_program_texture_state::import(const fragment_program_texture_state& other, u16 mask)
{
unnormalized_coords = other.unnormalized_coords & mask;
redirected_textures = other.redirected_textures & mask;
shadow_textures = other.shadow_textures & mask;
texture_dimensions = other.texture_dimensions & duplicate_and_extend(mask);
// Clears bit `index` in the 16-bit per-texture flag masks
// (redirected_textures and shadow_textures).
// texture_dimensions is not modified here.
void fragment_program_texture_state::clear(u32 index)
{
// All bits set except the one belonging to `index`
const u16 clear_mask = ~(static_cast<u16>(1 << index));
redirected_textures &= clear_mask;
shadow_textures &= clear_mask;
}
// Overwrites this state with the parts of `other` selected by `mask`.
// The 16-bit masks are filtered with `mask` directly; bits not selected end up
// cleared in this object because the fields are assigned, not merged.
void fragment_program_texture_state::import(const fragment_program_texture_state& other, u16 mask)
{
redirected_textures = other.redirected_textures & mask;
shadow_textures = other.shadow_textures & mask;
// texture_dimensions is 32 bits wide, so the 16-bit mask is widened first.
// NOTE(review): duplicate_and_extend is not shown here; presumably it expands
// each mask bit to cover the per-texture dimension field - confirm.
texture_dimensions = other.texture_dimensions & duplicate_and_extend(mask);
}
void fragment_program_texture_state::set_dimension(texture_dimension_extended type, u32 index)
@ -77,14 +75,13 @@ namespace rsx
}
// Field-wise equality over texture_dimensions, redirected_textures,
// shadow_textures and unnormalized_coords. No other member takes part
// in the comparison.
bool fragment_program_texture_state::operator == (const fragment_program_texture_state& other) const
{
return texture_dimensions == other.texture_dimensions &&
redirected_textures == other.redirected_textures &&
shadow_textures == other.shadow_textures &&
unnormalized_coords == other.unnormalized_coords;
}
void vertex_program_texture_state::clear(u32 /*index*/)
{
return texture_dimensions == other.texture_dimensions &&
redirected_textures == other.redirected_textures &&
shadow_textures == other.shadow_textures;
}
// Intentionally a no-op: the parameter is left unnamed because it is unused.
// It mirrors the signature of fragment_program_texture_state::clear.
void vertex_program_texture_state::clear(u32 /*index*/)
{
// Nothing to do yet
}

View File

@ -33,16 +33,14 @@ namespace rsx
};
#pragma pack(pop)
struct fragment_program_texture_state
{
u32 texture_dimensions = 0;
u16 unnormalized_coords = 0;
u16 redirected_textures = 0;
u16 shadow_textures = 0;
u16 reserved = 0;
void clear(u32 index);
void import(const fragment_program_texture_state& other, u16 mask);
// Texture-related state attached to a fragment program.
// All members default to zero.
struct fragment_program_texture_state
{
// Written through set_dimension(type, index).
// NOTE(review): presumably packs one small dimension field per texture index - confirm.
u32 texture_dimensions = 0;
// 16-bit masks; clear(index) resets bit `index` in each of them
u16 redirected_textures = 0;
u16 shadow_textures = 0;
// Resets the mask bits that belong to texture `index`
void clear(u32 index);
// Replaces this state with the portion of `other` selected by `mask`
void import(const fragment_program_texture_state& other, u16 mask);
// Records the dimension `type` for texture `index`
void set_dimension(texture_dimension_extended type, u32 index);
bool operator == (const fragment_program_texture_state& other) const;
};

View File

@ -1637,9 +1637,6 @@ namespace rsx
m_graphics_state &= ~rsx::pipeline_state::vertex_program_ucode_dirty;
const u32 transform_program_start = rsx::method_registers.transform_program_start();
current_vertex_program.skip_vertex_input_check = true;
current_vertex_program.rsx_vertex_inputs.clear();
current_vertex_program.data.reserve(512 * 4);
current_vertex_program.jump_table.clear();
@ -1706,7 +1703,7 @@ namespace rsx
void thread::analyse_inputs_interleaved(vertex_input_layout& result) const
{
const rsx_state& state = rsx::method_registers;
const u32 input_mask = state.vertex_attrib_input_mask();
const u32 input_mask = state.vertex_attrib_input_mask() & current_vp_metadata.referenced_inputs_mask;
result.clear();
@ -1752,11 +1749,15 @@ namespace rsx
result.interleaved_blocks.reserve(16);
result.referenced_registers.reserve(16);
for (u8 index = 0; index < rsx::limits::vertex_count; ++index)
for (auto [ref_mask, index] = std::tuple{ input_mask, u8(0) }; ref_mask; ++index, ref_mask >>= 1)
{
// Check if vertex stream is enabled
if (!(input_mask & (1 << index)))
ensure(index < rsx::limits::vertex_count);
if (!(ref_mask & 1u))
{
// Nothing to do, uninitialized
continue;
}
//Check for interleaving
const auto &info = state.vertex_arrays_info[index];
@ -1921,8 +1922,6 @@ namespace rsx
if (raw_format & CELL_GCM_TEXTURE_UN)
{
current_fp_texture_state.unnormalized_coords |= (1 << i);
if (tex.min_filter() == rsx::texture_minify_filter::nearest ||
tex.mag_filter() == rsx::texture_magnify_filter::nearest)
{
@ -2204,8 +2203,14 @@ namespace rsx
const u32 modulo_mask = rsx::method_registers.frequency_divider_operation_mask();
const auto max_index = (first_vertex + vertex_count) - 1;
for (u8 index = 0; index < rsx::limits::vertex_count; ++index)
for (u16 ref_mask = current_vp_metadata.referenced_inputs_mask, index = 0; ref_mask; ++index, ref_mask >>= 1)
{
if (!(ref_mask & 1u))
{
// Unused input, ignore this
continue;
}
if (layout.attribute_placement[index] == attribute_buffer_placement::none)
{
static constexpr u64 zero = 0;

View File

@ -1771,9 +1771,7 @@ bool VKGSRender::load_program()
{
vk::enter_uninterruptible();
// Load current program from buffer
vertex_program.skip_vertex_input_check = true;
fragment_program.texture_state.unnormalized_coords = 0;
// Load current program from cache
m_program = m_prog_buffer->get_graphics_pipeline(vertex_program, fragment_program, properties,
shadermode != shader_mode::recompiler, true, pipeline_layout);

View File

@ -91,13 +91,11 @@ namespace vk
// Adds a pipeline entry for the given vertex/fragment program pair by
// delegating to get_graphics_pipeline(), passing `false` for both of its
// boolean arguments and perfectly forwarding any extra arguments.
// NOTE(review): this hunk is rendered without +/- markers; the
// skip_vertex_input_check assignment is presumably one of the two lines
// this commit removes from the hunk - confirm against the raw diff.
template <typename... Args>
void add_pipeline_entry(RSXVertexProgram& vp, RSXFragmentProgram& fp, vk::pipeline_props& props, Args&& ...args)
{
vp.skip_vertex_input_check = true;
get_graphics_pipeline(vp, fp, props, false, false, std::forward<Args>(args)...);
}
}
// Runs the vertex and fragment program lookups for the given pair without
// building a pipeline; whatever the two search calls return is discarded.
// NOTE(review): presumably the searches populate the program cache as a
// side effect - the helpers are not shown here, confirm.
void preload_programs(RSXVertexProgram& vp, RSXFragmentProgram& fp)
{
vp.skip_vertex_input_check = true;
search_vertex_program(vp);
search_fragment_program(fp);
}

View File

@ -39,7 +39,6 @@ namespace rsx
u32 fp_ctrl;
u32 fp_texture_dimensions;
u32 fp_texcoord_control;
u16 fp_unnormalized_coords;
u16 fp_height;
u16 fp_pixel_layout;
u16 fp_lighting_flags;
@ -307,7 +306,6 @@ namespace rsx
state_hash ^= rpcs3::hash_base<u32>(data.vp_texture_dimensions);
state_hash ^= rpcs3::hash_base<u32>(data.fp_texture_dimensions);
state_hash ^= rpcs3::hash_base<u32>(data.fp_texcoord_control);
state_hash ^= rpcs3::hash_base<u16>(data.fp_unnormalized_coords);
state_hash ^= rpcs3::hash_base<u16>(data.fp_height);
state_hash ^= rpcs3::hash_base<u16>(data.fp_pixel_layout);
state_hash ^= rpcs3::hash_base<u16>(data.fp_lighting_flags);
@ -326,8 +324,6 @@ namespace rsx
fs::file f(fmt::format("%s/raw/%llX.vp", root_path, program_hash));
if (f) f.read(vp.data, f.size() / sizeof(u32));
vp.skip_vertex_input_check = true;
return vp;
}
@ -381,7 +377,6 @@ namespace rsx
fp.ctrl = data.fp_ctrl;
fp.texture_state.texture_dimensions = data.fp_texture_dimensions;
fp.texture_state.unnormalized_coords = data.fp_unnormalized_coords;
fp.texture_state.shadow_textures = data.fp_shadow_textures;
fp.texture_state.redirected_textures = data.fp_redirected_textures;
fp.texcoord_control_mask = data.fp_texcoord_control;
@ -426,7 +421,6 @@ namespace rsx
data_block.fp_ctrl = fp.ctrl;
data_block.fp_texture_dimensions = fp.texture_state.texture_dimensions;
data_block.fp_texcoord_control = fp.texcoord_control_mask;
data_block.fp_unnormalized_coords = fp.texture_state.unnormalized_coords;
data_block.fp_lighting_flags = u16(fp.two_sided_lighting);
data_block.fp_shadow_textures = fp.texture_state.shadow_textures;
data_block.fp_redirected_textures = fp.texture_state.redirected_textures;