mirror of
https://github.com/RPCS3/rpcs3.git
synced 2024-09-29 15:31:14 +00:00
rsx: Minor optimization; avoid preparing unused vertex streams
- Also discards unused program state variables
This commit is contained in:
parent
e7b9513d4a
commit
3e09b97f58
@ -659,9 +659,6 @@ bool GLGSRender::load_program()
|
||||
ensure(current_fragment_program.valid);
|
||||
|
||||
get_current_vertex_program(vs_sampler_state);
|
||||
|
||||
current_vertex_program.skip_vertex_input_check = true; //not needed for us since decoding is done server side
|
||||
current_fragment_program.texture_state.unnormalized_coords = 0; //unused
|
||||
}
|
||||
else if (m_program)
|
||||
{
|
||||
|
@ -136,7 +136,6 @@ struct GLProgramBuffer : public program_state_cache<GLTraits>
|
||||
template <typename... Args>
|
||||
void add_pipeline_entry(RSXVertexProgram &vp, RSXFragmentProgram &fp, void* &props, Args&& ...args)
|
||||
{
|
||||
vp.skip_vertex_input_check = true;
|
||||
get_graphics_pipeline(vp, fp, props, false, false, std::forward<Args>(args)...);
|
||||
}
|
||||
|
||||
|
@ -85,6 +85,9 @@ vertex_program_utils::vertex_program_metadata vertex_program_utils::analyse_vert
|
||||
instruction_range.first = std::min(current_instruction, instruction_range.first);
|
||||
instruction_range.second = std::max(current_instruction, instruction_range.second);
|
||||
|
||||
// Whether to check if the current instruction references an input stream
|
||||
bool test_input_read = false;
|
||||
|
||||
// Basic vec op analysis, must be done before flow analysis
|
||||
switch (d1.vec_opcode)
|
||||
{
|
||||
@ -94,6 +97,11 @@ vertex_program_utils::vertex_program_metadata vertex_program_utils::analyse_vert
|
||||
result.referenced_textures_mask |= (1 << d2.tex_num);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
test_input_read = !!d1.input_src;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
bool static_jump = false;
|
||||
@ -160,6 +168,26 @@ vertex_program_utils::vertex_program_metadata vertex_program_utils::analyse_vert
|
||||
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
test_input_read = !!d1.input_src;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (test_input_read)
|
||||
{
|
||||
// Type is encoded in the first 2 bits of each block
|
||||
d2.HEX = instruction._u32[2];
|
||||
|
||||
const auto src0 = d2.src0l;
|
||||
const auto src1 = d2.src1;
|
||||
const auto src2 = d3.src2l;
|
||||
|
||||
if ((src0 | src1 | src2) & RSX_VP_REGISTER_TYPE_INPUT)
|
||||
{
|
||||
result.referenced_inputs_mask |= (1 << d1.input_src);
|
||||
}
|
||||
}
|
||||
|
||||
if ((d3.end && (fast_exit || current_instruction >= instruction_range.second)) ||
|
||||
@ -249,6 +277,7 @@ vertex_program_utils::vertex_program_metadata vertex_program_utils::analyse_vert
|
||||
}
|
||||
}
|
||||
|
||||
result.referenced_inputs_mask |= 1u; // VPOS is always enabled, else no rendering can happen
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -270,8 +299,6 @@ bool vertex_program_compare::operator()(const RSXVertexProgram &binary1, const R
|
||||
return false;
|
||||
if (binary1.jump_table != binary2.jump_table)
|
||||
return false;
|
||||
if (!binary1.skip_vertex_input_check && !binary2.skip_vertex_input_check && binary1.rsx_vertex_inputs != binary2.rsx_vertex_inputs)
|
||||
return false;
|
||||
|
||||
const void* instBuffer1 = binary1.data.data();
|
||||
const void* instBuffer2 = binary2.data.data();
|
||||
@ -457,7 +484,6 @@ usz fragment_program_storage_hash::operator()(const RSXFragmentProgram& program)
|
||||
hash ^= program.ctrl;
|
||||
hash ^= +program.two_sided_lighting;
|
||||
hash ^= program.texture_state.texture_dimensions;
|
||||
hash ^= program.texture_state.unnormalized_coords;
|
||||
hash ^= program.texture_state.shadow_textures;
|
||||
hash ^= program.texture_state.redirected_textures;
|
||||
hash ^= program.texcoord_control_mask;
|
||||
|
@ -25,6 +25,8 @@ namespace program_hash_util
|
||||
std::bitset<rsx::max_vertex_program_instructions> instruction_mask;
|
||||
u32 ucode_length;
|
||||
u32 referenced_textures_mask;
|
||||
u16 referenced_inputs_mask;
|
||||
u16 reserved;
|
||||
};
|
||||
|
||||
static usz get_vertex_program_ucode_hash(const RSXVertexProgram &program);
|
||||
|
@ -219,31 +219,11 @@ static const std::string rsx_vp_vec_op_names[] =
|
||||
"SEQ", "SFL", "SGT", "SLE", "SNE", "STR", "SSG", "NULL", "NULL", "TXL"
|
||||
};
|
||||
|
||||
struct rsx_vertex_input
|
||||
{
|
||||
u8 location; // between 0 and 15
|
||||
u8 size; // between 1 and 4
|
||||
u16 frequency;
|
||||
bool is_modulo; // either modulo frequency or divide frequency
|
||||
bool is_array; // false if "reg value"
|
||||
bool int_type;
|
||||
u32 flags; //Initially zero, to be optionally filled by the backend
|
||||
|
||||
bool operator==(const rsx_vertex_input& other) const
|
||||
{
|
||||
return location == other.location && size == other.size && frequency == other.frequency && is_modulo == other.is_modulo &&
|
||||
is_array == other.is_array && int_type == other.int_type && flags == other.flags;
|
||||
}
|
||||
};
|
||||
|
||||
struct RSXVertexProgram
|
||||
{
|
||||
std::vector<u32> data;
|
||||
std::vector<rsx_vertex_input> rsx_vertex_inputs;
|
||||
u32 output_mask;
|
||||
bool skip_vertex_input_check;
|
||||
rsx::vertex_program_texture_state texture_state;
|
||||
|
||||
u32 output_mask;
|
||||
u32 base_address;
|
||||
u32 entry;
|
||||
std::bitset<rsx::max_vertex_program_instructions> instruction_mask;
|
||||
|
@ -52,20 +52,18 @@ namespace rsx
|
||||
masked_transfer(slots_, src, mask);
|
||||
}
|
||||
|
||||
void fragment_program_texture_state::clear(u32 index)
|
||||
{
|
||||
const u16 clear_mask = ~(static_cast<u16>(1 << index));
|
||||
unnormalized_coords &= clear_mask;
|
||||
redirected_textures &= clear_mask;
|
||||
shadow_textures &= clear_mask;
|
||||
}
|
||||
|
||||
void fragment_program_texture_state::import(const fragment_program_texture_state& other, u16 mask)
|
||||
{
|
||||
unnormalized_coords = other.unnormalized_coords & mask;
|
||||
redirected_textures = other.redirected_textures & mask;
|
||||
shadow_textures = other.shadow_textures & mask;
|
||||
texture_dimensions = other.texture_dimensions & duplicate_and_extend(mask);
|
||||
void fragment_program_texture_state::clear(u32 index)
|
||||
{
|
||||
const u16 clear_mask = ~(static_cast<u16>(1 << index));
|
||||
redirected_textures &= clear_mask;
|
||||
shadow_textures &= clear_mask;
|
||||
}
|
||||
|
||||
void fragment_program_texture_state::import(const fragment_program_texture_state& other, u16 mask)
|
||||
{
|
||||
redirected_textures = other.redirected_textures & mask;
|
||||
shadow_textures = other.shadow_textures & mask;
|
||||
texture_dimensions = other.texture_dimensions & duplicate_and_extend(mask);
|
||||
}
|
||||
|
||||
void fragment_program_texture_state::set_dimension(texture_dimension_extended type, u32 index)
|
||||
@ -77,14 +75,13 @@ namespace rsx
|
||||
}
|
||||
|
||||
bool fragment_program_texture_state::operator == (const fragment_program_texture_state& other) const
|
||||
{
|
||||
return texture_dimensions == other.texture_dimensions &&
|
||||
redirected_textures == other.redirected_textures &&
|
||||
shadow_textures == other.shadow_textures &&
|
||||
unnormalized_coords == other.unnormalized_coords;
|
||||
}
|
||||
|
||||
void vertex_program_texture_state::clear(u32 /*index*/)
|
||||
{
|
||||
return texture_dimensions == other.texture_dimensions &&
|
||||
redirected_textures == other.redirected_textures &&
|
||||
shadow_textures == other.shadow_textures;
|
||||
}
|
||||
|
||||
void vertex_program_texture_state::clear(u32 /*index*/)
|
||||
{
|
||||
// Nothing to do yet
|
||||
}
|
||||
|
@ -33,16 +33,14 @@ namespace rsx
|
||||
};
|
||||
#pragma pack(pop)
|
||||
|
||||
struct fragment_program_texture_state
|
||||
{
|
||||
u32 texture_dimensions = 0;
|
||||
u16 unnormalized_coords = 0;
|
||||
u16 redirected_textures = 0;
|
||||
u16 shadow_textures = 0;
|
||||
u16 reserved = 0;
|
||||
|
||||
void clear(u32 index);
|
||||
void import(const fragment_program_texture_state& other, u16 mask);
|
||||
struct fragment_program_texture_state
|
||||
{
|
||||
u32 texture_dimensions = 0;
|
||||
u16 redirected_textures = 0;
|
||||
u16 shadow_textures = 0;
|
||||
|
||||
void clear(u32 index);
|
||||
void import(const fragment_program_texture_state& other, u16 mask);
|
||||
void set_dimension(texture_dimension_extended type, u32 index);
|
||||
bool operator == (const fragment_program_texture_state& other) const;
|
||||
};
|
||||
|
@ -1637,9 +1637,6 @@ namespace rsx
|
||||
m_graphics_state &= ~rsx::pipeline_state::vertex_program_ucode_dirty;
|
||||
|
||||
const u32 transform_program_start = rsx::method_registers.transform_program_start();
|
||||
current_vertex_program.skip_vertex_input_check = true;
|
||||
|
||||
current_vertex_program.rsx_vertex_inputs.clear();
|
||||
current_vertex_program.data.reserve(512 * 4);
|
||||
current_vertex_program.jump_table.clear();
|
||||
|
||||
@ -1706,7 +1703,7 @@ namespace rsx
|
||||
void thread::analyse_inputs_interleaved(vertex_input_layout& result) const
|
||||
{
|
||||
const rsx_state& state = rsx::method_registers;
|
||||
const u32 input_mask = state.vertex_attrib_input_mask();
|
||||
const u32 input_mask = state.vertex_attrib_input_mask() & current_vp_metadata.referenced_inputs_mask;
|
||||
|
||||
result.clear();
|
||||
|
||||
@ -1752,11 +1749,15 @@ namespace rsx
|
||||
result.interleaved_blocks.reserve(16);
|
||||
result.referenced_registers.reserve(16);
|
||||
|
||||
for (u8 index = 0; index < rsx::limits::vertex_count; ++index)
|
||||
for (auto [ref_mask, index] = std::tuple{ input_mask, u8(0) }; ref_mask; ++index, ref_mask >>= 1)
|
||||
{
|
||||
// Check if vertex stream is enabled
|
||||
if (!(input_mask & (1 << index)))
|
||||
ensure(index < rsx::limits::vertex_count);
|
||||
|
||||
if (!(ref_mask & 1u))
|
||||
{
|
||||
// Nothing to do, uninitialized
|
||||
continue;
|
||||
}
|
||||
|
||||
//Check for interleaving
|
||||
const auto &info = state.vertex_arrays_info[index];
|
||||
@ -1921,8 +1922,6 @@ namespace rsx
|
||||
|
||||
if (raw_format & CELL_GCM_TEXTURE_UN)
|
||||
{
|
||||
current_fp_texture_state.unnormalized_coords |= (1 << i);
|
||||
|
||||
if (tex.min_filter() == rsx::texture_minify_filter::nearest ||
|
||||
tex.mag_filter() == rsx::texture_magnify_filter::nearest)
|
||||
{
|
||||
@ -2204,8 +2203,14 @@ namespace rsx
|
||||
const u32 modulo_mask = rsx::method_registers.frequency_divider_operation_mask();
|
||||
const auto max_index = (first_vertex + vertex_count) - 1;
|
||||
|
||||
for (u8 index = 0; index < rsx::limits::vertex_count; ++index)
|
||||
for (u16 ref_mask = current_vp_metadata.referenced_inputs_mask, index = 0; ref_mask; ++index, ref_mask >>= 1)
|
||||
{
|
||||
if (!(ref_mask & 1u))
|
||||
{
|
||||
// Unused input, ignore this
|
||||
continue;
|
||||
}
|
||||
|
||||
if (layout.attribute_placement[index] == attribute_buffer_placement::none)
|
||||
{
|
||||
static constexpr u64 zero = 0;
|
||||
|
@ -1771,9 +1771,7 @@ bool VKGSRender::load_program()
|
||||
{
|
||||
vk::enter_uninterruptible();
|
||||
|
||||
// Load current program from buffer
|
||||
vertex_program.skip_vertex_input_check = true;
|
||||
fragment_program.texture_state.unnormalized_coords = 0;
|
||||
// Load current program from cache
|
||||
m_program = m_prog_buffer->get_graphics_pipeline(vertex_program, fragment_program, properties,
|
||||
shadermode != shader_mode::recompiler, true, pipeline_layout);
|
||||
|
||||
|
@ -91,13 +91,11 @@ namespace vk
|
||||
template <typename... Args>
|
||||
void add_pipeline_entry(RSXVertexProgram& vp, RSXFragmentProgram& fp, vk::pipeline_props& props, Args&& ...args)
|
||||
{
|
||||
vp.skip_vertex_input_check = true;
|
||||
get_graphics_pipeline(vp, fp, props, false, false, std::forward<Args>(args)...);
|
||||
}
|
||||
|
||||
void preload_programs(RSXVertexProgram& vp, RSXFragmentProgram& fp)
|
||||
{
|
||||
vp.skip_vertex_input_check = true;
|
||||
search_vertex_program(vp);
|
||||
search_fragment_program(fp);
|
||||
}
|
||||
|
@ -39,7 +39,6 @@ namespace rsx
|
||||
u32 fp_ctrl;
|
||||
u32 fp_texture_dimensions;
|
||||
u32 fp_texcoord_control;
|
||||
u16 fp_unnormalized_coords;
|
||||
u16 fp_height;
|
||||
u16 fp_pixel_layout;
|
||||
u16 fp_lighting_flags;
|
||||
@ -307,7 +306,6 @@ namespace rsx
|
||||
state_hash ^= rpcs3::hash_base<u32>(data.vp_texture_dimensions);
|
||||
state_hash ^= rpcs3::hash_base<u32>(data.fp_texture_dimensions);
|
||||
state_hash ^= rpcs3::hash_base<u32>(data.fp_texcoord_control);
|
||||
state_hash ^= rpcs3::hash_base<u16>(data.fp_unnormalized_coords);
|
||||
state_hash ^= rpcs3::hash_base<u16>(data.fp_height);
|
||||
state_hash ^= rpcs3::hash_base<u16>(data.fp_pixel_layout);
|
||||
state_hash ^= rpcs3::hash_base<u16>(data.fp_lighting_flags);
|
||||
@ -326,8 +324,6 @@ namespace rsx
|
||||
fs::file f(fmt::format("%s/raw/%llX.vp", root_path, program_hash));
|
||||
if (f) f.read(vp.data, f.size() / sizeof(u32));
|
||||
|
||||
vp.skip_vertex_input_check = true;
|
||||
|
||||
return vp;
|
||||
}
|
||||
|
||||
@ -381,7 +377,6 @@ namespace rsx
|
||||
|
||||
fp.ctrl = data.fp_ctrl;
|
||||
fp.texture_state.texture_dimensions = data.fp_texture_dimensions;
|
||||
fp.texture_state.unnormalized_coords = data.fp_unnormalized_coords;
|
||||
fp.texture_state.shadow_textures = data.fp_shadow_textures;
|
||||
fp.texture_state.redirected_textures = data.fp_redirected_textures;
|
||||
fp.texcoord_control_mask = data.fp_texcoord_control;
|
||||
@ -426,7 +421,6 @@ namespace rsx
|
||||
data_block.fp_ctrl = fp.ctrl;
|
||||
data_block.fp_texture_dimensions = fp.texture_state.texture_dimensions;
|
||||
data_block.fp_texcoord_control = fp.texcoord_control_mask;
|
||||
data_block.fp_unnormalized_coords = fp.texture_state.unnormalized_coords;
|
||||
data_block.fp_lighting_flags = u16(fp.two_sided_lighting);
|
||||
data_block.fp_shadow_textures = fp.texture_state.shadow_textures;
|
||||
data_block.fp_redirected_textures = fp.texture_state.redirected_textures;
|
||||
|
Loading…
Reference in New Issue
Block a user