rsx: Restructure programs

- Also re-enable pipeline optimizations
This commit is contained in:
kd-11 2018-10-20 17:43:00 +03:00 committed by kd-11
parent b0a6b72ce8
commit 1ad76ad331
21 changed files with 777 additions and 439 deletions

View File

@ -1,4 +1,4 @@
#pragma once #pragma once
#include <sstream> #include <sstream>
#include "ShaderParam.h" #include "ShaderParam.h"
@ -169,7 +169,6 @@ namespace glsl
" int is_volatile;\n" " int is_volatile;\n"
" int frequency;\n" " int frequency;\n"
" int divisor;\n" " int divisor;\n"
" int modulo;\n"
"};\n\n" "};\n\n"
"uint get_bits(uvec4 v, int swap)\n" "uint get_bits(uvec4 v, int swap)\n"
@ -290,17 +289,24 @@ namespace glsl
"attribute_desc fetch_desc(int location)\n" "attribute_desc fetch_desc(int location)\n"
"{\n" "{\n"
" // Each descriptor is 64 bits wide\n"
" // [0-8] attribute stride\n"
" // [8-20] attribute divisor\n"
" // [20-21] swap bytes flag\n"
" // [21-22] volatile flag\n"
" // [22-24] frequency op\n"
" // [24-27] attribute type\n"
" // [27-30] attribute size\n"
" attribute_desc result;\n" " attribute_desc result;\n"
" int attribute_flags = input_attributes[location].w;\n" " int attribute_flags = input_attributes[location].x;\n"
" result.type = input_attributes[location].x;\n"
" result.attribute_size = input_attributes[location].y;\n"
" result.starting_offset = input_attributes[location].z;\n"
" result.stride = attribute_flags & 0xFF;\n" " result.stride = attribute_flags & 0xFF;\n"
" result.swap_bytes = (attribute_flags >> 8) & 0x1;\n" " result.divisor = (attribute_flags >> 8) & 0xFFF;\n"
" result.is_volatile = (attribute_flags >> 9) & 0x1;\n" " result.swap_bytes = (attribute_flags >> 20) & 0x1;\n"
" result.frequency = (attribute_flags >> 10) & 0x3;\n" " result.is_volatile = (attribute_flags >> 21) & 0x1;\n"
" result.modulo = (attribute_flags >> 12) & 0x1;\n" " result.frequency = (attribute_flags >> 22) & 0x3;\n"
" result.divisor = (attribute_flags >> 13) & 0xFFFF;\n" " result.type = (attribute_flags >> 24) & 0x7;\n"
" result.attribute_size = (attribute_flags >> 27) & 0x7;\n"
" result.starting_offset = input_attributes[location].y;\n"
" return result;\n" " return result;\n"
"}\n\n" "}\n\n"
@ -325,15 +331,19 @@ namespace glsl
" }\n\n" " }\n\n"
" int vertex_id = " << vertex_id_name << " - int(vertex_base_index);\n" " int vertex_id = " << vertex_id_name << " - int(vertex_base_index);\n"
" if (desc.frequency == 0)\n" " if (desc.frequency == 0)\n"
" {\n"
" vertex_id = 0;\n" " vertex_id = 0;\n"
" else if (desc.frequency > 1)\n" " }\n"
" else if (desc.frequency == 2)\n"
" {\n" " {\n"
" //if a vertex modifier is active; vertex_base must be 0 and is ignored\n" " //if a vertex modifier is active; vertex_base must be 0 and is ignored\n"
" if (desc.modulo != 0)\n"
" vertex_id = " << vertex_id_name << " % desc.divisor;\n"
" else\n"
" vertex_id = " << vertex_id_name << " / desc.divisor;\n" " vertex_id = " << vertex_id_name << " / desc.divisor;\n"
" }\n" " }\n"
" else if (desc.frequency == 3)\n"
" {\n"
" //if a vertex modifier is active; vertex_base must be 0 and is ignored\n"
" vertex_id = " << vertex_id_name << " % desc.divisor;\n"
" }\n"
"\n" "\n"
" if (desc.is_volatile != 0)\n" " if (desc.is_volatile != 0)\n"
" return fetch_attribute(desc, vertex_id, volatile_input_stream);\n" " return fetch_attribute(desc, vertex_id, volatile_input_stream);\n"

View File

@ -1,4 +1,4 @@
#include "stdafx.h" #include "stdafx.h"
#include <set> #include <set>
#include "Emu/Memory/vm.h" #include "Emu/Memory/vm.h"
#include "Emu/System.h" #include "Emu/System.h"
@ -132,9 +132,8 @@ void GLFragmentDecompilerThread::insertConstants(std::stringstream & OS)
} }
OS << "\n"; OS << "\n";
OS << "layout(std140, binding = 2) uniform FragmentConstantsBuffer\n";
OS << "{\n";
std::string constants_block;
for (const ParamType& PT : m_parr.params[PF_PARAM_UNIFORM]) for (const ParamType& PT : m_parr.params[PF_PARAM_UNIFORM])
{ {
if (PT.type == "sampler1D" || if (PT.type == "sampler1D" ||
@ -144,10 +143,21 @@ void GLFragmentDecompilerThread::insertConstants(std::stringstream & OS)
continue; continue;
for (const ParamItem& PI : PT.items) for (const ParamItem& PI : PT.items)
OS << " " << PT.type << " " << PI.name << ";\n"; {
constants_block += " " + PT.type + " " + PI.name + ";\n";
}
} }
// Fragment state parameters if (!constants_block.empty())
{
OS << "layout(std140, binding = 3) uniform FragmentConstantsBuffer\n";
OS << "{\n";
OS << constants_block;
OS << "};\n\n";
}
OS << "layout(std140, binding = 4) uniform FragmentStateBuffer\n";
OS << "{\n";
OS << " float fog_param0;\n"; OS << " float fog_param0;\n";
OS << " float fog_param1;\n"; OS << " float fog_param1;\n";
OS << " uint rop_control;\n"; OS << " uint rop_control;\n";
@ -156,8 +166,12 @@ void GLFragmentDecompilerThread::insertConstants(std::stringstream & OS)
OS << " uint fog_mode;\n"; OS << " uint fog_mode;\n";
OS << " float wpos_scale;\n"; OS << " float wpos_scale;\n";
OS << " float wpos_bias;\n"; OS << " float wpos_bias;\n";
OS << "};\n\n";
OS << "layout(std140, binding = 5) uniform TextureParametersBuffer\n";
OS << "{\n";
OS << " vec4 texture_parameters[16];\n"; //sampling: x,y scaling and (unused) offsets data OS << " vec4 texture_parameters[16];\n"; //sampling: x,y scaling and (unused) offsets data
OS << "};\n"; OS << "};\n\n";
} }
void GLFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS) void GLFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS)

View File

@ -624,7 +624,10 @@ void GLGSRender::end()
m_attrib_ring_buffer->notify(); m_attrib_ring_buffer->notify();
m_index_ring_buffer->notify(); m_index_ring_buffer->notify();
m_vertex_state_buffer->notify(); m_fragment_env_buffer->notify();
m_vertex_env_buffer->notify();
m_texture_parameters_buffer->notify();
m_vertex_layout_buffer->notify();
m_fragment_constants_buffer->notify(); m_fragment_constants_buffer->notify();
m_transform_constants_buffer->notify(); m_transform_constants_buffer->notify();
@ -795,7 +798,10 @@ void GLGSRender::on_init_thread()
m_attrib_ring_buffer.reset(new gl::legacy_ring_buffer()); m_attrib_ring_buffer.reset(new gl::legacy_ring_buffer());
m_transform_constants_buffer.reset(new gl::legacy_ring_buffer()); m_transform_constants_buffer.reset(new gl::legacy_ring_buffer());
m_fragment_constants_buffer.reset(new gl::legacy_ring_buffer()); m_fragment_constants_buffer.reset(new gl::legacy_ring_buffer());
m_vertex_state_buffer.reset(new gl::legacy_ring_buffer()); m_fragment_env_buffer.reset(new gl::legacy_ring_buffer());
m_vertex_env_buffer.reset(new gl::legacy_ring_buffer());
m_texture_parameters_buffer.reset(new gl::legacy_ring_buffer());
m_vertex_layout_buffer.reset(new gl::legacy_ring_buffer());
m_index_ring_buffer.reset(new gl::legacy_ring_buffer()); m_index_ring_buffer.reset(new gl::legacy_ring_buffer());
} }
else else
@ -803,7 +809,10 @@ void GLGSRender::on_init_thread()
m_attrib_ring_buffer.reset(new gl::ring_buffer()); m_attrib_ring_buffer.reset(new gl::ring_buffer());
m_transform_constants_buffer.reset(new gl::ring_buffer()); m_transform_constants_buffer.reset(new gl::ring_buffer());
m_fragment_constants_buffer.reset(new gl::ring_buffer()); m_fragment_constants_buffer.reset(new gl::ring_buffer());
m_vertex_state_buffer.reset(new gl::ring_buffer()); m_fragment_env_buffer.reset(new gl::ring_buffer());
m_vertex_env_buffer.reset(new gl::ring_buffer());
m_texture_parameters_buffer.reset(new gl::ring_buffer());
m_vertex_layout_buffer.reset(new gl::ring_buffer());
m_index_ring_buffer.reset(new gl::ring_buffer()); m_index_ring_buffer.reset(new gl::ring_buffer());
} }
@ -811,7 +820,10 @@ void GLGSRender::on_init_thread()
m_index_ring_buffer->create(gl::buffer::target::element_array, 64 * 0x100000); m_index_ring_buffer->create(gl::buffer::target::element_array, 64 * 0x100000);
m_transform_constants_buffer->create(gl::buffer::target::uniform, 64 * 0x100000); m_transform_constants_buffer->create(gl::buffer::target::uniform, 64 * 0x100000);
m_fragment_constants_buffer->create(gl::buffer::target::uniform, 16 * 0x100000); m_fragment_constants_buffer->create(gl::buffer::target::uniform, 16 * 0x100000);
m_vertex_state_buffer->create(gl::buffer::target::uniform, 16 * 0x100000); m_fragment_env_buffer->create(gl::buffer::target::uniform, 16 * 0x100000);
m_vertex_env_buffer->create(gl::buffer::target::uniform, 16 * 0x100000);
m_texture_parameters_buffer->create(gl::buffer::target::uniform, 16 * 0x100000);
m_vertex_layout_buffer->create(gl::buffer::target::uniform, 16 * 0x100000);
if (gl_caps.vendor_AMD) if (gl_caps.vendor_AMD)
{ {
@ -1013,9 +1025,24 @@ void GLGSRender::on_exit()
m_fragment_constants_buffer->remove(); m_fragment_constants_buffer->remove();
} }
if (m_vertex_state_buffer) if (m_fragment_env_buffer)
{ {
m_vertex_state_buffer->remove(); m_fragment_env_buffer->remove();
}
if (m_vertex_env_buffer)
{
m_vertex_env_buffer->remove();
}
if (m_texture_parameters_buffer)
{
m_texture_parameters_buffer->remove();
}
if (m_vertex_layout_buffer)
{
m_vertex_layout_buffer->remove();
} }
if (m_index_ring_buffer) if (m_index_ring_buffer)
@ -1224,78 +1251,113 @@ bool GLGSRender::load_program()
void GLGSRender::load_program_env(const gl::vertex_upload_info& upload_info) void GLGSRender::load_program_env(const gl::vertex_upload_info& upload_info)
{ {
u8 *buf;
u32 vertex_state_offset;
u32 vertex_constants_offset;
u32 fragment_constants_offset;
const u32 fragment_constants_size = current_fp_metadata.program_constants_buffer_length;
const u32 fragment_buffer_size = fragment_constants_size + (18 * 4 * sizeof(float));
const bool update_transform_constants = !!(m_graphics_state & rsx::pipeline_state::transform_constants_dirty);
if (!m_program) if (!m_program)
{ {
fmt::throw_exception("Unreachable right now" HERE); fmt::throw_exception("Unreachable right now" HERE);
} }
const u32 fragment_constants_size = current_fp_metadata.program_constants_buffer_length;
const bool update_transform_constants = !!(m_graphics_state & rsx::pipeline_state::transform_constants_dirty);
const bool update_fragment_constants = !!(m_graphics_state & rsx::pipeline_state::fragment_constants_dirty) && fragment_constants_size;
const bool update_vertex_env = !!(m_graphics_state & rsx::pipeline_state::vertex_state_dirty);
const bool update_fragment_env = !!(m_graphics_state & rsx::pipeline_state::fragment_state_dirty);
const bool update_fragment_texture_env = !!(m_graphics_state & rsx::pipeline_state::fragment_texture_state_dirty);
m_program->use(); m_program->use();
if (manually_flush_ring_buffers) if (manually_flush_ring_buffers)
{ {
m_vertex_state_buffer->reserve_storage_on_heap(512); if (update_fragment_env) m_fragment_env_buffer->reserve_storage_on_heap(128);
m_fragment_constants_buffer->reserve_storage_on_heap(align(fragment_buffer_size, 256)); if (update_vertex_env) m_vertex_env_buffer->reserve_storage_on_heap(256);
if (update_fragment_texture_env) m_texture_parameters_buffer->reserve_storage_on_heap(256);
if (update_fragment_constants) m_fragment_constants_buffer->reserve_storage_on_heap(align(fragment_constants_size, 256));
if (update_transform_constants) m_transform_constants_buffer->reserve_storage_on_heap(8192); if (update_transform_constants) m_transform_constants_buffer->reserve_storage_on_heap(8192);
m_vertex_layout_buffer->reserve_storage_on_heap(128 + 16);
} }
if (update_vertex_env)
{
// Vertex state // Vertex state
auto mapping = m_vertex_state_buffer->alloc_from_heap(512, m_uniform_buffer_offset_align); auto mapping = m_vertex_env_buffer->alloc_from_heap(160, m_uniform_buffer_offset_align);
buf = static_cast<u8*>(mapping.first); auto buf = static_cast<u8*>(mapping.first);
vertex_state_offset = mapping.second;
fill_scale_offset_data(buf, false); fill_scale_offset_data(buf, false);
fill_user_clip_data(buf + 64); fill_user_clip_data(buf + 64);
*(reinterpret_cast<u32*>(buf + 128)) = rsx::method_registers.transform_branch_bits(); *(reinterpret_cast<u32*>(buf + 128)) = rsx::method_registers.transform_branch_bits();
*(reinterpret_cast<u32*>(buf + 132)) = upload_info.vertex_index_base; *(reinterpret_cast<u32*>(buf + 132)) = 0; // Reserved
*(reinterpret_cast<f32*>(buf + 136)) = rsx::method_registers.point_size(); *(reinterpret_cast<f32*>(buf + 136)) = rsx::method_registers.point_size();
*(reinterpret_cast<f32*>(buf + 140)) = rsx::method_registers.clip_min(); *(reinterpret_cast<f32*>(buf + 140)) = rsx::method_registers.clip_min();
*(reinterpret_cast<f32*>(buf + 144)) = rsx::method_registers.clip_max(); *(reinterpret_cast<f32*>(buf + 144)) = rsx::method_registers.clip_max();
fill_vertex_layout_state(m_vertex_layout, upload_info.allocated_vertex_count, reinterpret_cast<s32*>(buf + 160), upload_info.persistent_mapping_offset, upload_info.volatile_mapping_offset);
m_vertex_env_buffer->bind_range(0, mapping.second, 160);
}
{
// Vertex layout state
auto mapping = m_vertex_layout_buffer->alloc_from_heap(128 + 16, m_uniform_buffer_offset_align);
auto buf = static_cast<s32*>(mapping.first);
*buf = upload_info.vertex_index_base;
buf += 4;
fill_vertex_layout_state(m_vertex_layout, upload_info.allocated_vertex_count, buf, upload_info.persistent_mapping_offset, upload_info.volatile_mapping_offset);
m_vertex_layout_buffer->bind_range(1, mapping.second, 128 + 16);
}
if (update_transform_constants) if (update_transform_constants)
{ {
// Vertex constants // Vertex constants
mapping = m_transform_constants_buffer->alloc_from_heap(8192, m_uniform_buffer_offset_align); auto mapping = m_transform_constants_buffer->alloc_from_heap(8192, m_uniform_buffer_offset_align);
buf = static_cast<u8*>(mapping.first); auto buf = static_cast<u8*>(mapping.first);
vertex_constants_offset = mapping.second;
fill_vertex_program_constants_data(buf); fill_vertex_program_constants_data(buf);
m_transform_constants_buffer->bind_range(2, mapping.second, 8192);
} }
// Fragment constants if (update_fragment_constants)
mapping = m_fragment_constants_buffer->alloc_from_heap(fragment_buffer_size, m_uniform_buffer_offset_align);
buf = static_cast<u8*>(mapping.first);
fragment_constants_offset = mapping.second;
if (fragment_constants_size)
{ {
// Fragment constants
auto mapping = m_fragment_constants_buffer->alloc_from_heap(fragment_constants_size, m_uniform_buffer_offset_align);
auto buf = static_cast<u8*>(mapping.first);
m_prog_buffer.fill_fragment_constants_buffer({ reinterpret_cast<float*>(buf), gsl::narrow<int>(fragment_constants_size) }, m_prog_buffer.fill_fragment_constants_buffer({ reinterpret_cast<float*>(buf), gsl::narrow<int>(fragment_constants_size) },
current_fragment_program, gl::get_driver_caps().vendor_NVIDIA); current_fragment_program, gl::get_driver_caps().vendor_NVIDIA);
m_fragment_constants_buffer->bind_range(3, mapping.second, fragment_constants_size);
} }
if (update_fragment_env)
{
// Fragment state // Fragment state
fill_fragment_state_buffer(buf + fragment_constants_size, current_fragment_program); auto mapping = m_fragment_env_buffer->alloc_from_heap(32, m_uniform_buffer_offset_align);
auto buf = static_cast<u8*>(mapping.first);
fill_fragment_state_buffer(buf, current_fragment_program);
m_vertex_state_buffer->bind_range(0, vertex_state_offset, 512); m_fragment_env_buffer->bind_range(4, mapping.second, 32);
m_fragment_constants_buffer->bind_range(2, fragment_constants_offset, fragment_buffer_size); }
if (update_transform_constants) m_transform_constants_buffer->bind_range(1, vertex_constants_offset, 8192); if (update_fragment_texture_env)
{
// Fragment texture parameters
auto mapping = m_texture_parameters_buffer->alloc_from_heap(256, m_uniform_buffer_offset_align);
auto buf = static_cast<u8*>(mapping.first);
fill_fragment_texture_parameters(buf, current_fragment_program);
m_texture_parameters_buffer->bind_range(5, mapping.second, 256);
}
if (manually_flush_ring_buffers) if (manually_flush_ring_buffers)
{ {
m_vertex_state_buffer->unmap(); if (update_fragment_env) m_fragment_env_buffer->unmap();
m_fragment_constants_buffer->unmap(); if (update_vertex_env) m_vertex_env_buffer->unmap();
if (update_fragment_texture_env) m_texture_parameters_buffer->unmap();
if (update_fragment_constants) m_fragment_constants_buffer->unmap();
if (update_transform_constants) m_transform_constants_buffer->unmap(); if (update_transform_constants) m_transform_constants_buffer->unmap();
m_vertex_layout_buffer->unmap();
} }
const u32 handled_flags = (rsx::pipeline_state::fragment_state_dirty | rsx::pipeline_state::vertex_state_dirty | rsx::pipeline_state::transform_constants_dirty); const u32 handled_flags = (rsx::pipeline_state::fragment_state_dirty | rsx::pipeline_state::vertex_state_dirty | rsx::pipeline_state::transform_constants_dirty | rsx::pipeline_state::fragment_constants_dirty | rsx::pipeline_state::fragment_texture_state_dirty);
m_graphics_state &= ~handled_flags; m_graphics_state &= ~handled_flags;
} }

View File

@ -294,7 +294,10 @@ private:
std::unique_ptr<gl::ring_buffer> m_attrib_ring_buffer; std::unique_ptr<gl::ring_buffer> m_attrib_ring_buffer;
std::unique_ptr<gl::ring_buffer> m_fragment_constants_buffer; std::unique_ptr<gl::ring_buffer> m_fragment_constants_buffer;
std::unique_ptr<gl::ring_buffer> m_transform_constants_buffer; std::unique_ptr<gl::ring_buffer> m_transform_constants_buffer;
std::unique_ptr<gl::ring_buffer> m_vertex_state_buffer; std::unique_ptr<gl::ring_buffer> m_fragment_env_buffer;
std::unique_ptr<gl::ring_buffer> m_vertex_env_buffer;
std::unique_ptr<gl::ring_buffer> m_texture_parameters_buffer;
std::unique_ptr<gl::ring_buffer> m_vertex_layout_buffer;
std::unique_ptr<gl::ring_buffer> m_index_ring_buffer; std::unique_ptr<gl::ring_buffer> m_index_ring_buffer;
// Identity buffer used to fix broken gl_VertexID on ATI stack // Identity buffer used to fix broken gl_VertexID on ATI stack

View File

@ -1,4 +1,4 @@
#include "stdafx.h" #include "stdafx.h"
#include "Emu/System.h" #include "Emu/System.h"
#include "GLVertexProgram.h" #include "GLVertexProgram.h"
@ -37,11 +37,15 @@ void GLVertexDecompilerThread::insertHeader(std::stringstream &OS)
OS << " ivec4 user_clip_enabled[2];\n"; OS << " ivec4 user_clip_enabled[2];\n";
OS << " vec4 user_clip_factor[2];\n"; OS << " vec4 user_clip_factor[2];\n";
OS << " uint transform_branch_bits;\n"; OS << " uint transform_branch_bits;\n";
OS << " uint vertex_base_index;\n";
OS << " float point_size;\n"; OS << " float point_size;\n";
OS << " float z_near;\n"; OS << " float z_near;\n";
OS << " float z_far;\n"; OS << " float z_far;\n";
OS << " ivec4 input_attributes[16];\n"; OS << "};\n\n";
OS << "layout(std140, binding = 1) uniform VertexLayoutBuffer\n";
OS << "{\n";
OS << " uint vertex_base_index;\n";
OS << " ivec2 input_attributes[16];\n";
OS << "};\n\n"; OS << "};\n\n";
} }
@ -53,7 +57,7 @@ void GLVertexDecompilerThread::insertInputs(std::stringstream & OS, const std::v
void GLVertexDecompilerThread::insertConstants(std::stringstream & OS, const std::vector<ParamType> & constants) void GLVertexDecompilerThread::insertConstants(std::stringstream & OS, const std::vector<ParamType> & constants)
{ {
OS << "layout(std140, binding = 1) uniform VertexConstantsBuffer\n"; OS << "layout(std140, binding = 2) uniform VertexConstantsBuffer\n";
OS << "{\n"; OS << "{\n";
OS << " vec4 vc[468];\n"; OS << " vec4 vc[468];\n";
OS << "};\n\n"; OS << "};\n\n";

View File

@ -129,7 +129,7 @@ namespace rsx
m_prefetcher_busy.store(true); m_prefetcher_busy.store(true);
read_ahead(m_prefetcher_info, m_prefetched_queue, m_prefetch_get); read_ahead(m_prefetcher_info, m_prefetched_queue, m_prefetch_get);
//optimize(m_prefetcher_info, m_prefetched_queue); optimize(m_prefetcher_info, m_prefetched_queue);
m_prefetcher_busy.store(false); m_prefetcher_busy.store(false);
m_prefetch_mutex.unlock(); m_prefetch_mutex.unlock();
@ -206,7 +206,9 @@ namespace rsx
info.start_loc = get_pointer; info.start_loc = get_pointer;
info.num_draw_calls = 0; info.num_draw_calls = 0;
info.draw_call_distance_weight = 0;
u32 cmd;
u32 count;
while (true) while (true)
{ {
@ -218,8 +220,6 @@ namespace rsx
// Validate put and get registers before reading the command // Validate put and get registers before reading the command
// TODO: Who should handle graphics exceptions?? // TODO: Who should handle graphics exceptions??
u32 cmd;
if (u32 addr = RSXIOMem.RealAddr(get_pointer)) if (u32 addr = RSXIOMem.RealAddr(get_pointer))
{ {
cmd = vm::read32(addr); cmd = vm::read32(addr);
@ -230,6 +230,8 @@ namespace rsx
break; break;
} }
if (UNLIKELY(cmd & 0xe0030003))
{
if ((cmd & RSX_METHOD_OLD_JUMP_CMD_MASK) == RSX_METHOD_OLD_JUMP_CMD || if ((cmd & RSX_METHOD_OLD_JUMP_CMD_MASK) == RSX_METHOD_OLD_JUMP_CMD ||
(cmd & RSX_METHOD_NEW_JUMP_CMD_MASK) == RSX_METHOD_NEW_JUMP_CMD || (cmd & RSX_METHOD_NEW_JUMP_CMD_MASK) == RSX_METHOD_NEW_JUMP_CMD ||
(cmd & RSX_METHOD_CALL_CMD_MASK) == RSX_METHOD_CALL_CMD || (cmd & RSX_METHOD_CALL_CMD_MASK) == RSX_METHOD_CALL_CMD ||
@ -239,8 +241,8 @@ namespace rsx
commands.push_back({ cmd, 0, get_pointer }); commands.push_back({ cmd, 0, get_pointer });
break; break;
} }
}
if ((cmd & RSX_METHOD_NOP_MASK) == RSX_METHOD_NOP_CMD) else if (UNLIKELY((cmd & RSX_METHOD_NOP_MASK) == RSX_METHOD_NOP_CMD))
{ {
if (commands.empty() || commands.back().reg != RSX_METHOD_NOP_CMD) if (commands.empty() || commands.back().reg != RSX_METHOD_NOP_CMD)
{ {
@ -251,49 +253,50 @@ namespace rsx
get_pointer += 4; get_pointer += 4;
continue; continue;
} }
else if (UNLIKELY(cmd & 0x3))
if (cmd & 0x3)
{ {
// Malformed command, optional recovery // Malformed command, optional recovery
break; break;
} }
u32 count = (cmd >> 18) & 0x7ff;
//Validate the args ptr if the command attempts to read from it //Validate the args ptr if the command attempts to read from it
auto args = vm::ptr<u32>::make(RSXIOMem.RealAddr(get_pointer + 4)); auto args = vm::ptr<u32>::make(RSXIOMem.RealAddr(get_pointer + 4));
if (UNLIKELY(!args))
if (!args && count)
{ {
// Optional recovery // Optional recovery
break; break;
} }
count = (cmd >> 18) & 0x7ff;
if (count > 1)
{
// Stop command execution if put will be equal to get ptr during the execution itself // Stop command execution if put will be equal to get ptr during the execution itself
if (count * 4 + 4 > put - get_pointer) if (UNLIKELY(count * 4 + 4 > put - get_pointer))
{ {
count = (put - get_pointer) / 4 - 1; count = (put - get_pointer) / 4 - 1;
} }
if (count > 1)
{
// Queue packet header // Queue packet header
commands.push_back({ FIFO_PACKET_BEGIN, count, get_pointer }); commands.push_back({ FIFO_PACKET_BEGIN, count, get_pointer });
const bool no_increment = (cmd & RSX_METHOD_NON_INCREMENT_CMD_MASK) == RSX_METHOD_NON_INCREMENT_CMD; // First executed command is at data[0]
u32 reg = cmd & 0xfffc; get_pointer += 4;
get_pointer += 4; // First executed command is at data[0]
if (UNLIKELY((cmd & RSX_METHOD_NON_INCREMENT_CMD_MASK) == RSX_METHOD_NON_INCREMENT_CMD))
{
const u32 reg = cmd & 0xfffc;
for (u32 i = 0; i < count; i++, get_pointer += 4) for (u32 i = 0; i < count; i++, get_pointer += 4)
{ {
commands.push_back({ reg, args[i], get_pointer }); commands.push_back({ reg, args[i], get_pointer });
if (reg == (NV4097_SET_BEGIN_END << 2))
{
info.num_draw_calls++;
} }
}
if (!no_increment) reg += 4; else
{
u32 reg = cmd & 0xfffc;
for (u32 i = 0; i < count; i++, get_pointer += 4, reg += 4)
{
commands.push_back({ reg, args[i], get_pointer });
}
} }
} }
else else
@ -315,15 +318,14 @@ namespace rsx
} }
info.length = get_pointer - info.start_loc; info.length = get_pointer - info.start_loc;
if (!info.num_draw_calls) if (info.num_draw_calls < 2)
{ {
return; return;
} }
info.num_draw_calls /= 2; // Begin+End pairs info.num_draw_calls /= 2; // Begin+End pairs
//info.draw_call_distance_weight = info.length / info.num_draw_calls;
} }
#pragma optimize("", on)
void FIFO_control::report_branch_hit(u32 source, u32 target) void FIFO_control::report_branch_hit(u32 source, u32 target)
{ {
const auto range = m_branch_prediction_table.equal_range(source); const auto range = m_branch_prediction_table.equal_range(source);
@ -507,7 +509,7 @@ namespace rsx
if (queue_size > 0) if (queue_size > 0)
{ {
if (m_internal_get != m_ctrl->get) if (UNLIKELY(m_internal_get != m_ctrl->get))
{ {
// Control register changed // Control register changed
registers_changed = true; registers_changed = true;
@ -545,7 +547,7 @@ namespace rsx
} }
} }
verify(HERE), m_queue.empty(); //verify(HERE), m_queue.empty();
if (m_ctrl->put == m_ctrl->get) if (m_ctrl->put == m_ctrl->get)
{ {
@ -573,7 +575,7 @@ namespace rsx
} }
// Lock to disable the prefetcher // Lock to disable the prefetcher
if (!m_prefetch_mutex.try_lock()) if (0)//!m_prefetch_mutex.try_lock())
{ {
return busy_cmd; return busy_cmd;
} }
@ -601,13 +603,13 @@ namespace rsx
{ {
m_internal_get = m_ctrl->get; m_internal_get = m_ctrl->get;
read_ahead(m_fifo_info, m_queue, m_internal_get); read_ahead(m_fifo_info, m_queue, m_internal_get);
//optimize(m_fifo_info, m_queue); optimize(m_fifo_info, m_queue);
m_ctrl->get = m_internal_get; m_ctrl->get = m_internal_get;
m_ctrl_tag++; m_ctrl_tag++;
} }
m_prefetch_mutex.unlock(); //m_prefetch_mutex.unlock();
if (!m_queue.empty()) if (!m_queue.empty())
{ {
@ -656,49 +658,58 @@ namespace rsx
// Vertex // Vertex
{ NV4097_SET_VERTEX_DATA_ARRAY_FORMAT, 16 }, { NV4097_SET_VERTEX_DATA_ARRAY_FORMAT, 16 },
{ NV4097_SET_VERTEX_DATA_ARRAY_OFFSET, 16 }, { NV4097_SET_VERTEX_DATA_ARRAY_OFFSET, 16 },
// Raster
{ NV4097_SET_ALPHA_TEST_ENABLE, 1 },
{ NV4097_SET_ALPHA_FUNC, 1 },
{ NV4097_SET_ALPHA_REF, 1 },
{ NV4097_SET_FRONT_FACE, 1 },
}; };
for (u32 reg = 0; reg < m_skippable_registers.size(); ++reg) const std::pair<u32, u32> ignorable_ranges[] =
{ {
bool _continue = false; // General
for (const auto &method : skippable_ranges) { NV4097_INVALIDATE_VERTEX_FILE, 3 }, // PSLight clears VERTEX_FILE[0-2]
{ { NV4097_INVALIDATE_VERTEX_CACHE_FILE, 1 },
if (reg < method.first) { NV4097_INVALIDATE_L2, 1 },
break; { NV4097_INVALIDATE_ZCULL, 1 },
// FIFO
{ (FIFO_DISABLED_COMMAND >> 2), 1},
{ (FIFO_PACKET_BEGIN >> 2), 1 },
{ (FIFO_DRAW_BARRIER >> 2), 1 },
// ROP
{ NV4097_SET_ALPHA_FUNC, 1 },
{ NV4097_SET_ALPHA_REF, 1 },
{ NV4097_SET_ALPHA_TEST_ENABLE, 1 },
{ NV4097_SET_ANTI_ALIASING_CONTROL, 1 },
// Program
{ NV4097_SET_SHADER_PACKER, 1 },
{ NV4097_SET_SHADER_WINDOW, 1 },
// Vertex data offsets
{ NV4097_SET_VERTEX_DATA_BASE_OFFSET, 1 },
{ NV4097_SET_VERTEX_DATA_BASE_INDEX, 1 }
};
if (reg - method.first < method.second) std::fill(m_register_properties.begin(), m_register_properties.end(), 0u);
{
// Safe to ignore if value has not changed
m_skippable_registers[reg] = true;
_continue = true;
break;
}
}
if (_continue)
continue;
m_skippable_registers[reg] = false;
}
for (const auto &method : skippable_ranges) for (const auto &method : skippable_ranges)
{ {
for (int subreg = 0; subreg < method.second; ++subreg) for (int i = 0; i < method.second; ++i)
{ {
// Safe to ignore if value has not changed m_register_properties[method.first + i] = register_props::skippable;
verify(HERE), m_skippable_registers[subreg] = true; }
}
for (const auto &method : ignorable_ranges)
{
for (int i = 0; i < method.second; ++i)
{
m_register_properties[method.first + i] |= register_props::ignorable;
} }
} }
} }
void flattening_pass::optimize(const fifo_buffer_info_t& info, simple_array<register_pair>& commands, const u32* registers) void flattening_pass::optimize(const fifo_buffer_info_t& info, simple_array<register_pair>& commands, const u32* registers)
{ {
__unused(info); if (info.num_draw_calls < 20)
{
// Not enough draw calls
return;
}
#if (ENABLE_OPTIMIZATION_DEBUGGING) #if (ENABLE_OPTIMIZATION_DEBUGGING)
auto copy = commands; auto copy = commands;
@ -750,31 +761,14 @@ namespace rsx
for (auto &command : commands) for (auto &command : commands)
{ {
//LOG_ERROR(RSX, "[0x%x] %s(0x%x)", command.loc, _get_method_name(command.reg), command.value);
bool flush_commands_flag = has_deferred_call; bool flush_commands_flag = has_deferred_call;
bool execute_method_flag = true; bool execute_method_flag = true;
const auto reg = command.reg >> 2; const auto reg = command.reg >> 2;
const auto value = command.value; const auto value = command.value;
switch (reg) switch (reg)
{ {
case NV4097_INVALIDATE_VERTEX_FILE: // PSLight clears VERTEX_FILE[0-2]
case NV4097_PIPE_NOP:
case NV4097_INVALIDATE_VERTEX_FILE + 2:
case NV4097_INVALIDATE_VERTEX_CACHE_FILE:
case NV4097_INVALIDATE_L2:
case NV4097_INVALIDATE_ZCULL:
case (FIFO_DISABLED_COMMAND >> 2):
case (FIFO_PACKET_BEGIN >> 2):
case (FIFO_DRAW_BARRIER >> 2):
case (FIFO_EMPTY >> 2):
case (FIFO_BUSY >> 2):
{
// Ignore these completely
flush_commands_flag = false;
break;
}
case NV4097_SET_BEGIN_END: case NV4097_SET_BEGIN_END:
{ {
if (value && value != deferred_primitive_type) if (value && value != deferred_primitive_type)
@ -788,47 +782,50 @@ namespace rsx
has_deferred_call = true; has_deferred_call = true;
flush_commands_flag = false; flush_commands_flag = false;
execute_method_flag = false; execute_method_flag = false;
// TODO: If END, insert draw barrier
} }
break; break;
} }
case NV4097_DRAW_ARRAYS: case NV4097_DRAW_ARRAYS:
{
if (has_deferred_call)
{ {
const auto cmd = method_registers.current_draw_clause.command; const auto cmd = method_registers.current_draw_clause.command;
if (cmd != rsx::draw_command::array && cmd != rsx::draw_command::none) if (cmd != rsx::draw_command::array && cmd != rsx::draw_command::none)
break; break;
flush_commands_flag = false; flush_commands_flag = false;
}
break; break;
} }
case NV4097_DRAW_INDEX_ARRAY: case NV4097_DRAW_INDEX_ARRAY:
{
if (has_deferred_call)
{ {
const auto cmd = method_registers.current_draw_clause.command; const auto cmd = method_registers.current_draw_clause.command;
if (cmd != rsx::draw_command::indexed && cmd != rsx::draw_command::none) if (cmd != rsx::draw_command::indexed && cmd != rsx::draw_command::none)
break; break;
flush_commands_flag = false; flush_commands_flag = false;
break;
} }
case NV4097_SET_VERTEX_DATA_BASE_INDEX:
case NV4097_SET_VERTEX_DATA_BASE_OFFSET:
{
// These can be executed when emitting geometry
flush_commands_flag = false;
break; break;
} }
default: default:
{ {
// Hopefully this is skippable so the batch can keep growing if (reg >= m_register_properties.size())
if (reg >= m_skippable_registers.size())
{ {
// Likely flow control, unskippable // Flow control or special command
break; break;
} }
if (m_skippable_registers[reg]) const auto properties = m_register_properties[reg];
if (properties & register_props::ignorable)
{
// These have no effect on rendering behavior or can be handled within begin/end
flush_commands_flag = false;
break;
}
if (properties & register_props::skippable)
{ {
if (has_deferred_call) if (has_deferred_call)
{ {
@ -840,9 +837,10 @@ namespace rsx
break; break;
} }
} }
}
set_register(reg, value); set_register(reg, value);
}
break; break;
} }
} }
@ -1211,7 +1209,7 @@ namespace rsx
return; return;
} }
if (cmd == FIFO::FIFO_EMPTY || !Emu.IsRunning()) if (cmd == FIFO::FIFO_EMPTY)
{ {
if (performance_counters.state == FIFO_state::running) if (performance_counters.state == FIFO_state::running)
{ {
@ -1219,7 +1217,6 @@ namespace rsx
performance_counters.state = FIFO_state::empty; performance_counters.state = FIFO_state::empty;
} }
std::this_thread::yield();
return; return;
} }
@ -1227,7 +1224,7 @@ namespace rsx
// TODO: Who should handle graphics exceptions?? // TODO: Who should handle graphics exceptions??
if ((cmd & RSX_METHOD_OLD_JUMP_CMD_MASK) == RSX_METHOD_OLD_JUMP_CMD) if ((cmd & RSX_METHOD_OLD_JUMP_CMD_MASK) == RSX_METHOD_OLD_JUMP_CMD)
{ {
u32 offs = cmd & 0x1ffffffc; const u32 offs = cmd & 0x1ffffffc;
if (offs == command.loc) if (offs == command.loc)
{ {
//Jump to self. Often preceded by NOP //Jump to self. Often preceded by NOP
@ -1245,7 +1242,7 @@ namespace rsx
} }
if ((cmd & RSX_METHOD_NEW_JUMP_CMD_MASK) == RSX_METHOD_NEW_JUMP_CMD) if ((cmd & RSX_METHOD_NEW_JUMP_CMD_MASK) == RSX_METHOD_NEW_JUMP_CMD)
{ {
u32 offs = cmd & 0xfffffffc; const u32 offs = cmd & 0xfffffffc;
if (offs == command.loc) if (offs == command.loc)
{ {
//Jump to self. Often preceded by NOP //Jump to self. Often preceded by NOP
@ -1271,8 +1268,7 @@ namespace rsx
return; return;
} }
u32 offs = cmd & 0xfffffffc; const u32 offs = cmd & 0xfffffffc;
//LOG_WARNING(RSX, "rsx call(0x%x) #0x%x - 0x%x", offs, cmd, get);
m_return_addr = command.loc + 4; m_return_addr = command.loc + 4;
fifo_ctrl->set_get(offs); fifo_ctrl->set_get(offs);
return; return;
@ -1286,7 +1282,6 @@ namespace rsx
return; return;
} }
//LOG_WARNING(RSX, "rsx return(0x%x)", get);
fifo_ctrl->set_get(m_return_addr); fifo_ctrl->set_get(m_return_addr);
m_return_addr = -1; m_return_addr = -1;
return; return;

View File

@ -47,7 +47,7 @@ namespace rsx
u32 start_loc; u32 start_loc;
u32 length; u32 length;
u32 num_draw_calls; u32 num_draw_calls;
u32 draw_call_distance_weight; u32 reserved;
}; };
struct branch_target_info_t struct branch_target_info_t
@ -67,7 +67,13 @@ namespace rsx
struct flattening_pass : public optimization_pass struct flattening_pass : public optimization_pass
{ {
private: private:
std::array<bool, 0x10000 / 4> m_skippable_registers; enum register_props : u8
{
skippable = 1,
ignorable = 2
};
std::array<u8, 0x10000 / 4> m_register_properties;
public: public:
flattening_pass(); flattening_pass();

View File

@ -1,4 +1,4 @@
#pragma once #pragma once
#include "GCM.h" #include "GCM.h"
#include "RSXTexture.h" #include "RSXTexture.h"
@ -230,7 +230,7 @@ struct RSXFragmentProgram
bool front_color_specular_output : 1; bool front_color_specular_output : 1;
u32 texture_dimensions; u32 texture_dimensions;
std::array<float, 4> texture_scale[16]; float texture_scale[16][4];
u8 textures_alpha_kill[16]; u8 textures_alpha_kill[16];
u8 textures_zfunc[16]; u8 textures_zfunc[16];

View File

@ -419,6 +419,15 @@ namespace rsx
conditional_render_test_address = 0; conditional_render_test_address = 0;
} }
if (m_graphics_state & rsx::pipeline_state::fragment_program_dirty)
{
// Request for update of fragment constants if the program block is invalidated
m_graphics_state |= rsx::pipeline_state::fragment_constants_dirty;
// Request for update of texture parameters if the program is likely to have changed
m_graphics_state |= rsx::pipeline_state::fragment_texture_state_dirty;
}
in_begin_end = true; in_begin_end = true;
} }
@ -545,7 +554,7 @@ namespace rsx
fifo_ctrl = std::make_unique<::rsx::FIFO::FIFO_control>(this); fifo_ctrl = std::make_unique<::rsx::FIFO::FIFO_control>(this);
//fifo_ctrl->register_optimization_pass(new FIFO::flattening_pass()); fifo_ctrl->register_optimization_pass(new FIFO::flattening_pass());
//fifo_ctrl->register_optimization_pass(new FIFO::reordering_pass()); // R&C2 - Not working if flattening is also enabled!!! //fifo_ctrl->register_optimization_pass(new FIFO::reordering_pass()); // R&C2 - Not working if flattening is also enabled!!!
//fifo_ctrl->register_optimization_pass(new FIFO::flattening_pass()); //fifo_ctrl->register_optimization_pass(new FIFO::flattening_pass());
@ -640,13 +649,20 @@ namespace rsx
while (external_interrupt_lock.load()) _mm_pause(); while (external_interrupt_lock.load()) _mm_pause();
} }
// Idle if emulation paused
if (Emu.IsPaused())
{
std::this_thread::sleep_for(1ms);
continue;
}
// Execute backend-local tasks first // Execute backend-local tasks first
do_local_task(performance_counters.state); do_local_task(performance_counters.state);
// Update sub-units // Update sub-units
zcull_ctrl->update(this); zcull_ctrl->update(this);
// Execite FIFO queue // Execute FIFO queue
run_FIFO(); run_FIFO();
} }
} }
@ -716,8 +732,9 @@ namespace rsx
rsx::method_registers.clip_plane_5_enabled(), rsx::method_registers.clip_plane_5_enabled(),
}; };
s32 clip_enabled_flags[8] = {}; u8 data_block[64];
f32 clip_distance_factors[8] = {}; s32* clip_enabled_flags = reinterpret_cast<s32*>(data_block);
f32* clip_distance_factors = reinterpret_cast<f32*>(data_block + 32);
for (int index = 0; index < 6; ++index) for (int index = 0; index < 6; ++index)
{ {
@ -743,8 +760,7 @@ namespace rsx
} }
} }
memcpy(buffer, clip_enabled_flags, 32); memcpy(buffer, data_block, 2 * 8 * sizeof(u32));
memcpy((char*)buffer + 32, clip_distance_factors, 32);
} }
/** /**
@ -814,16 +830,11 @@ namespace rsx
u32 *dst = static_cast<u32*>(buffer); u32 *dst = static_cast<u32*>(buffer);
stream_vector(dst, (u32&)fog0, (u32&)fog1, rop_control, (u32&)alpha_ref); stream_vector(dst, (u32&)fog0, (u32&)fog1, rop_control, (u32&)alpha_ref);
stream_vector(dst + 4, alpha_func, fog_mode, (u32&)wpos_scale, (u32&)wpos_bias); stream_vector(dst + 4, alpha_func, fog_mode, (u32&)wpos_scale, (u32&)wpos_bias);
size_t offset = 8;
for (int index = 0; index < 16; ++index)
{
stream_vector(&dst[offset],
(u32&)fragment_program.texture_scale[index][0], (u32&)fragment_program.texture_scale[index][1],
(u32&)fragment_program.texture_scale[index][2], (u32&)fragment_program.texture_scale[index][3]);
offset += 4;
} }
void thread::fill_fragment_texture_parameters(void *buffer, const RSXFragmentProgram &fragment_program)
{
memcpy(buffer, fragment_program.texture_scale, 16 * 4 * sizeof(float));
} }
void thread::write_inline_array_to_buffer(void *dst_buffer) void thread::write_inline_array_to_buffer(void *dst_buffer)
@ -2020,15 +2031,23 @@ namespace rsx
} }
} }
//Fill the data // Fill the data
// Each descriptor field is 64 bits wide
// [0-8] attribute stride\n"
// [8-20] attribute divisor\n"
// [20-21] swap bytes flag\n"
// [21-22] volatile flag\n"
// [22-24] frequency op\n"
// [24-27] attribute type\n"
// [27-30] attribute size\n"
memset(buffer, 0, 256); memset(buffer, 0, 256);
const s32 swap_storage_mask = (1 << 8); const s32 swap_storage_mask = (1 << 20);
const s32 volatile_storage_mask = (1 << 9); const s32 volatile_storage_mask = (1 << 21);
const s32 default_frequency_mask = (1 << 10); const s32 default_frequency_mask = (1 << 22);
const s32 repeating_frequency_mask = (3 << 10); const s32 division_op_frequency_mask = (2 << 22);
const s32 input_function_modulo_mask = (1 << 12); const s32 modulo_op_frequency_mask = (3 << 22);
const s32 input_divisor_mask = (0xFFFF << 13);
const u32 modulo_mask = rsx::method_registers.frequency_divider_operation_mask(); const u32 modulo_mask = rsx::method_registers.frequency_divider_operation_mask();
@ -2114,11 +2133,14 @@ namespace rsx
} }
default: default:
{ {
if (modulo_mask & (1 << index)) verify(HERE), frequency <= 4095u;
attributes |= input_function_modulo_mask;
attributes |= repeating_frequency_mask; if (modulo_mask & (1 << index))
attributes |= (frequency << 13) & input_divisor_mask; attributes |= modulo_op_frequency_mask;
else
attributes |= division_op_frequency_mask;
attributes |= (frequency << 8);
break; break;
} }
} }
@ -2144,10 +2166,11 @@ namespace rsx
if (to_swap_bytes) attributes |= swap_storage_mask; if (to_swap_bytes) attributes |= swap_storage_mask;
buffer[index * 4 + 0] = static_cast<s32>(type); attributes |= (static_cast<s32>(type) << 24);
buffer[index * 4 + 1] = size; attributes |= (size << 27);
buffer[index * 4 + 2] = offset_in_block[index];
buffer[index * 4 + 3] = attributes; buffer[index * 4 + 0] = attributes;
buffer[index * 4 + 1] = offset_in_block[index];
} }
} }
@ -2326,6 +2349,9 @@ namespace rsx
{ {
zcull_ctrl->sync(this); zcull_ctrl->sync(this);
// Fragment constants may have been updated
m_graphics_state |= rsx::pipeline_state::fragment_constants_dirty;
//TODO: On sync every sub-unit should finish any pending tasks //TODO: On sync every sub-unit should finish any pending tasks
//Might cause zcull lockup due to zombie 'unclaimed reports' which are not forcefully removed currently //Might cause zcull lockup due to zombie 'unclaimed reports' which are not forcefully removed currently
//verify (HERE), async_tasks_pending.load() == 0; //verify (HERE), async_tasks_pending.load() == 0;

View File

@ -88,18 +88,21 @@ namespace rsx
context_clear_all = context_clear_color | context_clear_depth context_clear_all = context_clear_color | context_clear_depth
}; };
enum pipeline_state : u8 enum pipeline_state : u32
{ {
fragment_program_dirty = 1, fragment_program_dirty = 0x1, // Fragment program changed
vertex_program_dirty = 2, vertex_program_dirty = 0x2, // Vertex program changed
fragment_state_dirty = 4, fragment_state_dirty = 0x4, // Fragment state changed (alpha test, etc)
vertex_state_dirty = 8, vertex_state_dirty = 0x8, // Vertex state changed (scale_offset, clip planes, etc)
transform_constants_dirty = 16, transform_constants_dirty = 0x10, // Transform constants changed
framebuffer_reads_dirty = 32, fragment_constants_dirty = 0x20, // Fragment constants changed
framebuffer_reads_dirty = 0x40, // Framebuffer contents changed
fragment_texture_state_dirty = 0x80, // Fragment texture parameters changed
vertex_texture_state_dirty = 0x80, // Fragment texture parameters changed
invalidate_pipeline_bits = fragment_program_dirty | vertex_program_dirty, invalidate_pipeline_bits = fragment_program_dirty | vertex_program_dirty,
memory_barrier_bits = framebuffer_reads_dirty, memory_barrier_bits = framebuffer_reads_dirty,
all_dirty = 255 all_dirty = -1u
}; };
enum FIFO_state : u8 enum FIFO_state : u8
@ -641,6 +644,11 @@ namespace rsx
*/ */
void fill_fragment_state_buffer(void *buffer, const RSXFragmentProgram &fragment_program); void fill_fragment_state_buffer(void *buffer, const RSXFragmentProgram &fragment_program);
/**
* Fill buffer with fragment texture parameter constants (texture matrix)
*/
void fill_fragment_texture_parameters(void *buffer, const RSXFragmentProgram &fragment_program);
/** /**
* Write inlined array data to buffer. * Write inlined array data to buffer.
* The storage of inlined data looks different from memory stored arrays. * The storage of inlined data looks different from memory stored arrays.

View File

@ -1,4 +1,4 @@
#include "stdafx.h" #include "stdafx.h"
#include "Emu/Memory/vm.h" #include "Emu/Memory/vm.h"
#include "Emu/System.h" #include "Emu/System.h"
#include "VKFragmentProgram.h" #include "VKFragmentProgram.h"
@ -144,9 +144,7 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS)
} }
} }
OS << "layout(std140, set = 0, binding = 2) uniform FragmentConstantsBuffer\n"; std::string constants_block;
OS << "{\n";
for (const ParamType& PT : m_parr.params[PF_PARAM_UNIFORM]) for (const ParamType& PT : m_parr.params[PF_PARAM_UNIFORM])
{ {
if (PT.type == "sampler1D" || if (PT.type == "sampler1D" ||
@ -156,9 +154,21 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS)
continue; continue;
for (const ParamItem& PI : PT.items) for (const ParamItem& PI : PT.items)
OS << " " << PT.type << " " << PI.name << ";\n"; {
constants_block += " " + PT.type + " " + PI.name + ";\n";
}
} }
if (!constants_block.empty())
{
OS << "layout(std140, set = 0, binding = 3) uniform FragmentConstantsBuffer\n";
OS << "{\n";
OS << constants_block;
OS << "};\n\n";
}
OS << "layout(std140, set = 0, binding = 4) uniform FragmentStateBuffer\n";
OS << "{\n";
OS << " float fog_param0;\n"; OS << " float fog_param0;\n";
OS << " float fog_param1;\n"; OS << " float fog_param1;\n";
OS << " uint rop_control;\n"; OS << " uint rop_control;\n";
@ -167,15 +177,26 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS)
OS << " uint fog_mode;\n"; OS << " uint fog_mode;\n";
OS << " float wpos_scale;\n"; OS << " float wpos_scale;\n";
OS << " float wpos_bias;\n"; OS << " float wpos_bias;\n";
OS << "};\n\n";
OS << "layout(std140, set = 0, binding = 5) uniform TextureParametersBuffer\n";
OS << "{\n";
OS << " vec4 texture_parameters[16];\n"; OS << " vec4 texture_parameters[16];\n";
OS << "};\n"; OS << "};\n\n";
vk::glsl::program_input in; vk::glsl::program_input in;
in.location = FRAGMENT_CONSTANT_BUFFERS_BIND_SLOT; in.location = FRAGMENT_CONSTANT_BUFFERS_BIND_SLOT;
in.domain = glsl::glsl_fragment_program; in.domain = glsl::glsl_fragment_program;
in.name = "FragmentConstantsBuffer"; in.name = "FragmentConstantsBuffer";
in.type = vk::glsl::input_type_uniform_buffer; in.type = vk::glsl::input_type_uniform_buffer;
inputs.push_back(in);
in.location = FRAGMENT_STATE_BIND_SLOT;
in.name = "FragmentStateBuffer";
inputs.push_back(in);
in.location = FRAGMENT_TEXTURE_PARAMS_BIND_SLOT;
in.name = "TextureParametersBuffer";
inputs.push_back(in); inputs.push_back(in);
} }

View File

@ -433,11 +433,12 @@ namespace
std::tuple<VkPipelineLayout, VkDescriptorSetLayout> get_shared_pipeline_layout(VkDevice dev) std::tuple<VkPipelineLayout, VkDescriptorSetLayout> get_shared_pipeline_layout(VkDevice dev)
{ {
std::array<VkDescriptorSetLayoutBinding, 39> bindings = {}; std::array<VkDescriptorSetLayoutBinding, VK_NUM_DESCRIPTOR_BINDINGS> bindings = {};
size_t idx = 0; size_t idx = 0;
// Vertex buffer
for (int i = 0; i < 16; i++) // Vertex stream, one stream for cacheable data, one stream for transient data
for (int i = 0; i < 2; i++)
{ {
bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER;
bindings[idx].descriptorCount = 1; bindings[idx].descriptorCount = 1;
@ -453,6 +454,20 @@ namespace
idx++; idx++;
bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
bindings[idx].descriptorCount = 1;
bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
bindings[idx].binding = FRAGMENT_STATE_BIND_SLOT;
idx++;
bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
bindings[idx].descriptorCount = 1;
bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
bindings[idx].binding = FRAGMENT_TEXTURE_PARAMS_BIND_SLOT;
idx++;
bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
bindings[idx].descriptorCount = 1; bindings[idx].descriptorCount = 1;
bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
@ -460,7 +475,21 @@ namespace
idx++; idx++;
for (int i = 0; i < 16; i++) bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
bindings[idx].descriptorCount = 1;
bindings[idx].stageFlags = VK_SHADER_STAGE_ALL_GRAPHICS;
bindings[idx].binding = VERTEX_LAYOUT_BIND_SLOT;
idx++;
bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
bindings[idx].descriptorCount = 1;
bindings[idx].stageFlags = VK_SHADER_STAGE_ALL_GRAPHICS;
bindings[idx].binding = VERTEX_PARAMS_BIND_SLOT;
idx++;
for (int i = 0; i < rsx::limits::fragment_textures_count; i++)
{ {
bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
bindings[idx].descriptorCount = 1; bindings[idx].descriptorCount = 1;
@ -469,7 +498,7 @@ namespace
idx++; idx++;
} }
for (int i = 0; i < 4; i++) for (int i = 0; i < rsx::limits::vertex_textures_count; i++)
{ {
bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
bindings[idx].descriptorCount = 1; bindings[idx].descriptorCount = 1;
@ -478,10 +507,7 @@ namespace
idx++; idx++;
} }
bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; verify(HERE), idx == VK_NUM_DESCRIPTOR_BINDINGS;
bindings[idx].descriptorCount = 1;
bindings[idx].stageFlags = VK_SHADER_STAGE_ALL_GRAPHICS;
bindings[idx].binding = SCALE_OFFSET_BIND_SLOT;
VkDescriptorSetLayoutCreateInfo infos = {}; VkDescriptorSetLayoutCreateInfo infos = {};
infos.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; infos.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
@ -619,7 +645,11 @@ VKGSRender::VKGSRender() : GSRender()
//VRAM allocation //VRAM allocation
m_attrib_ring_info.create(VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, VK_ATTRIB_RING_BUFFER_SIZE_M * 0x100000, "attrib buffer", 0x400000); m_attrib_ring_info.create(VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, VK_ATTRIB_RING_BUFFER_SIZE_M * 0x100000, "attrib buffer", 0x400000);
m_uniform_buffer_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "uniform buffer"); m_fragment_env_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "fragment env buffer");
m_vertex_env_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "vertex env buffer");
m_fragment_texture_params_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "fragment texture params buffer");
m_vertex_layout_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "vertex layout buffer");
m_fragment_constants_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "fragment constants buffer");
m_transform_constants_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_TRANSFORM_CONSTANTS_BUFFER_SIZE_M * 0x100000, "transform constants buffer"); m_transform_constants_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_TRANSFORM_CONSTANTS_BUFFER_SIZE_M * 0x100000, "transform constants buffer");
m_index_buffer_ring_info.create(VK_BUFFER_USAGE_INDEX_BUFFER_BIT, VK_INDEX_RING_BUFFER_SIZE_M * 0x100000, "index buffer"); m_index_buffer_ring_info.create(VK_BUFFER_USAGE_INDEX_BUFFER_BIT, VK_INDEX_RING_BUFFER_SIZE_M * 0x100000, "index buffer");
m_texture_upload_buffer_ring_info.create(VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M * 0x100000, "texture upload buffer", 32 * 0x100000); m_texture_upload_buffer_ring_info.create(VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M * 0x100000, "texture upload buffer", 32 * 0x100000);
@ -719,10 +749,14 @@ VKGSRender::~VKGSRender()
vk::destroy_global_resources(); vk::destroy_global_resources();
//Heaps //Heaps
m_index_buffer_ring_info.destroy();
m_uniform_buffer_ring_info.destroy();
m_transform_constants_ring_info.destroy();
m_attrib_ring_info.destroy(); m_attrib_ring_info.destroy();
m_fragment_env_ring_info.destroy();
m_vertex_env_ring_info.destroy();
m_fragment_texture_params_ring_info.destroy();
m_vertex_layout_ring_info.destroy();
m_fragment_constants_ring_info.destroy();
m_transform_constants_ring_info.destroy();
m_index_buffer_ring_info.destroy();
m_texture_upload_buffer_ring_info.destroy(); m_texture_upload_buffer_ring_info.destroy();
//Fallback bindables //Fallback bindables
@ -938,7 +972,11 @@ void VKGSRender::check_heap_status()
{ {
if (m_attrib_ring_info.is_critical() || if (m_attrib_ring_info.is_critical() ||
m_texture_upload_buffer_ring_info.is_critical() || m_texture_upload_buffer_ring_info.is_critical() ||
m_uniform_buffer_ring_info.is_critical() || m_fragment_env_ring_info.is_critical() ||
m_vertex_env_ring_info.is_critical() ||
m_fragment_texture_params_ring_info.is_critical() ||
m_vertex_layout_ring_info.is_critical() ||
m_fragment_constants_ring_info.is_critical() ||
m_transform_constants_ring_info.is_critical() || m_transform_constants_ring_info.is_critical() ||
m_index_buffer_ring_info.is_critical()) m_index_buffer_ring_info.is_critical())
{ {
@ -963,7 +1001,11 @@ void VKGSRender::check_heap_status()
m_vertex_cache->purge(); m_vertex_cache->purge();
m_index_buffer_ring_info.reset_allocation_stats(); m_index_buffer_ring_info.reset_allocation_stats();
m_uniform_buffer_ring_info.reset_allocation_stats(); m_fragment_env_ring_info.reset_allocation_stats();
m_vertex_env_ring_info.reset_allocation_stats();
m_fragment_texture_params_ring_info.reset_allocation_stats();
m_vertex_layout_ring_info.reset_allocation_stats();
m_fragment_constants_ring_info.reset_allocation_stats();
m_transform_constants_ring_info.reset_allocation_stats(); m_transform_constants_ring_info.reset_allocation_stats();
m_attrib_ring_info.reset_allocation_stats(); m_attrib_ring_info.reset_allocation_stats();
m_texture_upload_buffer_ring_info.reset_allocation_stats(); m_texture_upload_buffer_ring_info.reset_allocation_stats();
@ -1161,15 +1203,10 @@ void VKGSRender::emit_geometry(u32 sub_index)
if (sub_index == 0) if (sub_index == 0)
{ {
// Load program execution environment
load_program_env(upload_info);
update_descriptors = true; update_descriptors = true;
} }
else else
{ {
// Update vertex fetch environment
update_vertex_env(upload_info);
if (persistent_buffer != old_persistent_buffer || volatile_buffer != old_volatile_buffer) if (persistent_buffer != old_persistent_buffer || volatile_buffer != old_volatile_buffer)
{ {
/* VkDescriptorSetAllocateInfo alloc_info = {}; /* VkDescriptorSetAllocateInfo alloc_info = {};
@ -1192,10 +1229,13 @@ void VKGSRender::emit_geometry(u32 sub_index)
} }
} }
// Update vertex fetch parameters
update_vertex_env(upload_info);
if (update_descriptors) if (update_descriptors)
{ {
m_program->bind_uniform(persistent_buffer, "persistent_input_stream", m_current_frame->descriptor_set); m_program->bind_uniform(persistent_buffer, vk::glsl::program_input_type::input_type_texel_buffer, "persistent_input_stream", m_current_frame->descriptor_set);
m_program->bind_uniform(volatile_buffer, "volatile_input_stream", m_current_frame->descriptor_set); m_program->bind_uniform(volatile_buffer, vk::glsl::program_input_type::input_type_texel_buffer, "volatile_input_stream", m_current_frame->descriptor_set);
vkCmdBindDescriptorSets(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, 0, 1, &m_current_frame->descriptor_set, 0, nullptr); vkCmdBindDescriptorSets(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, 0, 1, &m_current_frame->descriptor_set, 0, nullptr);
} }
@ -1203,8 +1243,6 @@ void VKGSRender::emit_geometry(u32 sub_index)
//std::chrono::time_point<steady_clock> draw_start = steady_clock::now(); //std::chrono::time_point<steady_clock> draw_start = steady_clock::now();
//m_setup_time += std::chrono::duration_cast<std::chrono::microseconds>(draw_start - vertex_end).count(); //m_setup_time += std::chrono::duration_cast<std::chrono::microseconds>(draw_start - vertex_end).count();
begin_render_pass();
if (!upload_info.index_info) if (!upload_info.index_info)
{ {
if (draw_call.is_single_draw()) if (draw_call.is_single_draw())
@ -1247,8 +1285,6 @@ void VKGSRender::emit_geometry(u32 sub_index)
} }
} }
close_render_pass();
//std::chrono::time_point<steady_clock> draw_end = steady_clock::now(); //std::chrono::time_point<steady_clock> draw_end = steady_clock::now();
//m_draw_time += std::chrono::duration_cast<std::chrono::microseconds>(draw_end - draw_start).count(); //m_draw_time += std::chrono::duration_cast<std::chrono::microseconds>(draw_end - draw_start).count();
} }
@ -1503,6 +1539,9 @@ void VKGSRender::end()
return; return;
} }
// Load program execution environment
load_program_env();
std::chrono::time_point<steady_clock> program_end = steady_clock::now(); std::chrono::time_point<steady_clock> program_end = steady_clock::now();
m_setup_time += std::chrono::duration_cast<std::chrono::microseconds>(program_end - program_start).count(); m_setup_time += std::chrono::duration_cast<std::chrono::microseconds>(program_end - program_start).count();
@ -1608,6 +1647,7 @@ void VKGSRender::end()
vkCmdBindPipeline(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_program->pipeline); vkCmdBindPipeline(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_program->pipeline);
update_draw_state(); update_draw_state();
begin_render_pass();
u32 sub_index = 0; u32 sub_index = 0;
rsx::method_registers.current_draw_clause.begin(); rsx::method_registers.current_draw_clause.begin();
@ -1617,6 +1657,7 @@ void VKGSRender::end()
} }
while (rsx::method_registers.current_draw_clause.next()); while (rsx::method_registers.current_draw_clause.next());
close_render_pass();
vk::leave_uninterruptible(); vk::leave_uninterruptible();
if (m_occlusion_query_active && (occlusion_id != UINT32_MAX)) if (m_occlusion_query_active && (occlusion_id != UINT32_MAX))
@ -2022,7 +2063,11 @@ void VKGSRender::advance_queued_frames()
m_vertex_cache->purge(); m_vertex_cache->purge();
m_current_frame->tag_frame_end(m_attrib_ring_info.get_current_put_pos_minus_one(), m_current_frame->tag_frame_end(m_attrib_ring_info.get_current_put_pos_minus_one(),
m_uniform_buffer_ring_info.get_current_put_pos_minus_one(), m_vertex_env_ring_info.get_current_put_pos_minus_one(),
m_fragment_env_ring_info.get_current_put_pos_minus_one(),
m_vertex_layout_ring_info.get_current_put_pos_minus_one(),
m_fragment_texture_params_ring_info.get_current_put_pos_minus_one(),
m_fragment_constants_ring_info.get_current_put_pos_minus_one(),
m_transform_constants_ring_info.get_current_put_pos_minus_one(), m_transform_constants_ring_info.get_current_put_pos_minus_one(),
m_index_buffer_ring_info.get_current_put_pos_minus_one(), m_index_buffer_ring_info.get_current_put_pos_minus_one(),
m_texture_upload_buffer_ring_info.get_current_put_pos_minus_one()); m_texture_upload_buffer_ring_info.get_current_put_pos_minus_one());
@ -2148,14 +2193,22 @@ void VKGSRender::process_swap_request(frame_context_t *ctx, bool free_resources)
//Heap cleanup; deallocates memory consumed by the frame if it is still held //Heap cleanup; deallocates memory consumed by the frame if it is still held
m_attrib_ring_info.m_get_pos = ctx->attrib_heap_ptr; m_attrib_ring_info.m_get_pos = ctx->attrib_heap_ptr;
m_uniform_buffer_ring_info.m_get_pos = ctx->ubo_heap_ptr; m_vertex_env_ring_info.m_get_pos = ctx->vtx_env_heap_ptr;
m_transform_constants_ring_info.m_get_pos = ctx->vtxconst_heap_ptr; m_fragment_env_ring_info.m_get_pos = ctx->frag_env_heap_ptr;
m_fragment_constants_ring_info.m_get_pos = ctx->frag_const_heap_ptr;
m_transform_constants_ring_info.m_get_pos = ctx->vtx_const_heap_ptr;
m_vertex_layout_ring_info.m_get_pos = ctx->vtx_layout_heap_ptr;
m_fragment_texture_params_ring_info.m_get_pos = ctx->frag_texparam_heap_ptr;
m_index_buffer_ring_info.m_get_pos = ctx->index_heap_ptr; m_index_buffer_ring_info.m_get_pos = ctx->index_heap_ptr;
m_texture_upload_buffer_ring_info.m_get_pos = ctx->texture_upload_heap_ptr; m_texture_upload_buffer_ring_info.m_get_pos = ctx->texture_upload_heap_ptr;
m_attrib_ring_info.notify(); m_attrib_ring_info.notify();
m_uniform_buffer_ring_info.notify(); m_vertex_env_ring_info.notify();
m_fragment_env_ring_info.notify();
m_fragment_constants_ring_info.notify();
m_transform_constants_ring_info.notify(); m_transform_constants_ring_info.notify();
m_vertex_layout_ring_info.notify();
m_fragment_texture_params_ring_info.notify();
m_index_buffer_ring_info.notify(); m_index_buffer_ring_info.notify();
m_texture_upload_buffer_ring_info.notify(); m_texture_upload_buffer_ring_info.notify();
} }
@ -2482,107 +2535,119 @@ bool VKGSRender::load_program()
return m_program != nullptr; return m_program != nullptr;
} }
void VKGSRender::load_program_env(const vk::vertex_upload_info& vertex_info) void VKGSRender::load_program_env()
{ {
if (!m_program) if (!m_program)
{ {
fmt::throw_exception("Unreachable right now" HERE); fmt::throw_exception("Unreachable right now" HERE);
} }
if (1)//m_graphics_state & (rsx::pipeline_state::fragment_state_dirty | rsx::pipeline_state::vertex_state_dirty)) const u32 fragment_constants_size = current_fp_metadata.program_constants_buffer_length;
const bool update_transform_constants = !!(m_graphics_state & rsx::pipeline_state::transform_constants_dirty);
const bool update_fragment_constants = !!(m_graphics_state & rsx::pipeline_state::fragment_constants_dirty);
const bool update_vertex_env = !!(m_graphics_state & rsx::pipeline_state::vertex_state_dirty);
const bool update_fragment_env = !!(m_graphics_state & rsx::pipeline_state::fragment_state_dirty);
const bool update_fragment_texture_env = !!(m_graphics_state & rsx::pipeline_state::fragment_texture_state_dirty);
if (update_vertex_env)
{ {
const size_t fragment_constants_sz = current_fp_metadata.program_constants_buffer_length; // Vertex state
const size_t fragment_buffer_sz = fragment_constants_sz + (18 * 4 * sizeof(float)); const auto mem = m_vertex_env_ring_info.alloc<256>(256);
const size_t required_mem = 512 + fragment_buffer_sz; auto buf = (u8*)m_vertex_env_ring_info.map(mem, 160);
const size_t vertex_state_offset = m_uniform_buffer_ring_info.alloc<256>(required_mem);
const size_t fragment_constants_offset = vertex_state_offset + 512;
//We do this in one go
u8 *buf = (u8*)m_uniform_buffer_ring_info.map(vertex_state_offset, required_mem);
//Vertex state
fill_scale_offset_data(buf, false); fill_scale_offset_data(buf, false);
fill_user_clip_data(buf + 64); fill_user_clip_data(buf + 64);
*(reinterpret_cast<u32*>(buf + 128)) = rsx::method_registers.transform_branch_bits(); *(reinterpret_cast<u32*>(buf + 128)) = rsx::method_registers.transform_branch_bits();
*(reinterpret_cast<u32*>(buf + 132)) = vertex_info.vertex_index_base; *(reinterpret_cast<u32*>(buf + 132)) = 0; // Reserved
*(reinterpret_cast<f32*>(buf + 136)) = rsx::method_registers.point_size(); *(reinterpret_cast<f32*>(buf + 136)) = rsx::method_registers.point_size();
*(reinterpret_cast<f32*>(buf + 140)) = rsx::method_registers.clip_min(); *(reinterpret_cast<f32*>(buf + 140)) = rsx::method_registers.clip_min();
*(reinterpret_cast<f32*>(buf + 144)) = rsx::method_registers.clip_max(); *(reinterpret_cast<f32*>(buf + 144)) = rsx::method_registers.clip_max();
fill_vertex_layout_state(m_vertex_layout, vertex_info.allocated_vertex_count, reinterpret_cast<s32*>(buf + 160), m_vertex_env_ring_info.unmap();
vertex_info.persistent_window_offset, vertex_info.volatile_window_offset); m_vertex_env_buffer_info = { m_vertex_env_ring_info.heap->value, mem, 160 };
//Fragment constants
buf = buf + 512;
if (fragment_constants_sz)
{
m_prog_buffer->fill_fragment_constants_buffer({ reinterpret_cast<float*>(buf), ::narrow<int>(fragment_constants_sz) },
current_fragment_program, vk::sanitize_fp_values());
} }
fill_fragment_state_buffer(buf + fragment_constants_sz, current_fragment_program); if (update_transform_constants)
m_uniform_buffer_ring_info.unmap();
m_vertex_state_buffer_info = { m_uniform_buffer_ring_info.heap->value, vertex_state_offset, 512 };
m_fragment_state_buffer_info = { m_uniform_buffer_ring_info.heap->value, fragment_constants_offset, fragment_buffer_sz };
}
if (m_graphics_state & rsx::pipeline_state::transform_constants_dirty)
{ {
//Vertex constants // Transform constants
const size_t vertex_constants_offset = m_transform_constants_ring_info.alloc<256>(8192); auto mem = m_transform_constants_ring_info.alloc<256>(8192);
auto buf = m_transform_constants_ring_info.map(vertex_constants_offset, 8192); auto buf = m_transform_constants_ring_info.map(mem, 8192);
fill_vertex_program_constants_data(buf); fill_vertex_program_constants_data(buf);
m_transform_constants_ring_info.unmap(); m_transform_constants_ring_info.unmap();
m_vertex_constants_buffer_info = { m_transform_constants_ring_info.heap->value, vertex_constants_offset, 8192 }; m_vertex_constants_buffer_info = { m_transform_constants_ring_info.heap->value, mem, 8192 };
} }
if (1)//m_graphics_state || old_program != m_program) if (update_fragment_constants)
{ {
m_program->bind_uniform(m_vertex_state_buffer_info, SCALE_OFFSET_BIND_SLOT, m_current_frame->descriptor_set); // Fragment constants
if (fragment_constants_size)
{
auto mem = m_fragment_constants_ring_info.alloc<256>(fragment_constants_size);
auto buf = m_fragment_constants_ring_info.map(mem, fragment_constants_size);
m_prog_buffer->fill_fragment_constants_buffer({ reinterpret_cast<float*>(buf), ::narrow<int>(fragment_constants_size) },
current_fragment_program, vk::sanitize_fp_values());
m_fragment_constants_ring_info.unmap();
m_fragment_constants_buffer_info = { m_fragment_constants_ring_info.heap->value, mem, fragment_constants_size };
}
else
{
m_fragment_constants_buffer_info = { m_fragment_constants_ring_info.heap->value, 0, VK_WHOLE_SIZE };
}
}
if (update_fragment_env)
{
auto mem = m_fragment_env_ring_info.alloc<256>(256);
auto buf = m_fragment_env_ring_info.map(mem, 32);
fill_fragment_state_buffer(buf, current_fragment_program);
m_fragment_env_ring_info.unmap();
m_fragment_env_buffer_info = { m_fragment_env_ring_info.heap->value, mem, 32 };
}
if (update_fragment_texture_env)
{
auto mem = m_fragment_texture_params_ring_info.alloc<256>(256);
auto buf = m_fragment_texture_params_ring_info.map(mem, 256);
fill_fragment_texture_parameters(buf, current_fragment_program);
m_fragment_texture_params_ring_info.unmap();
m_fragment_texture_params_buffer_info = { m_fragment_texture_params_ring_info.heap->value, mem, 256 };
}
//if (1)
{
m_program->bind_uniform(m_vertex_env_buffer_info, VERTEX_PARAMS_BIND_SLOT, m_current_frame->descriptor_set);
m_program->bind_uniform(m_vertex_constants_buffer_info, VERTEX_CONSTANT_BUFFERS_BIND_SLOT, m_current_frame->descriptor_set); m_program->bind_uniform(m_vertex_constants_buffer_info, VERTEX_CONSTANT_BUFFERS_BIND_SLOT, m_current_frame->descriptor_set);
m_program->bind_uniform(m_fragment_state_buffer_info, FRAGMENT_CONSTANT_BUFFERS_BIND_SLOT, m_current_frame->descriptor_set); m_program->bind_uniform(m_fragment_constants_buffer_info, FRAGMENT_CONSTANT_BUFFERS_BIND_SLOT, m_current_frame->descriptor_set);
m_program->bind_uniform(m_fragment_env_buffer_info, FRAGMENT_STATE_BIND_SLOT, m_current_frame->descriptor_set);
m_program->bind_uniform(m_fragment_texture_params_buffer_info, FRAGMENT_TEXTURE_PARAMS_BIND_SLOT, m_current_frame->descriptor_set);
} }
//Clear flags //Clear flags
const u32 handled_flags = (rsx::pipeline_state::fragment_state_dirty | rsx::pipeline_state::vertex_state_dirty | rsx::pipeline_state::transform_constants_dirty); const u32 handled_flags = (rsx::pipeline_state::fragment_state_dirty | rsx::pipeline_state::vertex_state_dirty | rsx::pipeline_state::transform_constants_dirty | rsx::pipeline_state::fragment_constants_dirty | rsx::pipeline_state::fragment_texture_state_dirty);
m_graphics_state &= ~handled_flags; m_graphics_state &= ~handled_flags;
} }
void VKGSRender::update_vertex_env(const vk::vertex_upload_info& vertex_info) void VKGSRender::update_vertex_env(const vk::vertex_upload_info& vertex_info)
{ {
// Vertex base index = vertex_offset + 132 auto mem = m_vertex_layout_ring_info.alloc<256>(256);
// Vertex layout = vertex_offset + 160 auto buf = (u32*)m_vertex_layout_ring_info.map(mem, 128 + 16);
std::array<s32, 16 * 4> vertex_layout; *buf = vertex_info.vertex_index_base;
fill_vertex_layout_state(m_vertex_layout, vertex_info.allocated_vertex_count, vertex_layout.data(), buf += 4;
fill_vertex_layout_state(m_vertex_layout, vertex_info.allocated_vertex_count, (s32*)buf,
vertex_info.persistent_window_offset, vertex_info.volatile_window_offset); vertex_info.persistent_window_offset, vertex_info.volatile_window_offset);
vk::insert_buffer_memory_barrier(*m_current_command_buffer, m_uniform_buffer_ring_info.heap->value, m_vertex_state_buffer_info.offset, 512, m_vertex_layout_ring_info.unmap();
VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_TRANSFER_WRITE_BIT); m_vertex_layout_buffer_info = { m_vertex_layout_ring_info.heap->value, mem, 128 + 16 };
vkCmdUpdateBuffer(*m_current_command_buffer, m_uniform_buffer_ring_info.heap->value, m_vertex_state_buffer_info.offset + 132, 4, &vertex_info.vertex_index_base); m_program->bind_uniform(m_vertex_layout_buffer_info, VERTEX_LAYOUT_BIND_SLOT, m_current_frame->descriptor_set);
u32 write_offset = m_vertex_state_buffer_info.offset + 160;
s32 *src_ptr = vertex_layout.data();
for (const auto& placement : m_vertex_layout.attribute_placement)
{
constexpr u32 data_len = 4 * sizeof(s32);
if (placement != rsx::attribute_buffer_placement::none)
{
vkCmdUpdateBuffer(*m_current_command_buffer, m_uniform_buffer_ring_info.heap->value, write_offset, data_len, src_ptr);
}
write_offset += data_len;
src_ptr += 4;
}
vk::insert_buffer_memory_barrier(*m_current_command_buffer, m_uniform_buffer_ring_info.heap->value, m_vertex_state_buffer_info.offset, 512,
VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_UNIFORM_READ_BIT);
} }
void VKGSRender::init_buffers(rsx::framebuffer_creation_context context, bool skip_reading) void VKGSRender::init_buffers(rsx::framebuffer_creation_context context, bool skip_reading)
@ -2633,7 +2698,11 @@ void VKGSRender::write_buffers()
void VKGSRender::close_and_submit_command_buffer(const std::vector<VkSemaphore> &semaphores, VkFence fence, VkPipelineStageFlags pipeline_stage_flags) void VKGSRender::close_and_submit_command_buffer(const std::vector<VkSemaphore> &semaphores, VkFence fence, VkPipelineStageFlags pipeline_stage_flags)
{ {
if (m_attrib_ring_info.dirty() || if (m_attrib_ring_info.dirty() ||
m_uniform_buffer_ring_info.dirty() || m_fragment_env_ring_info.dirty() ||
m_vertex_env_ring_info.dirty() ||
m_fragment_texture_params_ring_info.dirty() ||
m_vertex_layout_ring_info.dirty() ||
m_fragment_constants_ring_info.dirty() ||
m_index_buffer_ring_info.dirty() || m_index_buffer_ring_info.dirty() ||
m_transform_constants_ring_info.dirty() || m_transform_constants_ring_info.dirty() ||
m_texture_upload_buffer_ring_info.dirty()) m_texture_upload_buffer_ring_info.dirty())
@ -2642,7 +2711,11 @@ void VKGSRender::close_and_submit_command_buffer(const std::vector<VkSemaphore>
m_secondary_command_buffer.begin(); m_secondary_command_buffer.begin();
m_attrib_ring_info.sync(m_secondary_command_buffer); m_attrib_ring_info.sync(m_secondary_command_buffer);
m_uniform_buffer_ring_info.sync(m_secondary_command_buffer); m_fragment_env_ring_info.sync(m_secondary_command_buffer);
m_vertex_env_ring_info.sync(m_secondary_command_buffer);
m_fragment_texture_params_ring_info.sync(m_secondary_command_buffer);
m_vertex_layout_ring_info.sync(m_secondary_command_buffer);
m_fragment_constants_ring_info.sync(m_secondary_command_buffer);
m_index_buffer_ring_info.sync(m_secondary_command_buffer); m_index_buffer_ring_info.sync(m_secondary_command_buffer);
m_transform_constants_ring_info.sync(m_secondary_command_buffer); m_transform_constants_ring_info.sync(m_secondary_command_buffer);
m_texture_upload_buffer_ring_info.sync(m_secondary_command_buffer); m_texture_upload_buffer_ring_info.sync(m_secondary_command_buffer);

View File

@ -36,8 +36,9 @@ namespace vk
//NOTE: Texture uploads can be huge, up to 16MB for a single texture (4096x4096px) //NOTE: Texture uploads can be huge, up to 16MB for a single texture (4096x4096px)
#define VK_ATTRIB_RING_BUFFER_SIZE_M 384 #define VK_ATTRIB_RING_BUFFER_SIZE_M 384
#define VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M 256 #define VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M 256
#define VK_UBO_RING_BUFFER_SIZE_M 64 #define VK_UBO_RING_BUFFER_SIZE_M 16
#define VK_TRANSFORM_CONSTANTS_BUFFER_SIZE_M 64 #define VK_TRANSFORM_CONSTANTS_BUFFER_SIZE_M 64
#define VK_FRAGMENT_CONSTANTS_BUFFER_SIZE_M 64
#define VK_INDEX_RING_BUFFER_SIZE_M 64 #define VK_INDEX_RING_BUFFER_SIZE_M 64
#define VK_MAX_ASYNC_CB_COUNT 64 #define VK_MAX_ASYNC_CB_COUNT 64
@ -161,8 +162,12 @@ struct frame_context_t
//Heap pointers //Heap pointers
s64 attrib_heap_ptr = 0; s64 attrib_heap_ptr = 0;
s64 ubo_heap_ptr = 0; s64 vtx_env_heap_ptr = 0;
s64 vtxconst_heap_ptr = 0; s64 frag_env_heap_ptr = 0;
s64 frag_const_heap_ptr = 0;
s64 vtx_const_heap_ptr = 0;
s64 vtx_layout_heap_ptr = 0;
s64 frag_texparam_heap_ptr = 0;
s64 index_heap_ptr = 0; s64 index_heap_ptr = 0;
s64 texture_upload_heap_ptr = 0; s64 texture_upload_heap_ptr = 0;
@ -177,9 +182,13 @@ struct frame_context_t
used_descriptors = other.used_descriptors; used_descriptors = other.used_descriptors;
attrib_heap_ptr = other.attrib_heap_ptr; attrib_heap_ptr = other.attrib_heap_ptr;
ubo_heap_ptr = other.attrib_heap_ptr; vtx_env_heap_ptr = other.vtx_env_heap_ptr;
vtxconst_heap_ptr = other.vtxconst_heap_ptr; frag_env_heap_ptr = other.frag_env_heap_ptr;
index_heap_ptr = other.attrib_heap_ptr; vtx_layout_heap_ptr = other.vtx_layout_heap_ptr;
frag_texparam_heap_ptr = other.frag_texparam_heap_ptr;
frag_const_heap_ptr = other.frag_const_heap_ptr;
vtx_const_heap_ptr = other.vtx_const_heap_ptr;
index_heap_ptr = other.index_heap_ptr;
texture_upload_heap_ptr = other.texture_upload_heap_ptr; texture_upload_heap_ptr = other.texture_upload_heap_ptr;
} }
@ -190,11 +199,15 @@ struct frame_context_t
std::swap(samplers_to_clean, other.samplers_to_clean); std::swap(samplers_to_clean, other.samplers_to_clean);
} }
void tag_frame_end(s64 attrib_loc, s64 ubo_loc, s64 vtxconst_loc, s64 index_loc, s64 texture_loc) void tag_frame_end(s64 attrib_loc, s64 vtxenv_loc, s64 fragenv_loc, s64 vtxlayout_loc, s64 fragtex_loc, s64 fragconst_loc,s64 vtxconst_loc, s64 index_loc, s64 texture_loc)
{ {
attrib_heap_ptr = attrib_loc; attrib_heap_ptr = attrib_loc;
ubo_heap_ptr = ubo_loc; vtx_env_heap_ptr = vtxenv_loc;
vtxconst_heap_ptr = vtxconst_loc; frag_env_heap_ptr = fragenv_loc;
vtx_layout_heap_ptr = vtxlayout_loc;
frag_texparam_heap_ptr = fragtex_loc;
frag_const_heap_ptr = fragconst_loc;
vtx_const_heap_ptr = vtxconst_loc;
index_heap_ptr = index_loc; index_heap_ptr = index_loc;
texture_upload_heap_ptr = texture_loc; texture_upload_heap_ptr = texture_loc;
@ -326,15 +339,22 @@ private:
u64 m_last_heap_sync_time = 0; u64 m_last_heap_sync_time = 0;
u32 m_texbuffer_view_size = 0; u32 m_texbuffer_view_size = 0;
vk::vk_data_heap m_attrib_ring_info; vk::data_heap m_attrib_ring_info; // Vertex data
vk::vk_data_heap m_uniform_buffer_ring_info; vk::data_heap m_fragment_constants_ring_info; // Fragment program constants
vk::vk_data_heap m_transform_constants_ring_info; vk::data_heap m_transform_constants_ring_info; // Transform program constants
vk::vk_data_heap m_index_buffer_ring_info; vk::data_heap m_fragment_env_ring_info; // Fragment environment params
vk::vk_data_heap m_texture_upload_buffer_ring_info; vk::data_heap m_vertex_env_ring_info; // Vertex environment params
vk::data_heap m_fragment_texture_params_ring_info; // Fragment texture params
vk::data_heap m_vertex_layout_ring_info; // Vertex layout structure
vk::data_heap m_index_buffer_ring_info; // Index data
vk::data_heap m_texture_upload_buffer_ring_info; // Texture upload heap
VkDescriptorBufferInfo m_vertex_state_buffer_info; VkDescriptorBufferInfo m_vertex_env_buffer_info;
VkDescriptorBufferInfo m_fragment_env_buffer_info;
VkDescriptorBufferInfo m_vertex_constants_buffer_info; VkDescriptorBufferInfo m_vertex_constants_buffer_info;
VkDescriptorBufferInfo m_fragment_state_buffer_info; VkDescriptorBufferInfo m_fragment_constants_buffer_info;
VkDescriptorBufferInfo m_vertex_layout_buffer_info;
VkDescriptorBufferInfo m_fragment_texture_params_buffer_info;
std::array<frame_context_t, VK_MAX_ASYNC_FRAMES> frame_context_storage; std::array<frame_context_t, VK_MAX_ASYNC_FRAMES> frame_context_storage;
//Temp frame context to use if the real frame queue is overburdened. Only used for storage //Temp frame context to use if the real frame queue is overburdened. Only used for storage
@ -404,7 +424,7 @@ private:
vk::vertex_upload_info upload_vertex_data(); vk::vertex_upload_info upload_vertex_data();
bool load_program(); bool load_program();
void load_program_env(const vk::vertex_upload_info& upload_info); void load_program_env();
void update_vertex_env(const vk::vertex_upload_info& upload_info); void update_vertex_env(const vk::vertex_upload_info& upload_info);
public: public:

View File

@ -34,12 +34,17 @@
#define DESCRIPTOR_MAX_DRAW_CALLS 4096 #define DESCRIPTOR_MAX_DRAW_CALLS 4096
#define OCCLUSION_MAX_POOL_SIZE 8192 #define OCCLUSION_MAX_POOL_SIZE 8192
#define VERTEX_BUFFERS_FIRST_BIND_SLOT 3 #define VERTEX_PARAMS_BIND_SLOT 0
#define FRAGMENT_CONSTANT_BUFFERS_BIND_SLOT 2 #define VERTEX_LAYOUT_BIND_SLOT 1
#define VERTEX_CONSTANT_BUFFERS_BIND_SLOT 1 #define VERTEX_CONSTANT_BUFFERS_BIND_SLOT 2
#define SCALE_OFFSET_BIND_SLOT 0 #define FRAGMENT_CONSTANT_BUFFERS_BIND_SLOT 3
#define TEXTURES_FIRST_BIND_SLOT 19 #define FRAGMENT_STATE_BIND_SLOT 4
#define VERTEX_TEXTURES_FIRST_BIND_SLOT 35 //19+16 #define FRAGMENT_TEXTURE_PARAMS_BIND_SLOT 5
#define VERTEX_BUFFERS_FIRST_BIND_SLOT 6
#define TEXTURES_FIRST_BIND_SLOT 8
#define VERTEX_TEXTURES_FIRST_BIND_SLOT 24 //8+16
#define VK_NUM_DESCRIPTOR_BINDINGS (VERTEX_TEXTURES_FIRST_BIND_SLOT + 4)
namespace rsx namespace rsx
{ {
@ -80,7 +85,7 @@ namespace vk
class command_buffer; class command_buffer;
struct image; struct image;
struct buffer; struct buffer;
struct vk_data_heap; struct data_heap;
class mem_allocator_base; class mem_allocator_base;
struct memory_type_mapping; struct memory_type_mapping;
struct gpu_formats_support; struct gpu_formats_support;
@ -131,7 +136,7 @@ namespace vk
*/ */
void copy_mipmaped_image_using_buffer(VkCommandBuffer cmd, vk::image* dst_image, void copy_mipmaped_image_using_buffer(VkCommandBuffer cmd, vk::image* dst_image,
const std::vector<rsx_subresource_layout>& subresource_layout, int format, bool is_swizzled, u16 mipmap_count, const std::vector<rsx_subresource_layout>& subresource_layout, int format, bool is_swizzled, u16 mipmap_count,
VkImageAspectFlags flags, vk::vk_data_heap &upload_heap); VkImageAspectFlags flags, vk::data_heap &upload_heap);
//Other texture management helpers //Other texture management helpers
void change_image_layout(VkCommandBuffer cmd, VkImage image, VkImageLayout current_layout, VkImageLayout new_layout, VkImageSubresourceRange range); void change_image_layout(VkCommandBuffer cmd, VkImage image, VkImageLayout current_layout, VkImageLayout new_layout, VkImageSubresourceRange range);
@ -2723,12 +2728,14 @@ public:
namespace glsl namespace glsl
{ {
enum program_input_type enum program_input_type : u32
{ {
input_type_uniform_buffer = 0, input_type_uniform_buffer = 0,
input_type_texel_buffer = 1, input_type_texel_buffer = 1,
input_type_texture = 2, input_type_texture = 2,
input_type_storage_buffer = 3 input_type_storage_buffer = 3,
input_type_max_enum = 4
}; };
struct bound_sampler struct bound_sampler
@ -2834,8 +2841,9 @@ public:
class program class program
{ {
std::vector<program_input> uniforms; std::array<std::vector<program_input>, input_type_max_enum> uniforms;
VkDevice m_device; VkDevice m_device;
public: public:
VkPipeline pipeline; VkPipeline pipeline;
u64 attribute_location_mask; u64 attribute_location_mask;
@ -2848,10 +2856,10 @@ public:
program& load_uniforms(::glsl::program_domain domain, const std::vector<program_input>& inputs); program& load_uniforms(::glsl::program_domain domain, const std::vector<program_input>& inputs);
bool has_uniform(std::string uniform_name); bool has_uniform(program_input_type type, const std::string &uniform_name);
void bind_uniform(const VkDescriptorImageInfo &image_descriptor, std::string uniform_name, VkDescriptorSet &descriptor_set); void bind_uniform(const VkDescriptorImageInfo &image_descriptor, const std::string &uniform_name, VkDescriptorSet &descriptor_set);
void bind_uniform(const VkDescriptorBufferInfo &buffer_descriptor, uint32_t binding_point, VkDescriptorSet &descriptor_set); void bind_uniform(const VkDescriptorBufferInfo &buffer_descriptor, uint32_t binding_point, VkDescriptorSet &descriptor_set);
void bind_uniform(const VkBufferView &buffer_view, const std::string &binding_name, VkDescriptorSet &descriptor_set); void bind_uniform(const VkBufferView &buffer_view, program_input_type type, const std::string &binding_name, VkDescriptorSet &descriptor_set);
void bind_buffer(const VkDescriptorBufferInfo &buffer_descriptor, uint32_t binding_point, VkDescriptorType type, VkDescriptorSet &descriptor_set); void bind_buffer(const VkDescriptorBufferInfo &buffer_descriptor, uint32_t binding_point, VkDescriptorType type, VkDescriptorSet &descriptor_set);
@ -2859,7 +2867,7 @@ public:
}; };
} }
struct vk_data_heap : public data_heap struct data_heap : public ::data_heap
{ {
std::unique_ptr<buffer> heap; std::unique_ptr<buffer> heap;
bool mapped = false; bool mapped = false;
@ -2874,7 +2882,7 @@ public:
void create(VkBufferUsageFlags usage, size_t size, const char *name = "unnamed", size_t guard = 0x10000) void create(VkBufferUsageFlags usage, size_t size, const char *name = "unnamed", size_t guard = 0x10000)
{ {
data_heap::init(size, name, guard); ::data_heap::init(size, name, guard);
const auto device = get_current_renderer(); const auto device = get_current_renderer();
const auto memory_map = device->get_memory_mapping(); const auto memory_map = device->get_memory_mapping();

View File

@ -1,4 +1,4 @@
#pragma once #pragma once
#include "VKHelpers.h" #include "VKHelpers.h"
#include "VKVertexProgram.h" #include "VKVertexProgram.h"
#include "VKFragmentProgram.h" #include "VKFragmentProgram.h"
@ -26,8 +26,8 @@ namespace vk
std::unordered_map<VkRenderPass, std::unique_ptr<vk::glsl::program>> m_program_cache; std::unordered_map<VkRenderPass, std::unique_ptr<vk::glsl::program>> m_program_cache;
std::unique_ptr<vk::sampler> m_sampler; std::unique_ptr<vk::sampler> m_sampler;
std::unique_ptr<vk::framebuffer> m_draw_fbo; std::unique_ptr<vk::framebuffer> m_draw_fbo;
vk_data_heap m_vao; vk::data_heap m_vao;
vk_data_heap m_ubo; vk::data_heap m_ubo;
vk::render_device* m_device = nullptr; vk::render_device* m_device = nullptr;
std::string vs_src; std::string vs_src;
@ -574,7 +574,7 @@ namespace vk
} }
vk::image_view* upload_simple_texture(vk::render_device &dev, vk::command_buffer &cmd, vk::image_view* upload_simple_texture(vk::render_device &dev, vk::command_buffer &cmd,
vk::vk_data_heap& upload_heap, u64 key, int w, int h, bool font, bool temp, void *pixel_src, u32 owner_uid) vk::data_heap& upload_heap, u64 key, int w, int h, bool font, bool temp, void *pixel_src, u32 owner_uid)
{ {
const VkFormat format = (font) ? VK_FORMAT_R8_UNORM : VK_FORMAT_B8G8R8A8_UNORM; const VkFormat format = (font) ? VK_FORMAT_R8_UNORM : VK_FORMAT_B8G8R8A8_UNORM;
const u32 pitch = (font) ? w : w * 4; const u32 pitch = (font) ? w : w * 4;
@ -627,7 +627,7 @@ namespace vk
return result; return result;
} }
void create(vk::command_buffer &cmd, vk::vk_data_heap &upload_heap) void create(vk::command_buffer &cmd, vk::data_heap &upload_heap)
{ {
auto& dev = cmd.get_command_pool().get_owner(); auto& dev = cmd.get_command_pool().get_owner();
overlay_pass::create(dev); overlay_pass::create(dev);
@ -674,7 +674,7 @@ namespace vk
} }
} }
vk::image_view* find_font(rsx::overlays::font *font, vk::command_buffer &cmd, vk::vk_data_heap &upload_heap) vk::image_view* find_font(rsx::overlays::font *font, vk::command_buffer &cmd, vk::data_heap &upload_heap)
{ {
u64 key = (u64)font; u64 key = (u64)font;
auto found = view_cache.find(key); auto found = view_cache.find(key);
@ -686,7 +686,7 @@ namespace vk
true, false, font->glyph_data.data(), UINT32_MAX); true, false, font->glyph_data.data(), UINT32_MAX);
} }
vk::image_view* find_temp_image(rsx::overlays::image_info *desc, vk::command_buffer &cmd, vk::vk_data_heap &upload_heap, u32 owner_uid) vk::image_view* find_temp_image(rsx::overlays::image_info *desc, vk::command_buffer &cmd, vk::data_heap &upload_heap, u32 owner_uid)
{ {
u64 key = (u64)desc; u64 key = (u64)desc;
auto found = temp_view_cache.find(key); auto found = temp_view_cache.find(key);
@ -735,7 +735,7 @@ namespace vk
} }
void run(vk::command_buffer &cmd, u16 w, u16 h, vk::framebuffer* target, VkRenderPass render_pass, void run(vk::command_buffer &cmd, u16 w, u16 h, vk::framebuffer* target, VkRenderPass render_pass,
vk::vk_data_heap &upload_heap, rsx::overlays::overlay &ui) vk::data_heap &upload_heap, rsx::overlays::overlay &ui)
{ {
m_scale_offset = color4f((f32)ui.virtual_width, (f32)ui.virtual_height, 1.f, 1.f); m_scale_offset = color4f((f32)ui.virtual_width, (f32)ui.virtual_height, 1.f, 1.f);
m_time = (f32)(get_system_time() / 1000) * 0.005f; m_time = (f32)(get_system_time() / 1000) * 0.005f;

View File

@ -1,4 +1,4 @@
#include "stdafx.h" #include "stdafx.h"
#include "VKHelpers.h" #include "VKHelpers.h"
namespace vk namespace vk
@ -23,23 +23,17 @@ namespace vk
program& program::load_uniforms(program_domain domain, const std::vector<program_input>& inputs) program& program::load_uniforms(program_domain domain, const std::vector<program_input>& inputs)
{ {
std::vector<program_input> store = uniforms;
uniforms.resize(0);
for (auto &item : store)
{
uniforms.push_back(item);
}
for (auto &item : inputs) for (auto &item : inputs)
uniforms.push_back(item); {
uniforms[item.type].push_back(item);
}
return *this; return *this;
} }
bool program::has_uniform(std::string uniform_name) bool program::has_uniform(program_input_type type, const std::string &uniform_name)
{ {
for (auto &uniform : uniforms) for (const auto &uniform : uniforms[type])
{ {
if (uniform.name == uniform_name) if (uniform.name == uniform_name)
return true; return true;
@ -48,20 +42,25 @@ namespace vk
return false; return false;
} }
void program::bind_uniform(const VkDescriptorImageInfo &image_descriptor, std::string uniform_name, VkDescriptorSet &descriptor_set) void program::bind_uniform(const VkDescriptorImageInfo &image_descriptor, const std::string& uniform_name, VkDescriptorSet &descriptor_set)
{ {
for (auto &uniform : uniforms) for (const auto &uniform : uniforms[program_input_type::input_type_texture])
{ {
if (uniform.name == uniform_name) if (uniform.name == uniform_name)
{ {
VkWriteDescriptorSet descriptor_writer = {}; const VkWriteDescriptorSet descriptor_writer =
descriptor_writer.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; {
descriptor_writer.dstSet = descriptor_set; VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, // sType
descriptor_writer.descriptorCount = 1; nullptr, // pNext
descriptor_writer.pImageInfo = &image_descriptor; descriptor_set, // dstSet
descriptor_writer.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; uniform.location, // dstBinding
descriptor_writer.dstArrayElement = 0; 0, // dstArrayElement
descriptor_writer.dstBinding = uniform.location; 1, // descriptorCount
VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, // descriptorType
&image_descriptor, // pImageInfo
nullptr, // pBufferInfo
nullptr // pTexelBufferView
};
vkUpdateDescriptorSets(m_device, 1, &descriptor_writer, 0, nullptr); vkUpdateDescriptorSets(m_device, 1, &descriptor_writer, 0, nullptr);
attribute_location_mask |= (1ull << uniform.location); attribute_location_mask |= (1ull << uniform.location);
@ -77,20 +76,25 @@ namespace vk
bind_buffer(buffer_descriptor, binding_point, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, descriptor_set); bind_buffer(buffer_descriptor, binding_point, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, descriptor_set);
} }
void program::bind_uniform(const VkBufferView &buffer_view, const std::string &binding_name, VkDescriptorSet &descriptor_set) void program::bind_uniform(const VkBufferView &buffer_view, program_input_type type, const std::string &binding_name, VkDescriptorSet &descriptor_set)
{ {
for (auto &uniform : uniforms) for (const auto &uniform : uniforms[type])
{ {
if (uniform.name == binding_name) if (uniform.name == binding_name)
{ {
VkWriteDescriptorSet descriptor_writer = {}; const VkWriteDescriptorSet descriptor_writer =
descriptor_writer.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; {
descriptor_writer.dstSet = descriptor_set; VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, // sType
descriptor_writer.descriptorCount = 1; nullptr, // pNext
descriptor_writer.pTexelBufferView = &buffer_view; descriptor_set, // dstSet
descriptor_writer.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; uniform.location, // dstBinding
descriptor_writer.dstArrayElement = 0; 0, // dstArrayElement
descriptor_writer.dstBinding = uniform.location; 1, // descriptorCount
VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,// descriptorType
nullptr, // pImageInfo
nullptr, // pBufferInfo
&buffer_view // pTexelBufferView
};
vkUpdateDescriptorSets(m_device, 1, &descriptor_writer, 0, nullptr); vkUpdateDescriptorSets(m_device, 1, &descriptor_writer, 0, nullptr);
attribute_location_mask |= (1ull << uniform.location); attribute_location_mask |= (1ull << uniform.location);
@ -103,14 +107,19 @@ namespace vk
void program::bind_buffer(const VkDescriptorBufferInfo &buffer_descriptor, uint32_t binding_point, VkDescriptorType type, VkDescriptorSet &descriptor_set) void program::bind_buffer(const VkDescriptorBufferInfo &buffer_descriptor, uint32_t binding_point, VkDescriptorType type, VkDescriptorSet &descriptor_set)
{ {
VkWriteDescriptorSet descriptor_writer = {}; const VkWriteDescriptorSet descriptor_writer =
descriptor_writer.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; {
descriptor_writer.dstSet = descriptor_set; VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, // sType
descriptor_writer.descriptorCount = 1; nullptr, // pNext
descriptor_writer.pBufferInfo = &buffer_descriptor; descriptor_set, // dstSet
descriptor_writer.descriptorType = type; binding_point, // dstBinding
descriptor_writer.dstArrayElement = 0; 0, // dstArrayElement
descriptor_writer.dstBinding = binding_point; 1, // descriptorCount
type, // descriptorType
nullptr, // pImageInfo
&buffer_descriptor, // pBufferInfo
nullptr // pTexelBufferView
};
vkUpdateDescriptorSets(m_device, 1, &descriptor_writer, 0, nullptr); vkUpdateDescriptorSets(m_device, 1, &descriptor_writer, 0, nullptr);
attribute_location_mask |= (1ull << binding_point); attribute_location_mask |= (1ull << binding_point);
@ -121,10 +130,9 @@ namespace vk
if (vertex_attributes_mask) if (vertex_attributes_mask)
return vertex_attributes_mask; return vertex_attributes_mask;
for (auto &uniform : uniforms) for (const auto &uniform : uniforms[program_input_type::input_type_texel_buffer])
{ {
if (uniform.domain == program_domain::glsl_vertex_program && if (uniform.domain == program_domain::glsl_vertex_program)
uniform.type == program_input_type::input_type_texel_buffer)
{ {
vertex_attributes_mask |= (1ull << (uniform.location - VERTEX_BUFFERS_FIRST_BIND_SLOT)); vertex_attributes_mask |= (1ull << (uniform.location - VERTEX_BUFFERS_FIRST_BIND_SLOT));
} }

View File

@ -1,4 +1,4 @@
#include "stdafx.h" #include "stdafx.h"
#include "VKHelpers.h" #include "VKHelpers.h"
#include "../GCM.h" #include "../GCM.h"
#include "../RSXThread.h" #include "../RSXThread.h"
@ -427,7 +427,7 @@ namespace vk
void copy_mipmaped_image_using_buffer(VkCommandBuffer cmd, vk::image* dst_image, void copy_mipmaped_image_using_buffer(VkCommandBuffer cmd, vk::image* dst_image,
const std::vector<rsx_subresource_layout>& subresource_layout, int format, bool is_swizzled, u16 mipmap_count, const std::vector<rsx_subresource_layout>& subresource_layout, int format, bool is_swizzled, u16 mipmap_count,
VkImageAspectFlags flags, vk::vk_data_heap &upload_heap) VkImageAspectFlags flags, vk::data_heap &upload_heap)
{ {
u32 mipmap_level = 0; u32 mipmap_level = 0;
u32 block_in_pixel = get_format_block_size_in_texel(format); u32 block_in_pixel = get_format_block_size_in_texel(format);

View File

@ -427,7 +427,7 @@ namespace vk
vk::memory_type_mapping m_memory_types; vk::memory_type_mapping m_memory_types;
vk::gpu_formats_support m_formats_support; vk::gpu_formats_support m_formats_support;
VkQueue m_submit_queue; VkQueue m_submit_queue;
vk_data_heap* m_texture_upload_heap; vk::data_heap* m_texture_upload_heap;
//Stuff that has been dereferenced goes into these //Stuff that has been dereferenced goes into these
std::list<discarded_storage> m_discardable_storage; std::list<discarded_storage> m_discardable_storage;
@ -956,7 +956,7 @@ namespace vk
public: public:
using baseclass::texture_cache; using baseclass::texture_cache;
void initialize(vk::render_device& device, VkQueue submit_queue, vk::vk_data_heap& upload_heap) void initialize(vk::render_device& device, VkQueue submit_queue, vk::data_heap& upload_heap)
{ {
m_device = &device; m_device = &device;
m_memory_types = device.get_memory_mapping(); m_memory_types = device.get_memory_mapping();

View File

@ -63,7 +63,7 @@ namespace
std::tuple<u32, std::tuple<VkDeviceSize, VkIndexType>> generate_emulating_index_buffer( std::tuple<u32, std::tuple<VkDeviceSize, VkIndexType>> generate_emulating_index_buffer(
const rsx::draw_clause& clause, u32 vertex_count, const rsx::draw_clause& clause, u32 vertex_count,
vk::vk_data_heap& m_index_buffer_ring_info) vk::data_heap& m_index_buffer_ring_info)
{ {
u32 index_count = get_index_count(clause.primitive, vertex_count); u32 index_count = get_index_count(clause.primitive, vertex_count);
u32 upload_size = index_count * sizeof(u16); u32 upload_size = index_count * sizeof(u16);
@ -91,7 +91,7 @@ namespace
struct draw_command_visitor struct draw_command_visitor
{ {
draw_command_visitor(vk::vk_data_heap& index_buffer_ring_info, rsx::vertex_input_layout& layout) draw_command_visitor(vk::data_heap& index_buffer_ring_info, rsx::vertex_input_layout& layout)
: m_index_buffer_ring_info(index_buffer_ring_info) : m_index_buffer_ring_info(index_buffer_ring_info)
, m_vertex_layout(layout) , m_vertex_layout(layout)
{ {
@ -226,7 +226,7 @@ namespace
} }
private: private:
vk::vk_data_heap& m_index_buffer_ring_info; vk::data_heap& m_index_buffer_ring_info;
rsx::vertex_input_layout& m_vertex_layout; rsx::vertex_input_layout& m_vertex_layout;
}; };
} }

View File

@ -1,4 +1,4 @@
#include "stdafx.h" #include "stdafx.h"
#include "Emu/System.h" #include "Emu/System.h"
#include "VKVertexProgram.h" #include "VKVertexProgram.h"
@ -28,33 +28,41 @@ std::string VKVertexDecompilerThread::compareFunction(COMPARE f, const std::stri
void VKVertexDecompilerThread::insertHeader(std::stringstream &OS) void VKVertexDecompilerThread::insertHeader(std::stringstream &OS)
{ {
OS << "#version 450\n\n"; OS << "#version 450\n\n";
OS << "#extension GL_ARB_separate_shader_objects : enable\n"; OS << "#extension GL_ARB_separate_shader_objects : enable\n\n";
OS << "layout(std140, set = 0, binding = 0) uniform VertexContextBuffer\n"; OS << "layout(std140, set = 0, binding = 0) uniform VertexContextBuffer\n";
OS << "{\n"; OS << "{\n";
OS << " mat4 scale_offset_mat;\n"; OS << " mat4 scale_offset_mat;\n";
OS << " ivec4 user_clip_enabled[2];\n"; OS << " ivec4 user_clip_enabled[2];\n";
OS << " vec4 user_clip_factor[2];\n"; OS << " vec4 user_clip_factor[2];\n";
OS << " uint transform_branch_bits;\n"; OS << " uint transform_branch_bits;\n";
OS << " uint vertex_base_index;\n";
OS << " float point_size;\n"; OS << " float point_size;\n";
OS << " float z_near;\n"; OS << " float z_near;\n";
OS << " float z_far;\n"; OS << " float z_far;\n";
OS << " ivec4 input_attributes[16];\n"; OS << "};\n\n";
OS << "};\n";
OS << "layout(std140, set = 0, binding = 1) uniform VertexLayoutBuffer\n";
OS << "{\n";
OS << " uint vertex_base_index;\n";
OS << " ivec2 input_attributes[16];\n";
OS << "};\n\n";
vk::glsl::program_input in; vk::glsl::program_input in;
in.location = SCALE_OFFSET_BIND_SLOT; in.location = VERTEX_PARAMS_BIND_SLOT;
in.domain = glsl::glsl_vertex_program; in.domain = glsl::glsl_vertex_program;
in.name = "VertexContextBuffer"; in.name = "VertexContextBuffer";
in.type = vk::glsl::input_type_uniform_buffer; in.type = vk::glsl::input_type_uniform_buffer;
inputs.push_back(in);
in.location = VERTEX_LAYOUT_BIND_SLOT;
in.name = "VertexLayoutBuffer";
inputs.push_back(in); inputs.push_back(in);
} }
void VKVertexDecompilerThread::insertInputs(std::stringstream & OS, const std::vector<ParamType>& inputs) void VKVertexDecompilerThread::insertInputs(std::stringstream & OS, const std::vector<ParamType>& inputs)
{ {
OS << "layout(set=0, binding=3) uniform usamplerBuffer persistent_input_stream;\n"; //Data stream with persistent vertex data (cacheable) OS << "layout(set=0, binding=6) uniform usamplerBuffer persistent_input_stream;\n"; //Data stream with persistent vertex data (cacheable)
OS << "layout(set=0, binding=4) uniform usamplerBuffer volatile_input_stream;\n"; //Data stream with per-draw data (registers and immediate draw data) OS << "layout(set=0, binding=7) uniform usamplerBuffer volatile_input_stream;\n"; //Data stream with per-draw data (registers and immediate draw data)
vk::glsl::program_input in; vk::glsl::program_input in;
in.location = VERTEX_BUFFERS_FIRST_BIND_SLOT; in.location = VERTEX_BUFFERS_FIRST_BIND_SLOT;
@ -72,7 +80,7 @@ void VKVertexDecompilerThread::insertInputs(std::stringstream & OS, const std::v
void VKVertexDecompilerThread::insertConstants(std::stringstream & OS, const std::vector<ParamType> & constants) void VKVertexDecompilerThread::insertConstants(std::stringstream & OS, const std::vector<ParamType> & constants)
{ {
OS << "layout(std140, set=0, binding = 1) uniform VertexConstantsBuffer\n"; OS << "layout(std140, set=0, binding = 2) uniform VertexConstantsBuffer\n";
OS << "{\n"; OS << "{\n";
OS << " vec4 vc[468];\n"; OS << " vec4 vc[468];\n";
OS << "};\n\n"; OS << "};\n\n";

View File

@ -584,18 +584,48 @@ namespace rsx
rsx->m_graphics_state |= rsx::pipeline_state::fragment_program_dirty; rsx->m_graphics_state |= rsx::pipeline_state::fragment_program_dirty;
} }
void set_surface_dirty_bit(thread* rsx, u32, u32) void set_surface_dirty_bit(thread* rsx, u32 reg, u32 arg)
{ {
if (reg == NV4097_SET_SURFACE_CLIP_VERTICAL ||
reg == NV4097_SET_SURFACE_CLIP_HORIZONTAL)
{
if (arg != method_registers.register_previous_value)
{
rsx->m_graphics_state |= rsx::pipeline_state::vertex_state_dirty;
}
}
rsx->m_rtts_dirty = true; rsx->m_rtts_dirty = true;
rsx->m_framebuffer_state_contested = false; rsx->m_framebuffer_state_contested = false;
} }
void set_surface_format(thread* rsx, u32 reg, u32 arg)
{
// Special consideration - antialiasing control can affect ROP state
const auto aa_mask = (0xF << 12);
if ((arg & aa_mask) != (method_registers.register_previous_value & aa_mask))
{
// Antialias control has changed, update ROP parameters
rsx->m_graphics_state |= rsx::pipeline_state::fragment_state_dirty;
}
set_surface_dirty_bit(rsx, reg, arg);
}
void set_surface_options_dirty_bit(thread* rsx, u32, u32) void set_surface_options_dirty_bit(thread* rsx, u32, u32)
{ {
if (rsx->m_framebuffer_state_contested) if (rsx->m_framebuffer_state_contested)
rsx->m_rtts_dirty = true; rsx->m_rtts_dirty = true;
} }
void set_ROP_state_dirty_bit(thread* rsx, u32, u32 arg)
{
if (arg != method_registers.register_previous_value)
{
rsx->m_graphics_state |= rsx::fragment_state_dirty;
}
}
void set_vertex_base_offset(thread* rsx, u32 reg, u32 arg) void set_vertex_base_offset(thread* rsx, u32 reg, u32 arg)
{ {
if (rsx->in_begin_end) if (rsx->in_begin_end)
@ -620,6 +650,22 @@ namespace rsx
} }
} }
void set_vertex_env_dirty_bit(thread* rsx, u32 reg, u32 arg)
{
if (arg != method_registers.register_previous_value)
{
rsx->m_graphics_state |= rsx::pipeline_state::vertex_state_dirty;
}
}
void set_fragment_env_dirty_bit(thread* rsx, u32 reg, u32 arg)
{
if (arg != method_registers.register_previous_value)
{
rsx->m_graphics_state |= rsx::pipeline_state::fragment_state_dirty;
}
}
template<u32 index> template<u32 index>
struct set_texture_dirty_bit struct set_texture_dirty_bit
{ {
@ -647,6 +693,18 @@ namespace rsx
} }
} }
}; };
template<u32 index>
struct set_viewport_dirty_bit
{
static void impl(thread* rsx, u32 _reg, u32 arg)
{
if (arg != method_registers.register_previous_value)
{
rsx->m_graphics_state |= rsx::pipeline_state::vertex_state_dirty;
}
}
};
} }
namespace nv308a namespace nv308a
@ -2619,7 +2677,7 @@ namespace rsx
bind<NV4097_SET_CONTEXT_DMA_COLOR_C, nv4097::set_surface_dirty_bit>(); bind<NV4097_SET_CONTEXT_DMA_COLOR_C, nv4097::set_surface_dirty_bit>();
bind<NV4097_SET_CONTEXT_DMA_COLOR_D, nv4097::set_surface_dirty_bit>(); bind<NV4097_SET_CONTEXT_DMA_COLOR_D, nv4097::set_surface_dirty_bit>();
bind<NV4097_SET_CONTEXT_DMA_ZETA, nv4097::set_surface_dirty_bit>(); bind<NV4097_SET_CONTEXT_DMA_ZETA, nv4097::set_surface_dirty_bit>();
bind<NV4097_SET_SURFACE_FORMAT, nv4097::set_surface_dirty_bit>(); bind<NV4097_SET_SURFACE_FORMAT, nv4097::set_surface_format>();
bind<NV4097_SET_SURFACE_PITCH_A, nv4097::set_surface_dirty_bit>(); bind<NV4097_SET_SURFACE_PITCH_A, nv4097::set_surface_dirty_bit>();
bind<NV4097_SET_SURFACE_PITCH_B, nv4097::set_surface_dirty_bit>(); bind<NV4097_SET_SURFACE_PITCH_B, nv4097::set_surface_dirty_bit>();
bind<NV4097_SET_SURFACE_PITCH_C, nv4097::set_surface_dirty_bit>(); bind<NV4097_SET_SURFACE_PITCH_C, nv4097::set_surface_dirty_bit>();
@ -2660,6 +2718,20 @@ namespace rsx
bind<NV4097_SET_VERTEX_ATTRIB_OUTPUT_MASK, nv4097::set_vertex_attribute_output_mask>(); bind<NV4097_SET_VERTEX_ATTRIB_OUTPUT_MASK, nv4097::set_vertex_attribute_output_mask>();
bind<NV4097_SET_VERTEX_DATA_BASE_OFFSET, nv4097::set_vertex_base_offset>(); bind<NV4097_SET_VERTEX_DATA_BASE_OFFSET, nv4097::set_vertex_base_offset>();
bind<NV4097_SET_VERTEX_DATA_BASE_INDEX, nv4097::set_index_base_offset>(); bind<NV4097_SET_VERTEX_DATA_BASE_INDEX, nv4097::set_index_base_offset>();
bind<NV4097_SET_USER_CLIP_PLANE_CONTROL, nv4097::set_vertex_env_dirty_bit>();
bind<NV4097_SET_TRANSFORM_BRANCH_BITS, nv4097::set_vertex_env_dirty_bit>();
bind<NV4097_SET_CLIP_MIN, nv4097::set_vertex_env_dirty_bit>();
bind<NV4097_SET_CLIP_MAX, nv4097::set_vertex_env_dirty_bit>();
bind<NV4097_SET_ALPHA_FUNC, nv4097::set_ROP_state_dirty_bit>();
bind<NV4097_SET_ALPHA_REF, nv4097::set_ROP_state_dirty_bit>();
bind<NV4097_SET_ALPHA_TEST_ENABLE, nv4097::set_ROP_state_dirty_bit>();
bind<NV4097_SET_ANTI_ALIASING_CONTROL, nv4097::set_ROP_state_dirty_bit>();
bind<NV4097_SET_SHADER_PACKER, nv4097::set_ROP_state_dirty_bit>();
bind<NV4097_SET_SHADER_WINDOW, nv4097::set_ROP_state_dirty_bit>();
bind<NV4097_SET_FOG_MODE, nv4097::set_ROP_state_dirty_bit>();
bind_array<NV4097_SET_FOG_PARAMS, 1, 2, nv4097::set_ROP_state_dirty_bit>();
bind_range<NV4097_SET_VIEWPORT_SCALE, 1, 3, nv4097::set_viewport_dirty_bit>();
bind_range<NV4097_SET_VIEWPORT_OFFSET, 1, 3, nv4097::set_viewport_dirty_bit>();
//NV308A //NV308A
bind_range<NV308A_COLOR, 1, 256, nv308a::color>(); bind_range<NV308A_COLOR, 1, 256, nv308a::color>();