mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-03-29 22:20:48 +00:00
vk: Compute kernel fixups
- Adhere to the workgroup count limits exposed by the GPU vendor. The kernels already execute properly even when going beyond the limits, but this removes validation noise.
- Fix invocation counts for the deswizzle kernel. The count was incorrect if the block size was not 4, causing a bunch of useless work to be done.
This commit is contained in:
parent
99d71fdc2a
commit
508ffcb775
@ -26,6 +26,7 @@ namespace vk
|
|||||||
u32 push_constants_size = 0;
|
u32 push_constants_size = 0;
|
||||||
u32 optimal_group_size = 1;
|
u32 optimal_group_size = 1;
|
||||||
u32 optimal_kernel_size = 1;
|
u32 optimal_kernel_size = 1;
|
||||||
|
u32 max_invocations_x = 65535;
|
||||||
|
|
||||||
virtual std::vector<std::pair<VkDescriptorType, u8>> get_descriptor_layout()
|
virtual std::vector<std::pair<VkDescriptorType, u8>> get_descriptor_layout()
|
||||||
{
|
{
|
||||||
@ -116,6 +117,9 @@ namespace vk
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const auto& gpu = vk::get_current_renderer()->gpu();
|
||||||
|
max_invocations_x = gpu.get_limits().maxComputeWorkGroupCount[0];
|
||||||
|
|
||||||
initialized = true;
|
initialized = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -203,7 +207,23 @@ namespace vk
|
|||||||
|
|
||||||
virtual void run(VkCommandBuffer cmd, u32 num_invocations)
|
virtual void run(VkCommandBuffer cmd, u32 num_invocations)
|
||||||
{
|
{
|
||||||
run(cmd, num_invocations, 1, 1);
|
u32 invocations_x, invocations_y;
|
||||||
|
if (num_invocations > max_invocations_x)
|
||||||
|
{
|
||||||
|
// AMD hw reports an annoyingly small maximum number of invocations in the X dimension
|
||||||
|
// Split the 1D job into 2 dimensions to accommodate this
|
||||||
|
invocations_x = (u32)floor(std::sqrt(num_invocations));
|
||||||
|
invocations_y = invocations_x;
|
||||||
|
|
||||||
|
if (num_invocations % invocations_x) invocations_y++;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
invocations_x = num_invocations;
|
||||||
|
invocations_y = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
run(cmd, invocations_x, invocations_y, 1);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -215,14 +235,10 @@ namespace vk
|
|||||||
u32 kernel_size = 1;
|
u32 kernel_size = 1;
|
||||||
|
|
||||||
std::string variables, work_kernel, loop_advance, suffix;
|
std::string variables, work_kernel, loop_advance, suffix;
|
||||||
std::string index_declaration;
|
|
||||||
std::string method_declarations;
|
std::string method_declarations;
|
||||||
|
|
||||||
cs_shuffle_base()
|
cs_shuffle_base()
|
||||||
{
|
{
|
||||||
index_declaration =
|
|
||||||
"gl_GlobalInvocationID.x * KERNEL_SIZE";
|
|
||||||
|
|
||||||
work_kernel =
|
work_kernel =
|
||||||
" value = data[index];\n"
|
" value = data[index];\n"
|
||||||
" data[index] = %f(value);\n";
|
" data[index] = %f(value);\n";
|
||||||
@ -263,7 +279,8 @@ namespace vk
|
|||||||
"\n"
|
"\n"
|
||||||
"void main()\n"
|
"void main()\n"
|
||||||
"{\n"
|
"{\n"
|
||||||
" uint index = gl_GlobalInvocationID.x;\n"
|
" uint invocations_x = (gl_NumWorkGroups.x * gl_WorkGroupSize.x);"
|
||||||
|
" uint index = (gl_GlobalInvocationID.y * invocations_x) + gl_GlobalInvocationID.x;\n"
|
||||||
" uint value;\n"
|
" uint value;\n"
|
||||||
" %vars"
|
" %vars"
|
||||||
"\n";
|
"\n";
|
||||||
@ -761,7 +778,7 @@ namespace vk
|
|||||||
params.logd = rsx::ceil_log2(depth);
|
params.logd = rsx::ceil_log2(depth);
|
||||||
set_parameters(cmd);
|
set_parameters(cmd);
|
||||||
|
|
||||||
const u32 num_bytes_per_invocation = (4 * optimal_group_size);
|
const u32 num_bytes_per_invocation = (sizeof(_BlockType) * optimal_group_size);
|
||||||
const u32 linear_invocations = rsx::aligned_div(data_length, num_bytes_per_invocation);
|
const u32 linear_invocations = rsx::aligned_div(data_length, num_bytes_per_invocation);
|
||||||
compute_task::run(cmd, linear_invocations);
|
compute_task::run(cmd, linear_invocations);
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user