From c8366b0b0f6ab996adcbcf1ee09a3ed6298e7d4e Mon Sep 17 00:00:00 2001 From: twinaphex Date: Fri, 6 Jul 2018 05:17:39 +0200 Subject: [PATCH] Squashed 'deps/SPIRV-Cross/' changes from f96c9f9fb4..0f59016635 0f59016635 Merge pull request #636 from KhronosGroup/cfg-refactor 2bf57d6dff Deal with composite constants in variable initializer. dcddd5326e Add LUT test cases for OpVariable with initializer. af290ede87 Remove some redundant spvArrayCopy declarations. 8c314112b4 Run format_all.sh. 5582523d9a Add some tests for LUT promotion. 5143695080 Don't need to enclose expression for arrays. d29f48ef06 Deduce constant LUTs from read-write variables. b5ed706860 Hoist out variable scope analysis. c26c41b26b Make the CFGs for all active functions available. 6fdadb9218 Track partial writes as well. 7216129377 Hoist out the inline VariableAccessHandler class. git-subtree-dir: deps/SPIRV-Cross git-subtree-split: 0f590166359f4ee35d80040f0965417f482cd6b1 --- .../frag/lut-promotion-initializer.asm.frag | 57 ++ .../frag/array-lut-no-loop-variable.frag | 3 +- .../frag/constant-composites.frag | 2 +- .../opt/shaders-hlsl/frag/lut-promotion.frag | 57 ++ .../frag/lut-promotion-initializer.asm.frag | 69 ++ .../frag/array-lut-no-loop-variable.frag | 19 +- .../opt/shaders-msl/frag/constant-array.frag | 6 +- .../shaders-msl/frag/constant-composites.frag | 2 +- .../opt/shaders-msl/frag/lut-promotion.frag | 69 ++ .../frag/lut-promotion-initializer.asm.frag | 42 + .../opt/shaders/comp/generate_height.comp | 4 +- .../opt/shaders/flatten/copy.flatten.vert | 2 +- .../opt/shaders/flatten/dynamic.flatten.vert | 4 +- ...multi-dimensional.desktop.flatten_dim.frag | 2 +- .../frag/array-lut-no-loop-variable.frag | 5 +- .../opt/shaders/frag/constant-array.frag | 9 +- .../opt/shaders/frag/constant-composites.frag | 2 +- reference/opt/shaders/frag/lut-promotion.frag | 42 + .../frag/lut-promotion-initializer.asm.frag | 55 ++ .../frag/array-lut-no-loop-variable.frag | 3 +- .../frag/constant-composites.frag | 2 +- .../shaders-hlsl/frag/lut-promotion.frag | 55 ++ .../frag/lut-promotion-initializer.asm.frag | 67 ++ .../asm/frag/op-constant-null.asm.frag | 17 - .../shaders-msl/comp/composite-construct.comp | 3 +- .../frag/array-lut-no-loop-variable.frag | 19 +- .../shaders-msl/frag/constant-array.frag | 8 +- .../shaders-msl/frag/constant-composites.frag | 2 +- reference/shaders-msl/frag/lut-promotion.frag | 67 ++ .../frag/lut-promotion-initializer.asm.frag | 40 + .../asm/frag/op-constant-null.asm.frag | 3 +- .../shaders/comp/composite-construct.comp | 7 +- reference/shaders/flatten/copy.flatten.vert | 2 +- .../shaders/flatten/dynamic.flatten.vert | 4 +- ...multi-dimensional.desktop.flatten_dim.frag | 2 +- .../frag/array-lut-no-loop-variable.frag | 5 +- reference/shaders/frag/constant-array.frag | 11 +- .../shaders/frag/constant-composites.frag | 2 +- reference/shaders/frag/lut-promotion.frag | 40 + .../frag/lut-promotion-initializer.asm.frag | 195 +++++ shaders-hlsl/frag/lut-promotion.frag | 44 ++ .../frag/lut-promotion-initializer.asm.frag | 195 +++++ shaders-msl/frag/lut-promotion.frag | 44 ++ .../frag/lut-promotion-initializer.asm.frag | 195 +++++ shaders/frag/lut-promotion.frag | 44 ++ spirv_common.hpp | 4 +- spirv_cpp.cpp | 1 + spirv_cross.cpp | 734 +++++++++++------- spirv_cross.hpp | 53 +- spirv_glsl.cpp | 84 +- spirv_glsl.hpp | 3 +- spirv_hlsl.cpp | 1 + spirv_msl.cpp | 33 +- spirv_msl.hpp | 1 + 54 files changed, 2019 insertions(+), 422 deletions(-) create mode 100644 
reference/opt/shaders-hlsl/asm/frag/lut-promotion-initializer.asm.frag create mode 100644 reference/opt/shaders-hlsl/frag/lut-promotion.frag create mode 100644 reference/opt/shaders-msl/asm/frag/lut-promotion-initializer.asm.frag create mode 100644 reference/opt/shaders-msl/frag/lut-promotion.frag create mode 100644 reference/opt/shaders/asm/frag/lut-promotion-initializer.asm.frag create mode 100644 reference/opt/shaders/frag/lut-promotion.frag create mode 100644 reference/shaders-hlsl/asm/frag/lut-promotion-initializer.asm.frag create mode 100644 reference/shaders-hlsl/frag/lut-promotion.frag create mode 100644 reference/shaders-msl/asm/frag/lut-promotion-initializer.asm.frag create mode 100644 reference/shaders-msl/frag/lut-promotion.frag create mode 100644 reference/shaders/asm/frag/lut-promotion-initializer.asm.frag create mode 100644 reference/shaders/frag/lut-promotion.frag create mode 100644 shaders-hlsl/asm/frag/lut-promotion-initializer.asm.frag create mode 100644 shaders-hlsl/frag/lut-promotion.frag create mode 100644 shaders-msl/asm/frag/lut-promotion-initializer.asm.frag create mode 100644 shaders-msl/frag/lut-promotion.frag create mode 100644 shaders/asm/frag/lut-promotion-initializer.asm.frag create mode 100644 shaders/frag/lut-promotion.frag diff --git a/reference/opt/shaders-hlsl/asm/frag/lut-promotion-initializer.asm.frag b/reference/opt/shaders-hlsl/asm/frag/lut-promotion-initializer.asm.frag new file mode 100644 index 0000000000..5deae3a569 --- /dev/null +++ b/reference/opt/shaders-hlsl/asm/frag/lut-promotion-initializer.asm.frag @@ -0,0 +1,57 @@ +static const float _46[16] = { 1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f }; +static const float4 _76[4] = { 0.0f.xxxx, 1.0f.xxxx, 8.0f.xxxx, 5.0f.xxxx }; +static const float4 _90[4] = { 20.0f.xxxx, 30.0f.xxxx, 50.0f.xxxx, 60.0f.xxxx }; + +static float FragColor; +static int index; + +struct SPIRV_Cross_Input +{ + nointerpolation int index : TEXCOORD0; +}; + +struct SPIRV_Cross_Output +{ + float FragColor : SV_Target0; +}; + +void frag_main() +{ + float4 foobar[4] = _76; + float4 baz[4] = _76; + FragColor = _46[index]; + if (index < 10) + { + FragColor += _46[index ^ 1]; + } + else + { + FragColor += _46[index & 1]; + } + bool _99 = index > 30; + if (_99) + { + FragColor += _76[index & 3].y; + } + else + { + FragColor += _76[index & 1].x; + } + if (_99) + { + foobar[1].z = 20.0f; + } + int _37 = index & 3; + FragColor += foobar[_37].z; + baz = _90; + FragColor += baz[_37].z; +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + index = stage_input.index; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/opt/shaders-hlsl/frag/array-lut-no-loop-variable.frag b/reference/opt/shaders-hlsl/frag/array-lut-no-loop-variable.frag index 8cb52f0a4d..3adf7d9852 100644 --- a/reference/opt/shaders-hlsl/frag/array-lut-no-loop-variable.frag +++ b/reference/opt/shaders-hlsl/frag/array-lut-no-loop-variable.frag @@ -15,11 +15,10 @@ struct SPIRV_Cross_Output void frag_main() { - float lut[5] = _17; for (int _46 = 0; _46 < 4; ) { int _33 = _46 + 1; - FragColor += lut[_33].xxxx; + FragColor += _17[_33].xxxx; _46 = _33; continue; } diff --git a/reference/opt/shaders-hlsl/frag/constant-composites.frag b/reference/opt/shaders-hlsl/frag/constant-composites.frag index 0514eef1ee..2613e1c2c5 100644 --- a/reference/opt/shaders-hlsl/frag/constant-composites.frag +++ 
b/reference/opt/shaders-hlsl/frag/constant-composites.frag @@ -30,7 +30,7 @@ void frag_main() lut = _16; foos = _28; FragColor = lut[_line].xxxx; - FragColor += (foos[_line].a * (foos[1 - _line].a)).xxxx; + FragColor += (foos[_line].a * foos[1 - _line].a).xxxx; } SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) diff --git a/reference/opt/shaders-hlsl/frag/lut-promotion.frag b/reference/opt/shaders-hlsl/frag/lut-promotion.frag new file mode 100644 index 0000000000..aae0d39de2 --- /dev/null +++ b/reference/opt/shaders-hlsl/frag/lut-promotion.frag @@ -0,0 +1,57 @@ +static const float _16[16] = { 1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f }; +static const float4 _60[4] = { 0.0f.xxxx, 1.0f.xxxx, 8.0f.xxxx, 5.0f.xxxx }; +static const float4 _104[4] = { 20.0f.xxxx, 30.0f.xxxx, 50.0f.xxxx, 60.0f.xxxx }; + +static float FragColor; +static int index; + +struct SPIRV_Cross_Input +{ + nointerpolation int index : TEXCOORD0; +}; + +struct SPIRV_Cross_Output +{ + float FragColor : SV_Target0; +}; + +void frag_main() +{ + FragColor = _16[index]; + if (index < 10) + { + FragColor += _16[index ^ 1]; + } + else + { + FragColor += _16[index & 1]; + } + bool _63 = index > 30; + if (_63) + { + FragColor += _60[index & 3].y; + } + else + { + FragColor += _60[index & 1].x; + } + float4 foobar[4] = _60; + if (_63) + { + foobar[1].z = 20.0f; + } + int _91 = index & 3; + FragColor += foobar[_91].z; + float4 baz[4] = _60; + baz = _104; + FragColor += baz[_91].z; +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + index = stage_input.index; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/opt/shaders-msl/asm/frag/lut-promotion-initializer.asm.frag b/reference/opt/shaders-msl/asm/frag/lut-promotion-initializer.asm.frag new file mode 100644 index 0000000000..9db6b5470d --- /dev/null +++ b/reference/opt/shaders-msl/asm/frag/lut-promotion-initializer.asm.frag @@ -0,0 +1,69 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +constant float _46[16] = {1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0}; +constant float4 _76[4] = {float4(0.0), float4(1.0), float4(8.0), float4(5.0)}; +constant float4 _90[4] = {float4(20.0), float4(30.0), float4(50.0), float4(60.0)}; + +struct main0_out +{ + float FragColor [[color(0)]]; +}; + +struct main0_in +{ + int index [[user(locn0)]]; +}; + +// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment. +template +void spvArrayCopy(thread T (&dst)[N], thread const T (&src)[N]) +{ + for (uint i = 0; i < N; dst[i] = src[i], i++); +} + +// An overload for constant arrays. 
+template +void spvArrayCopyConstant(thread T (&dst)[N], constant T (&src)[N]) +{ + for (uint i = 0; i < N; dst[i] = src[i], i++); +} + +fragment main0_out main0(main0_in in [[stage_in]]) +{ + float4 foobar[4] = {float4(0.0), float4(1.0), float4(8.0), float4(5.0)}; + float4 baz[4] = {float4(0.0), float4(1.0), float4(8.0), float4(5.0)}; + main0_out out = {}; + out.FragColor = _46[in.index]; + if (in.index < 10) + { + out.FragColor += _46[in.index ^ 1]; + } + else + { + out.FragColor += _46[in.index & 1]; + } + bool _99 = in.index > 30; + if (_99) + { + out.FragColor += _76[in.index & 3].y; + } + else + { + out.FragColor += _76[in.index & 1].x; + } + if (_99) + { + foobar[1].z = 20.0; + } + int _37 = in.index & 3; + out.FragColor += foobar[_37].z; + spvArrayCopyConstant(baz, _90); + out.FragColor += baz[_37].z; + return out; +} + diff --git a/reference/opt/shaders-msl/frag/array-lut-no-loop-variable.frag b/reference/opt/shaders-msl/frag/array-lut-no-loop-variable.frag index 6c8299fa91..60868acd87 100644 --- a/reference/opt/shaders-msl/frag/array-lut-no-loop-variable.frag +++ b/reference/opt/shaders-msl/frag/array-lut-no-loop-variable.frag @@ -1,5 +1,3 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" - #include #include @@ -12,28 +10,13 @@ struct main0_out float4 FragColor [[color(0)]]; }; -// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment. -template -void spvArrayCopy(thread T (&dst)[N], thread const T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - -// An overload for constant arrays. -template -void spvArrayCopyConstant(thread T (&dst)[N], constant T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - fragment main0_out main0() { main0_out out = {}; - float lut[5] = {1.0, 2.0, 3.0, 4.0, 5.0}; for (int _46 = 0; _46 < 4; ) { int _33 = _46 + 1; - out.FragColor += float4(lut[_33]); + out.FragColor += float4(_17[_33]); _46 = _33; continue; } diff --git a/reference/opt/shaders-msl/frag/constant-array.frag b/reference/opt/shaders-msl/frag/constant-array.frag index 63576f109c..bb55934c6d 100644 --- a/reference/opt/shaders-msl/frag/constant-array.frag +++ b/reference/opt/shaders-msl/frag/constant-array.frag @@ -44,10 +44,8 @@ void spvArrayCopyConstant(thread T (&dst)[N], constant T (&src)[N]) fragment main0_out main0(main0_in in [[stage_in]]) { main0_out out = {}; - float4 indexable[3] = {float4(1.0), float4(2.0), float4(3.0)}; - float4 indexable_1[2][2] = {{float4(1.0), float4(2.0)}, {float4(8.0), float4(10.0)}}; - Foobar indexable_2[2] = {{10.0, 40.0}, {90.0, 70.0}}; - out.FragColor = ((indexable[in.index] + (indexable_1[in.index][in.index + 1])) + float4(30.0)) + float4(indexable_2[in.index].a + indexable_2[in.index].b); + Foobar indexable[2] = {{10.0, 40.0}, {90.0, 70.0}}; + out.FragColor = ((_37[in.index] + _55[in.index][in.index + 1]) + float4(30.0)) + float4(indexable[in.index].a + indexable[in.index].b); return out; } diff --git a/reference/opt/shaders-msl/frag/constant-composites.frag b/reference/opt/shaders-msl/frag/constant-composites.frag index ec5d66e86d..cb3e592337 100644 --- a/reference/opt/shaders-msl/frag/constant-composites.frag +++ b/reference/opt/shaders-msl/frag/constant-composites.frag @@ -44,7 +44,7 @@ fragment main0_out main0(main0_in in [[stage_in]]) float lut[4] = {1.0, 4.0, 3.0, 2.0}; Foo foos[2] = {{10.0, 20.0}, {30.0, 40.0}}; out.FragColor = float4(lut[in.line]); - out.FragColor += float4(foos[in.line].a * (foos[1 - in.line].a)); + out.FragColor += float4(foos[in.line].a * 
foos[1 - in.line].a); return out; } diff --git a/reference/opt/shaders-msl/frag/lut-promotion.frag b/reference/opt/shaders-msl/frag/lut-promotion.frag new file mode 100644 index 0000000000..4efdf4a196 --- /dev/null +++ b/reference/opt/shaders-msl/frag/lut-promotion.frag @@ -0,0 +1,69 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +constant float _16[16] = {1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0}; +constant float4 _60[4] = {float4(0.0), float4(1.0), float4(8.0), float4(5.0)}; +constant float4 _104[4] = {float4(20.0), float4(30.0), float4(50.0), float4(60.0)}; + +struct main0_out +{ + float FragColor [[color(0)]]; +}; + +struct main0_in +{ + int index [[user(locn0)]]; +}; + +// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment. +template +void spvArrayCopy(thread T (&dst)[N], thread const T (&src)[N]) +{ + for (uint i = 0; i < N; dst[i] = src[i], i++); +} + +// An overload for constant arrays. +template +void spvArrayCopyConstant(thread T (&dst)[N], constant T (&src)[N]) +{ + for (uint i = 0; i < N; dst[i] = src[i], i++); +} + +fragment main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + out.FragColor = _16[in.index]; + if (in.index < 10) + { + out.FragColor += _16[in.index ^ 1]; + } + else + { + out.FragColor += _16[in.index & 1]; + } + bool _63 = in.index > 30; + if (_63) + { + out.FragColor += _60[in.index & 3].y; + } + else + { + out.FragColor += _60[in.index & 1].x; + } + float4 foobar[4] = {float4(0.0), float4(1.0), float4(8.0), float4(5.0)}; + if (_63) + { + foobar[1].z = 20.0; + } + int _91 = in.index & 3; + out.FragColor += foobar[_91].z; + float4 baz[4] = {float4(0.0), float4(1.0), float4(8.0), float4(5.0)}; + spvArrayCopyConstant(baz, _104); + out.FragColor += baz[_91].z; + return out; +} + diff --git a/reference/opt/shaders/asm/frag/lut-promotion-initializer.asm.frag b/reference/opt/shaders/asm/frag/lut-promotion-initializer.asm.frag new file mode 100644 index 0000000000..d88c0e36d9 --- /dev/null +++ b/reference/opt/shaders/asm/frag/lut-promotion-initializer.asm.frag @@ -0,0 +1,42 @@ +#version 310 es +precision mediump float; +precision highp int; + +const float _46[16] = float[](1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0); +const vec4 _76[4] = vec4[](vec4(0.0), vec4(1.0), vec4(8.0), vec4(5.0)); + +layout(location = 0) out float FragColor; +layout(location = 0) flat in mediump int index; + +void main() +{ + vec4 foobar[4] = _76; + vec4 baz[4] = _76; + FragColor = _46[index]; + if (index < 10) + { + FragColor += _46[index ^ 1]; + } + else + { + FragColor += _46[index & 1]; + } + bool _99 = index > 30; + if (_99) + { + FragColor += _76[index & 3].y; + } + else + { + FragColor += _76[index & 1].x; + } + if (_99) + { + foobar[1].z = 20.0; + } + mediump int _37 = index & 3; + FragColor += foobar[_37].z; + baz = vec4[](vec4(20.0), vec4(30.0), vec4(50.0), vec4(60.0)); + FragColor += baz[_37].z; +} + diff --git a/reference/opt/shaders/comp/generate_height.comp b/reference/opt/shaders/comp/generate_height.comp index 17d3073d2e..ff96e7505a 100644 --- a/reference/opt/shaders/comp/generate_height.comp +++ b/reference/opt/shaders/comp/generate_height.comp @@ -48,8 +48,8 @@ void main() vec2 _387 = _316.xx; vec2 _392 = _316.yy; vec2 _395 = _392 * _137.distribution[_280].yx; - vec2 _421 = _392 * (_137.distribution[(_476 * _448) + _475]).yx; - vec2 _429 = ((_137.distribution[(_476 * _448) + _475]) 
* _387) + vec2(-_421.x, _421.y); + vec2 _421 = _392 * _137.distribution[(_476 * _448) + _475].yx; + vec2 _429 = (_137.distribution[(_476 * _448) + _475] * _387) + vec2(-_421.x, _421.y); _225.heights[_280] = packHalf2x16(((_137.distribution[_280] * _387) + vec2(-_395.x, _395.y)) + vec2(_429.x, -_429.y)); } diff --git a/reference/opt/shaders/flatten/copy.flatten.vert b/reference/opt/shaders/flatten/copy.flatten.vert index 27ce450d62..33caec4f78 100644 --- a/reference/opt/shaders/flatten/copy.flatten.vert +++ b/reference/opt/shaders/flatten/copy.flatten.vert @@ -19,7 +19,7 @@ void main() for (int _96 = 0; _96 < 4; ) { vec3 _68 = aVertex.xyz - Light(UBO[_96 * 2 + 4].xyz, UBO[_96 * 2 + 4].w, UBO[_96 * 2 + 5]).Position; - vColor += (((UBO[_96 * 2 + 5]) * clamp(1.0 - (length(_68) / Light(UBO[_96 * 2 + 4].xyz, UBO[_96 * 2 + 4].w, UBO[_96 * 2 + 5]).Radius), 0.0, 1.0)) * dot(aNormal, normalize(_68))); + vColor += ((UBO[_96 * 2 + 5] * clamp(1.0 - (length(_68) / Light(UBO[_96 * 2 + 4].xyz, UBO[_96 * 2 + 4].w, UBO[_96 * 2 + 5]).Radius), 0.0, 1.0)) * dot(aNormal, normalize(_68))); _96++; continue; } diff --git a/reference/opt/shaders/flatten/dynamic.flatten.vert b/reference/opt/shaders/flatten/dynamic.flatten.vert index 8fc8ff6eed..7129af2869 100644 --- a/reference/opt/shaders/flatten/dynamic.flatten.vert +++ b/reference/opt/shaders/flatten/dynamic.flatten.vert @@ -18,8 +18,8 @@ void main() vColor = vec4(0.0); for (int _82 = 0; _82 < 4; ) { - vec3 _54 = aVertex.xyz - (UBO[_82 * 2 + 4].xyz); - vColor += (((UBO[_82 * 2 + 5]) * clamp(1.0 - (length(_54) / (UBO[_82 * 2 + 4].w)), 0.0, 1.0)) * dot(aNormal, normalize(_54))); + vec3 _54 = aVertex.xyz - UBO[_82 * 2 + 4].xyz; + vColor += ((UBO[_82 * 2 + 5] * clamp(1.0 - (length(_54) / UBO[_82 * 2 + 4].w), 0.0, 1.0)) * dot(aNormal, normalize(_54))); _82++; continue; } diff --git a/reference/opt/shaders/flatten/multi-dimensional.desktop.flatten_dim.frag b/reference/opt/shaders/flatten/multi-dimensional.desktop.flatten_dim.frag index 3657298f18..2ecee89377 100644 --- a/reference/opt/shaders/flatten/multi-dimensional.desktop.flatten_dim.frag +++ b/reference/opt/shaders/flatten/multi-dimensional.desktop.flatten_dim.frag @@ -25,6 +25,6 @@ void main() } } } - FragColor = ((values3[1 * 3 * 1 + 2 * 1 + 0]) + (values3[0 * 3 * 1 + 2 * 1 + 0])) + (values3[(vIndex + 1) * 3 * 1 + 2 * 1 + vIndex]); + FragColor = (values3[1 * 3 * 1 + 2 * 1 + 0] + values3[0 * 3 * 1 + 2 * 1 + 0]) + values3[(vIndex + 1) * 3 * 1 + 2 * 1 + vIndex]; } diff --git a/reference/opt/shaders/frag/array-lut-no-loop-variable.frag b/reference/opt/shaders/frag/array-lut-no-loop-variable.frag index 7bdfe543e4..869a76e4e7 100644 --- a/reference/opt/shaders/frag/array-lut-no-loop-variable.frag +++ b/reference/opt/shaders/frag/array-lut-no-loop-variable.frag @@ -2,15 +2,16 @@ precision mediump float; precision highp int; +const float _17[5] = float[](1.0, 2.0, 3.0, 4.0, 5.0); + layout(location = 0) out vec4 FragColor; void main() { - float lut[5] = float[](1.0, 2.0, 3.0, 4.0, 5.0); for (int _46 = 0; _46 < 4; ) { mediump int _33 = _46 + 1; - FragColor += vec4(lut[_33]); + FragColor += vec4(_17[_33]); _46 = _33; continue; } diff --git a/reference/opt/shaders/frag/constant-array.frag b/reference/opt/shaders/frag/constant-array.frag index 2af87ad80d..749fc80980 100644 --- a/reference/opt/shaders/frag/constant-array.frag +++ b/reference/opt/shaders/frag/constant-array.frag @@ -2,6 +2,9 @@ precision mediump float; precision highp int; +const vec4 _37[3] = vec4[](vec4(1.0), vec4(2.0), vec4(3.0)); +const vec4 _55[2][2] = 
vec4[][](vec4[](vec4(1.0), vec4(2.0)), vec4[](vec4(8.0), vec4(10.0))); + struct Foobar { float a; @@ -13,9 +16,7 @@ layout(location = 0) flat in mediump int index; void main() { - highp vec4 indexable[3] = vec4[](vec4(1.0), vec4(2.0), vec4(3.0)); - highp vec4 indexable_1[2][2] = vec4[][](vec4[](vec4(1.0), vec4(2.0)), vec4[](vec4(8.0), vec4(10.0))); - Foobar indexable_2[2] = Foobar[](Foobar(10.0, 40.0), Foobar(90.0, 70.0)); - FragColor = ((indexable[index] + (indexable_1[index][index + 1])) + vec4(30.0)) + vec4(indexable_2[index].a + indexable_2[index].b); + Foobar indexable[2] = Foobar[](Foobar(10.0, 40.0), Foobar(90.0, 70.0)); + FragColor = ((_37[index] + _55[index][index + 1]) + vec4(30.0)) + vec4(indexable[index].a + indexable[index].b); } diff --git a/reference/opt/shaders/frag/constant-composites.frag b/reference/opt/shaders/frag/constant-composites.frag index b105dbd26c..c65c60613d 100644 --- a/reference/opt/shaders/frag/constant-composites.frag +++ b/reference/opt/shaders/frag/constant-composites.frag @@ -18,6 +18,6 @@ void main() lut = float[](1.0, 4.0, 3.0, 2.0); foos = Foo[](Foo(10.0, 20.0), Foo(30.0, 40.0)); FragColor = vec4(lut[line]); - FragColor += vec4(foos[line].a * (foos[1 - line].a)); + FragColor += vec4(foos[line].a * foos[1 - line].a); } diff --git a/reference/opt/shaders/frag/lut-promotion.frag b/reference/opt/shaders/frag/lut-promotion.frag new file mode 100644 index 0000000000..21c925796e --- /dev/null +++ b/reference/opt/shaders/frag/lut-promotion.frag @@ -0,0 +1,42 @@ +#version 310 es +precision mediump float; +precision highp int; + +const float _16[16] = float[](1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0); +const vec4 _60[4] = vec4[](vec4(0.0), vec4(1.0), vec4(8.0), vec4(5.0)); + +layout(location = 0) out float FragColor; +layout(location = 0) flat in mediump int index; + +void main() +{ + FragColor = _16[index]; + if (index < 10) + { + FragColor += _16[index ^ 1]; + } + else + { + FragColor += _16[index & 1]; + } + bool _63 = index > 30; + if (_63) + { + FragColor += _60[index & 3].y; + } + else + { + FragColor += _60[index & 1].x; + } + vec4 foobar[4] = _60; + if (_63) + { + foobar[1].z = 20.0; + } + mediump int _91 = index & 3; + FragColor += foobar[_91].z; + vec4 baz[4] = _60; + baz = vec4[](vec4(20.0), vec4(30.0), vec4(50.0), vec4(60.0)); + FragColor += baz[_91].z; +} + diff --git a/reference/shaders-hlsl/asm/frag/lut-promotion-initializer.asm.frag b/reference/shaders-hlsl/asm/frag/lut-promotion-initializer.asm.frag new file mode 100644 index 0000000000..a7aec01bad --- /dev/null +++ b/reference/shaders-hlsl/asm/frag/lut-promotion-initializer.asm.frag @@ -0,0 +1,55 @@ +static const float _46[16] = { 1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f }; +static const float4 _76[4] = { 0.0f.xxxx, 1.0f.xxxx, 8.0f.xxxx, 5.0f.xxxx }; +static const float4 _90[4] = { 20.0f.xxxx, 30.0f.xxxx, 50.0f.xxxx, 60.0f.xxxx }; + +static float FragColor; +static int index; + +struct SPIRV_Cross_Input +{ + nointerpolation int index : TEXCOORD0; +}; + +struct SPIRV_Cross_Output +{ + float FragColor : SV_Target0; +}; + +void frag_main() +{ + float4 foobar[4] = _76; + float4 baz[4] = _76; + FragColor = _46[index]; + if (index < 10) + { + FragColor += _46[index ^ 1]; + } + else + { + FragColor += _46[index & 1]; + } + if (index > 30) + { + FragColor += _76[index & 3].y; + } + else + { + FragColor += _76[index & 1].x; + } + if (index > 30) + { + foobar[1].z = 20.0f; + } + FragColor += foobar[index & 3].z; 
+ baz = _90; + FragColor += baz[index & 3].z; +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + index = stage_input.index; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-hlsl/frag/array-lut-no-loop-variable.frag b/reference/shaders-hlsl/frag/array-lut-no-loop-variable.frag index 04d4d7fa75..407fa2bda4 100644 --- a/reference/shaders-hlsl/frag/array-lut-no-loop-variable.frag +++ b/reference/shaders-hlsl/frag/array-lut-no-loop-variable.frag @@ -15,8 +15,7 @@ struct SPIRV_Cross_Output void frag_main() { - float lut[5] = _17; - for (int i = 0; i < 4; i++, FragColor += lut[i].xxxx) + for (int i = 0; i < 4; i++, FragColor += _17[i].xxxx) { } } diff --git a/reference/shaders-hlsl/frag/constant-composites.frag b/reference/shaders-hlsl/frag/constant-composites.frag index 0514eef1ee..2613e1c2c5 100644 --- a/reference/shaders-hlsl/frag/constant-composites.frag +++ b/reference/shaders-hlsl/frag/constant-composites.frag @@ -30,7 +30,7 @@ void frag_main() lut = _16; foos = _28; FragColor = lut[_line].xxxx; - FragColor += (foos[_line].a * (foos[1 - _line].a)).xxxx; + FragColor += (foos[_line].a * foos[1 - _line].a).xxxx; } SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) diff --git a/reference/shaders-hlsl/frag/lut-promotion.frag b/reference/shaders-hlsl/frag/lut-promotion.frag new file mode 100644 index 0000000000..d148bc12cb --- /dev/null +++ b/reference/shaders-hlsl/frag/lut-promotion.frag @@ -0,0 +1,55 @@ +static const float _16[16] = { 1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f }; +static const float4 _60[4] = { 0.0f.xxxx, 1.0f.xxxx, 8.0f.xxxx, 5.0f.xxxx }; +static const float4 _104[4] = { 20.0f.xxxx, 30.0f.xxxx, 50.0f.xxxx, 60.0f.xxxx }; + +static float FragColor; +static int index; + +struct SPIRV_Cross_Input +{ + nointerpolation int index : TEXCOORD0; +}; + +struct SPIRV_Cross_Output +{ + float FragColor : SV_Target0; +}; + +void frag_main() +{ + FragColor = _16[index]; + if (index < 10) + { + FragColor += _16[index ^ 1]; + } + else + { + FragColor += _16[index & 1]; + } + if (index > 30) + { + FragColor += _60[index & 3].y; + } + else + { + FragColor += _60[index & 1].x; + } + float4 foobar[4] = _60; + if (index > 30) + { + foobar[1].z = 20.0f; + } + FragColor += foobar[index & 3].z; + float4 baz[4] = _60; + baz = _104; + FragColor += baz[index & 3].z; +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + index = stage_input.index; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-msl/asm/frag/lut-promotion-initializer.asm.frag b/reference/shaders-msl/asm/frag/lut-promotion-initializer.asm.frag new file mode 100644 index 0000000000..72d41f3e70 --- /dev/null +++ b/reference/shaders-msl/asm/frag/lut-promotion-initializer.asm.frag @@ -0,0 +1,67 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +constant float _46[16] = {1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0}; +constant float4 _76[4] = {float4(0.0), float4(1.0), float4(8.0), float4(5.0)}; +constant float4 _90[4] = {float4(20.0), float4(30.0), float4(50.0), float4(60.0)}; + +struct main0_out +{ + float FragColor [[color(0)]]; +}; + +struct main0_in +{ + int index [[user(locn0)]]; +}; + +// Implementation of an array copy function to cover GLSL's ability to copy an array via 
assignment. +template +void spvArrayCopy(thread T (&dst)[N], thread const T (&src)[N]) +{ + for (uint i = 0; i < N; dst[i] = src[i], i++); +} + +// An overload for constant arrays. +template +void spvArrayCopyConstant(thread T (&dst)[N], constant T (&src)[N]) +{ + for (uint i = 0; i < N; dst[i] = src[i], i++); +} + +fragment main0_out main0(main0_in in [[stage_in]]) +{ + float4 foobar[4] = {float4(0.0), float4(1.0), float4(8.0), float4(5.0)}; + float4 baz[4] = {float4(0.0), float4(1.0), float4(8.0), float4(5.0)}; + main0_out out = {}; + out.FragColor = _46[in.index]; + if (in.index < 10) + { + out.FragColor += _46[in.index ^ 1]; + } + else + { + out.FragColor += _46[in.index & 1]; + } + if (in.index > 30) + { + out.FragColor += _76[in.index & 3].y; + } + else + { + out.FragColor += _76[in.index & 1].x; + } + if (in.index > 30) + { + foobar[1].z = 20.0; + } + out.FragColor += foobar[in.index & 3].z; + spvArrayCopyConstant(baz, _90); + out.FragColor += baz[in.index & 3].z; + return out; +} + diff --git a/reference/shaders-msl/asm/frag/op-constant-null.asm.frag b/reference/shaders-msl/asm/frag/op-constant-null.asm.frag index 1d9d11c978..670cb5f371 100644 --- a/reference/shaders-msl/asm/frag/op-constant-null.asm.frag +++ b/reference/shaders-msl/asm/frag/op-constant-null.asm.frag @@ -1,5 +1,3 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" - #include #include @@ -18,20 +16,6 @@ struct main0_out float FragColor [[color(0)]]; }; -// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment. -template -void spvArrayCopy(thread T (&dst)[N], thread const T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - -// An overload for constant arrays. -template -void spvArrayCopyConstant(thread T (&dst)[N], constant T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - fragment main0_out main0() { main0_out out = {}; @@ -39,7 +23,6 @@ fragment main0_out main0() float4 b = float4(0.0); float2x3 c = float2x3(float3(0.0), float3(0.0)); D d = {float4(0.0), 0.0}; - float4 e[4] = {float4(0.0), float4(0.0), float4(0.0), float4(0.0)}; out.FragColor = a; return out; } diff --git a/reference/shaders-msl/comp/composite-construct.comp b/reference/shaders-msl/comp/composite-construct.comp index fb4ed1f101..ef0412b1dc 100644 --- a/reference/shaders-msl/comp/composite-construct.comp +++ b/reference/shaders-msl/comp/composite-construct.comp @@ -42,8 +42,7 @@ kernel void main0(device SSBO0& _16 [[buffer(0)]], device SSBO1& _32 [[buffer(1) float4 _37[2] = { _16.as[gl_GlobalInvocationID.x], _32.bs[gl_GlobalInvocationID.x] }; float4 values[2]; spvArrayCopy(values, _37); - float4 copy_values[2] = {float4(20.0), float4(40.0)}; - Composite c = Composite{ values[0], copy_values[1] }; + Composite c = Composite{ values[0], _43[1] }; _16.as[0] = values[gl_LocalInvocationIndex]; _32.bs[1] = c.b; } diff --git a/reference/shaders-msl/frag/array-lut-no-loop-variable.frag b/reference/shaders-msl/frag/array-lut-no-loop-variable.frag index 8943a8e5a1..37f83aae26 100644 --- a/reference/shaders-msl/frag/array-lut-no-loop-variable.frag +++ b/reference/shaders-msl/frag/array-lut-no-loop-variable.frag @@ -1,5 +1,3 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" - #include #include @@ -12,25 +10,10 @@ struct main0_out float4 FragColor [[color(0)]]; }; -// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment. 
-template -void spvArrayCopy(thread T (&dst)[N], thread const T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - -// An overload for constant arrays. -template -void spvArrayCopyConstant(thread T (&dst)[N], constant T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - fragment main0_out main0() { main0_out out = {}; - float lut[5] = {1.0, 2.0, 3.0, 4.0, 5.0}; - for (int i = 0; i < 4; i++, out.FragColor += float4(lut[i])) + for (int i = 0; i < 4; i++, out.FragColor += float4(_17[i])) { } return out; diff --git a/reference/shaders-msl/frag/constant-array.frag b/reference/shaders-msl/frag/constant-array.frag index 09f123b29a..212d4f0803 100644 --- a/reference/shaders-msl/frag/constant-array.frag +++ b/reference/shaders-msl/frag/constant-array.frag @@ -49,12 +49,10 @@ float4 resolve(thread const Foobar& f) fragment main0_out main0(main0_in in [[stage_in]]) { main0_out out = {}; - float4 indexable[3] = {float4(1.0), float4(2.0), float4(3.0)}; - float4 indexable_1[2][2] = {{float4(1.0), float4(2.0)}, {float4(8.0), float4(10.0)}}; Foobar param = {10.0, 20.0}; - Foobar indexable_2[2] = {{10.0, 40.0}, {90.0, 70.0}}; - Foobar param_1 = indexable_2[in.index]; - out.FragColor = ((indexable[in.index] + (indexable_1[in.index][in.index + 1])) + resolve(param)) + resolve(param_1); + Foobar indexable[2] = {{10.0, 40.0}, {90.0, 70.0}}; + Foobar param_1 = indexable[in.index]; + out.FragColor = ((_37[in.index] + _55[in.index][in.index + 1]) + resolve(param)) + resolve(param_1); return out; } diff --git a/reference/shaders-msl/frag/constant-composites.frag b/reference/shaders-msl/frag/constant-composites.frag index ec5d66e86d..cb3e592337 100644 --- a/reference/shaders-msl/frag/constant-composites.frag +++ b/reference/shaders-msl/frag/constant-composites.frag @@ -44,7 +44,7 @@ fragment main0_out main0(main0_in in [[stage_in]]) float lut[4] = {1.0, 4.0, 3.0, 2.0}; Foo foos[2] = {{10.0, 20.0}, {30.0, 40.0}}; out.FragColor = float4(lut[in.line]); - out.FragColor += float4(foos[in.line].a * (foos[1 - in.line].a)); + out.FragColor += float4(foos[in.line].a * foos[1 - in.line].a); return out; } diff --git a/reference/shaders-msl/frag/lut-promotion.frag b/reference/shaders-msl/frag/lut-promotion.frag new file mode 100644 index 0000000000..a8eda3e13a --- /dev/null +++ b/reference/shaders-msl/frag/lut-promotion.frag @@ -0,0 +1,67 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +constant float _16[16] = {1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0}; +constant float4 _60[4] = {float4(0.0), float4(1.0), float4(8.0), float4(5.0)}; +constant float4 _104[4] = {float4(20.0), float4(30.0), float4(50.0), float4(60.0)}; + +struct main0_out +{ + float FragColor [[color(0)]]; +}; + +struct main0_in +{ + int index [[user(locn0)]]; +}; + +// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment. +template +void spvArrayCopy(thread T (&dst)[N], thread const T (&src)[N]) +{ + for (uint i = 0; i < N; dst[i] = src[i], i++); +} + +// An overload for constant arrays. 
+template +void spvArrayCopyConstant(thread T (&dst)[N], constant T (&src)[N]) +{ + for (uint i = 0; i < N; dst[i] = src[i], i++); +} + +fragment main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + out.FragColor = _16[in.index]; + if (in.index < 10) + { + out.FragColor += _16[in.index ^ 1]; + } + else + { + out.FragColor += _16[in.index & 1]; + } + if (in.index > 30) + { + out.FragColor += _60[in.index & 3].y; + } + else + { + out.FragColor += _60[in.index & 1].x; + } + float4 foobar[4] = {float4(0.0), float4(1.0), float4(8.0), float4(5.0)}; + if (in.index > 30) + { + foobar[1].z = 20.0; + } + out.FragColor += foobar[in.index & 3].z; + float4 baz[4] = {float4(0.0), float4(1.0), float4(8.0), float4(5.0)}; + spvArrayCopyConstant(baz, _104); + out.FragColor += baz[in.index & 3].z; + return out; +} + diff --git a/reference/shaders/asm/frag/lut-promotion-initializer.asm.frag b/reference/shaders/asm/frag/lut-promotion-initializer.asm.frag new file mode 100644 index 0000000000..c08bc2c781 --- /dev/null +++ b/reference/shaders/asm/frag/lut-promotion-initializer.asm.frag @@ -0,0 +1,40 @@ +#version 310 es +precision mediump float; +precision highp int; + +const float _46[16] = float[](1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0); +const vec4 _76[4] = vec4[](vec4(0.0), vec4(1.0), vec4(8.0), vec4(5.0)); + +layout(location = 0) out float FragColor; +layout(location = 0) flat in mediump int index; + +void main() +{ + vec4 foobar[4] = _76; + vec4 baz[4] = _76; + FragColor = _46[index]; + if (index < 10) + { + FragColor += _46[index ^ 1]; + } + else + { + FragColor += _46[index & 1]; + } + if (index > 30) + { + FragColor += _76[index & 3].y; + } + else + { + FragColor += _76[index & 1].x; + } + if (index > 30) + { + foobar[1].z = 20.0; + } + FragColor += foobar[index & 3].z; + baz = vec4[](vec4(20.0), vec4(30.0), vec4(50.0), vec4(60.0)); + FragColor += baz[index & 3].z; +} + diff --git a/reference/shaders/asm/frag/op-constant-null.asm.frag b/reference/shaders/asm/frag/op-constant-null.asm.frag index c4ae981f64..970b4c4a66 100644 --- a/reference/shaders/asm/frag/op-constant-null.asm.frag +++ b/reference/shaders/asm/frag/op-constant-null.asm.frag @@ -2,6 +2,8 @@ precision mediump float; precision highp int; +const vec4 _14[4] = vec4[](vec4(0.0), vec4(0.0), vec4(0.0), vec4(0.0)); + struct D { vec4 a; @@ -16,7 +18,6 @@ void main() vec4 b = vec4(0.0); mat2x3 c = mat2x3(vec3(0.0), vec3(0.0)); D d = D(vec4(0.0), 0.0); - vec4 e[4] = vec4[](vec4(0.0), vec4(0.0), vec4(0.0), vec4(0.0)); FragColor = a; } diff --git a/reference/shaders/comp/composite-construct.comp b/reference/shaders/comp/composite-construct.comp index 91bb5348f5..3018be8f1b 100644 --- a/reference/shaders/comp/composite-construct.comp +++ b/reference/shaders/comp/composite-construct.comp @@ -1,6 +1,9 @@ #version 310 es layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; +const vec4 _66[2] = vec4[](vec4(10.0), vec4(30.0)); +const float _94[2][3] = float[][](float[](1.0, 1.0, 1.0), float[](2.0, 2.0, 2.0)); + struct Composite { vec4 a[2]; @@ -25,13 +28,11 @@ vec4 summe(vec4 values[3][2]) void main() { vec4 values[2] = vec4[](_41.as[gl_GlobalInvocationID.x], _55.bs[gl_GlobalInvocationID.x]); - vec4 const_values[2] = vec4[](vec4(10.0), vec4(30.0)); - vec4 copy_values[2] = const_values; + vec4 copy_values[2] = _66; vec4 copy_values2[2] = values; vec4 param[3][2] = vec4[][](values, copy_values, copy_values2); _41.as[gl_GlobalInvocationID.x] = summe(param); Composite c = Composite(values, 
copy_values); - float arrayofarray[2][3] = float[][](float[](1.0, 1.0, 1.0), float[](2.0, 2.0, 2.0)); float b = 10.0; float values_scalar[4] = float[](b, b, b, b); } diff --git a/reference/shaders/flatten/copy.flatten.vert b/reference/shaders/flatten/copy.flatten.vert index 2bdd723886..6416a4f425 100644 --- a/reference/shaders/flatten/copy.flatten.vert +++ b/reference/shaders/flatten/copy.flatten.vert @@ -23,7 +23,7 @@ void main() light.Radius = Light(UBO[i * 2 + 4].xyz, UBO[i * 2 + 4].w, UBO[i * 2 + 5]).Radius; light.Color = Light(UBO[i * 2 + 4].xyz, UBO[i * 2 + 4].w, UBO[i * 2 + 5]).Color; vec3 L = aVertex.xyz - light.Position; - vColor += (((UBO[i * 2 + 5]) * clamp(1.0 - (length(L) / light.Radius), 0.0, 1.0)) * dot(aNormal, normalize(L))); + vColor += ((UBO[i * 2 + 5] * clamp(1.0 - (length(L) / light.Radius), 0.0, 1.0)) * dot(aNormal, normalize(L))); } } diff --git a/reference/shaders/flatten/dynamic.flatten.vert b/reference/shaders/flatten/dynamic.flatten.vert index 6214ca450a..8be397ea3f 100644 --- a/reference/shaders/flatten/dynamic.flatten.vert +++ b/reference/shaders/flatten/dynamic.flatten.vert @@ -18,8 +18,8 @@ void main() vColor = vec4(0.0); for (int i = 0; i < 4; i++) { - vec3 L = aVertex.xyz - (UBO[i * 2 + 4].xyz); - vColor += (((UBO[i * 2 + 5]) * clamp(1.0 - (length(L) / (UBO[i * 2 + 4].w)), 0.0, 1.0)) * dot(aNormal, normalize(L))); + vec3 L = aVertex.xyz - UBO[i * 2 + 4].xyz; + vColor += ((UBO[i * 2 + 5] * clamp(1.0 - (length(L) / UBO[i * 2 + 4].w), 0.0, 1.0)) * dot(aNormal, normalize(L))); } } diff --git a/reference/shaders/flatten/multi-dimensional.desktop.flatten_dim.frag b/reference/shaders/flatten/multi-dimensional.desktop.flatten_dim.frag index 21c3363ca6..ef6bb526ab 100644 --- a/reference/shaders/flatten/multi-dimensional.desktop.flatten_dim.frag +++ b/reference/shaders/flatten/multi-dimensional.desktop.flatten_dim.frag @@ -19,6 +19,6 @@ void main() } } } - FragColor = ((values3[1 * 3 * 1 + 2 * 1 + 0]) + (values3[0 * 3 * 1 + 2 * 1 + 0])) + (values3[(vIndex + 1) * 3 * 1 + 2 * 1 + vIndex]); + FragColor = (values3[1 * 3 * 1 + 2 * 1 + 0] + values3[0 * 3 * 1 + 2 * 1 + 0]) + values3[(vIndex + 1) * 3 * 1 + 2 * 1 + vIndex]; } diff --git a/reference/shaders/frag/array-lut-no-loop-variable.frag b/reference/shaders/frag/array-lut-no-loop-variable.frag index 54d7bf774c..baf2302519 100644 --- a/reference/shaders/frag/array-lut-no-loop-variable.frag +++ b/reference/shaders/frag/array-lut-no-loop-variable.frag @@ -2,12 +2,13 @@ precision mediump float; precision highp int; +const float _17[5] = float[](1.0, 2.0, 3.0, 4.0, 5.0); + layout(location = 0) out vec4 FragColor; void main() { - float lut[5] = float[](1.0, 2.0, 3.0, 4.0, 5.0); - for (mediump int i = 0; i < 4; i++, FragColor += vec4(lut[i])) + for (mediump int i = 0; i < 4; i++, FragColor += vec4(_17[i])) { } } diff --git a/reference/shaders/frag/constant-array.frag b/reference/shaders/frag/constant-array.frag index 4da9b8948b..be033f3873 100644 --- a/reference/shaders/frag/constant-array.frag +++ b/reference/shaders/frag/constant-array.frag @@ -2,6 +2,9 @@ precision mediump float; precision highp int; +const vec4 _37[3] = vec4[](vec4(1.0), vec4(2.0), vec4(3.0)); +const vec4 _55[2][2] = vec4[][](vec4[](vec4(1.0), vec4(2.0)), vec4[](vec4(8.0), vec4(10.0))); + struct Foobar { float a; @@ -18,11 +21,9 @@ vec4 resolve(Foobar f) void main() { - highp vec4 indexable[3] = vec4[](vec4(1.0), vec4(2.0), vec4(3.0)); - highp vec4 indexable_1[2][2] = vec4[][](vec4[](vec4(1.0), vec4(2.0)), vec4[](vec4(8.0), vec4(10.0))); Foobar param = 
Foobar(10.0, 20.0); - Foobar indexable_2[2] = Foobar[](Foobar(10.0, 40.0), Foobar(90.0, 70.0)); - Foobar param_1 = indexable_2[index]; - FragColor = ((indexable[index] + (indexable_1[index][index + 1])) + resolve(param)) + resolve(param_1); + Foobar indexable[2] = Foobar[](Foobar(10.0, 40.0), Foobar(90.0, 70.0)); + Foobar param_1 = indexable[index]; + FragColor = ((_37[index] + _55[index][index + 1]) + resolve(param)) + resolve(param_1); } diff --git a/reference/shaders/frag/constant-composites.frag b/reference/shaders/frag/constant-composites.frag index b105dbd26c..c65c60613d 100644 --- a/reference/shaders/frag/constant-composites.frag +++ b/reference/shaders/frag/constant-composites.frag @@ -18,6 +18,6 @@ void main() lut = float[](1.0, 4.0, 3.0, 2.0); foos = Foo[](Foo(10.0, 20.0), Foo(30.0, 40.0)); FragColor = vec4(lut[line]); - FragColor += vec4(foos[line].a * (foos[1 - line].a)); + FragColor += vec4(foos[line].a * foos[1 - line].a); } diff --git a/reference/shaders/frag/lut-promotion.frag b/reference/shaders/frag/lut-promotion.frag new file mode 100644 index 0000000000..019393f9f3 --- /dev/null +++ b/reference/shaders/frag/lut-promotion.frag @@ -0,0 +1,40 @@ +#version 310 es +precision mediump float; +precision highp int; + +const float _16[16] = float[](1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0); +const vec4 _60[4] = vec4[](vec4(0.0), vec4(1.0), vec4(8.0), vec4(5.0)); + +layout(location = 0) out float FragColor; +layout(location = 0) flat in mediump int index; + +void main() +{ + FragColor = _16[index]; + if (index < 10) + { + FragColor += _16[index ^ 1]; + } + else + { + FragColor += _16[index & 1]; + } + if (index > 30) + { + FragColor += _60[index & 3].y; + } + else + { + FragColor += _60[index & 1].x; + } + vec4 foobar[4] = _60; + if (index > 30) + { + foobar[1].z = 20.0; + } + FragColor += foobar[index & 3].z; + vec4 baz[4] = _60; + baz = vec4[](vec4(20.0), vec4(30.0), vec4(50.0), vec4(60.0)); + FragColor += baz[index & 3].z; +} + diff --git a/shaders-hlsl/asm/frag/lut-promotion-initializer.asm.frag b/shaders-hlsl/asm/frag/lut-promotion-initializer.asm.frag new file mode 100644 index 0000000000..320e5ebfbd --- /dev/null +++ b/shaders-hlsl/asm/frag/lut-promotion-initializer.asm.frag @@ -0,0 +1,195 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 6 +; Bound: 111 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor %index + OpExecutionMode %main OriginUpperLeft + OpSource ESSL 310 + OpName %main "main" + OpName %FragColor "FragColor" + OpName %index "index" + OpName %indexable "indexable" + OpName %indexable_0 "indexable" + OpName %indexable_1 "indexable" + OpName %foo "foo" + OpName %foobar "foobar" + OpName %baz "baz" + OpDecorate %FragColor RelaxedPrecision + OpDecorate %FragColor Location 0 + OpDecorate %index RelaxedPrecision + OpDecorate %index Flat + OpDecorate %index Location 0 + OpDecorate %20 RelaxedPrecision + OpDecorate %25 RelaxedPrecision + OpDecorate %26 RelaxedPrecision + OpDecorate %32 RelaxedPrecision + OpDecorate %34 RelaxedPrecision + OpDecorate %37 RelaxedPrecision + OpDecorate %38 RelaxedPrecision + OpDecorate %39 RelaxedPrecision + OpDecorate %41 RelaxedPrecision + OpDecorate %42 RelaxedPrecision + OpDecorate %45 RelaxedPrecision + OpDecorate %46 RelaxedPrecision + OpDecorate %47 RelaxedPrecision + OpDecorate %foo RelaxedPrecision + OpDecorate %61 RelaxedPrecision + OpDecorate %66 
RelaxedPrecision + OpDecorate %68 RelaxedPrecision + OpDecorate %71 RelaxedPrecision + OpDecorate %72 RelaxedPrecision + OpDecorate %73 RelaxedPrecision + OpDecorate %75 RelaxedPrecision + OpDecorate %76 RelaxedPrecision + OpDecorate %79 RelaxedPrecision + OpDecorate %80 RelaxedPrecision + OpDecorate %81 RelaxedPrecision + OpDecorate %foobar RelaxedPrecision + OpDecorate %83 RelaxedPrecision + OpDecorate %90 RelaxedPrecision + OpDecorate %91 RelaxedPrecision + OpDecorate %93 RelaxedPrecision + OpDecorate %94 RelaxedPrecision + OpDecorate %95 RelaxedPrecision + OpDecorate %baz RelaxedPrecision + OpDecorate %105 RelaxedPrecision + OpDecorate %106 RelaxedPrecision + OpDecorate %108 RelaxedPrecision + OpDecorate %109 RelaxedPrecision + OpDecorate %110 RelaxedPrecision + OpDecorate %16 RelaxedPrecision + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 +%_ptr_Output_float = OpTypePointer Output %float + %FragColor = OpVariable %_ptr_Output_float Output + %uint = OpTypeInt 32 0 + %uint_16 = OpConstant %uint 16 +%_arr_float_uint_16 = OpTypeArray %float %uint_16 + %float_1 = OpConstant %float 1 + %float_2 = OpConstant %float 2 + %float_3 = OpConstant %float 3 + %float_4 = OpConstant %float 4 + %16 = OpConstantComposite %_arr_float_uint_16 %float_1 %float_2 %float_3 %float_4 %float_1 %float_2 %float_3 %float_4 %float_1 %float_2 %float_3 %float_4 %float_1 %float_2 %float_3 %float_4 + %int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int + %index = OpVariable %_ptr_Input_int Input +%_ptr_Function__arr_float_uint_16 = OpTypePointer Function %_arr_float_uint_16 +%_ptr_Function_float = OpTypePointer Function %float + %int_10 = OpConstant %int 10 + %bool = OpTypeBool + %int_1 = OpConstant %int 1 + %v4float = OpTypeVector %float 4 + %uint_4 = OpConstant %uint 4 +%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4 +%_ptr_Function__arr_v4float_uint_4 = OpTypePointer Function %_arr_v4float_uint_4 + %float_0 = OpConstant %float 0 + %54 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 + %55 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 + %float_8 = OpConstant %float 8 + %57 = OpConstantComposite %v4float %float_8 %float_8 %float_8 %float_8 + %float_5 = OpConstant %float 5 + %59 = OpConstantComposite %v4float %float_5 %float_5 %float_5 %float_5 + %60 = OpConstantComposite %_arr_v4float_uint_4 %54 %55 %57 %59 + %int_30 = OpConstant %int 30 + %int_3 = OpConstant %int 3 + %uint_1 = OpConstant %uint 1 + %uint_0 = OpConstant %uint 0 + %float_20 = OpConstant %float 20 + %uint_2 = OpConstant %uint 2 + %97 = OpConstantComposite %v4float %float_20 %float_20 %float_20 %float_20 + %float_30 = OpConstant %float 30 + %99 = OpConstantComposite %v4float %float_30 %float_30 %float_30 %float_30 + %float_50 = OpConstant %float 50 + %101 = OpConstantComposite %v4float %float_50 %float_50 %float_50 %float_50 + %float_60 = OpConstant %float 60 + %103 = OpConstantComposite %v4float %float_60 %float_60 %float_60 %float_60 + %104 = OpConstantComposite %_arr_v4float_uint_4 %97 %99 %101 %103 + %main = OpFunction %void None %3 + %5 = OpLabel + %indexable = OpVariable %_ptr_Function__arr_float_uint_16 Function %16 +%indexable_0 = OpVariable %_ptr_Function__arr_float_uint_16 Function %16 +%indexable_1 = OpVariable %_ptr_Function__arr_float_uint_16 Function %16 + %foo = OpVariable %_ptr_Function__arr_v4float_uint_4 Function %60 + %foobar = OpVariable %_ptr_Function__arr_v4float_uint_4 Function %60 + %baz = OpVariable %_ptr_Function__arr_v4float_uint_4 Function %60 + 
%20 = OpLoad %int %index + %24 = OpAccessChain %_ptr_Function_float %indexable %20 + %25 = OpLoad %float %24 + OpStore %FragColor %25 + %26 = OpLoad %int %index + %29 = OpSLessThan %bool %26 %int_10 + OpSelectionMerge %31 None + OpBranchConditional %29 %30 %40 + %30 = OpLabel + %32 = OpLoad %int %index + %34 = OpBitwiseXor %int %32 %int_1 + %36 = OpAccessChain %_ptr_Function_float %indexable_0 %34 + %37 = OpLoad %float %36 + %38 = OpLoad %float %FragColor + %39 = OpFAdd %float %38 %37 + OpStore %FragColor %39 + OpBranch %31 + %40 = OpLabel + %41 = OpLoad %int %index + %42 = OpBitwiseAnd %int %41 %int_1 + %44 = OpAccessChain %_ptr_Function_float %indexable_1 %42 + %45 = OpLoad %float %44 + %46 = OpLoad %float %FragColor + %47 = OpFAdd %float %46 %45 + OpStore %FragColor %47 + OpBranch %31 + %31 = OpLabel + %61 = OpLoad %int %index + %63 = OpSGreaterThan %bool %61 %int_30 + OpSelectionMerge %65 None + OpBranchConditional %63 %64 %74 + %64 = OpLabel + %66 = OpLoad %int %index + %68 = OpBitwiseAnd %int %66 %int_3 + %70 = OpAccessChain %_ptr_Function_float %foo %68 %uint_1 + %71 = OpLoad %float %70 + %72 = OpLoad %float %FragColor + %73 = OpFAdd %float %72 %71 + OpStore %FragColor %73 + OpBranch %65 + %74 = OpLabel + %75 = OpLoad %int %index + %76 = OpBitwiseAnd %int %75 %int_1 + %78 = OpAccessChain %_ptr_Function_float %foo %76 %uint_0 + %79 = OpLoad %float %78 + %80 = OpLoad %float %FragColor + %81 = OpFAdd %float %80 %79 + OpStore %FragColor %81 + OpBranch %65 + %65 = OpLabel + %83 = OpLoad %int %index + %84 = OpSGreaterThan %bool %83 %int_30 + OpSelectionMerge %86 None + OpBranchConditional %84 %85 %86 + %85 = OpLabel + %89 = OpAccessChain %_ptr_Function_float %foobar %int_1 %uint_2 + OpStore %89 %float_20 + OpBranch %86 + %86 = OpLabel + %90 = OpLoad %int %index + %91 = OpBitwiseAnd %int %90 %int_3 + %92 = OpAccessChain %_ptr_Function_float %foobar %91 %uint_2 + %93 = OpLoad %float %92 + %94 = OpLoad %float %FragColor + %95 = OpFAdd %float %94 %93 + OpStore %FragColor %95 + OpStore %baz %104 + %105 = OpLoad %int %index + %106 = OpBitwiseAnd %int %105 %int_3 + %107 = OpAccessChain %_ptr_Function_float %baz %106 %uint_2 + %108 = OpLoad %float %107 + %109 = OpLoad %float %FragColor + %110 = OpFAdd %float %109 %108 + OpStore %FragColor %110 + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl/frag/lut-promotion.frag b/shaders-hlsl/frag/lut-promotion.frag new file mode 100644 index 0000000000..0cdc8148f9 --- /dev/null +++ b/shaders-hlsl/frag/lut-promotion.frag @@ -0,0 +1,44 @@ +#version 310 es +precision mediump float; +layout(location = 0) out float FragColor; +layout(location = 0) flat in int index; + +const float LUT[16] = float[]( + 1.0, 2.0, 3.0, 4.0, + 1.0, 2.0, 3.0, 4.0, + 1.0, 2.0, 3.0, 4.0, + 1.0, 2.0, 3.0, 4.0); + +void main() +{ + // Try reading LUTs, both in branches and not branch. + FragColor = LUT[index]; + if (index < 10) + FragColor += LUT[index ^ 1]; + else + FragColor += LUT[index & 1]; + + // Not declared as a LUT, but can be promoted to one. + vec4 foo[4] = vec4[](vec4(0.0), vec4(1.0), vec4(8.0), vec4(5.0)); + if (index > 30) + { + FragColor += foo[index & 3].y; + } + else + { + FragColor += foo[index & 1].x; + } + + // Not declared as a LUT, but this cannot be promoted, because we have a partial write. + vec4 foobar[4] = vec4[](vec4(0.0), vec4(1.0), vec4(8.0), vec4(5.0)); + if (index > 30) + { + foobar[1].z = 20.0; + } + FragColor += foobar[index & 3].z; + + // Not declared as a LUT, but this cannot be promoted, because we have two complete writes. 
+ vec4 baz[4] = vec4[](vec4(0.0), vec4(1.0), vec4(8.0), vec4(5.0)); + baz = vec4[](vec4(20.0), vec4(30.0), vec4(50.0), vec4(60.0)); + FragColor += baz[index & 3].z; +} diff --git a/shaders-msl/asm/frag/lut-promotion-initializer.asm.frag b/shaders-msl/asm/frag/lut-promotion-initializer.asm.frag new file mode 100644 index 0000000000..320e5ebfbd --- /dev/null +++ b/shaders-msl/asm/frag/lut-promotion-initializer.asm.frag @@ -0,0 +1,195 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 6 +; Bound: 111 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor %index + OpExecutionMode %main OriginUpperLeft + OpSource ESSL 310 + OpName %main "main" + OpName %FragColor "FragColor" + OpName %index "index" + OpName %indexable "indexable" + OpName %indexable_0 "indexable" + OpName %indexable_1 "indexable" + OpName %foo "foo" + OpName %foobar "foobar" + OpName %baz "baz" + OpDecorate %FragColor RelaxedPrecision + OpDecorate %FragColor Location 0 + OpDecorate %index RelaxedPrecision + OpDecorate %index Flat + OpDecorate %index Location 0 + OpDecorate %20 RelaxedPrecision + OpDecorate %25 RelaxedPrecision + OpDecorate %26 RelaxedPrecision + OpDecorate %32 RelaxedPrecision + OpDecorate %34 RelaxedPrecision + OpDecorate %37 RelaxedPrecision + OpDecorate %38 RelaxedPrecision + OpDecorate %39 RelaxedPrecision + OpDecorate %41 RelaxedPrecision + OpDecorate %42 RelaxedPrecision + OpDecorate %45 RelaxedPrecision + OpDecorate %46 RelaxedPrecision + OpDecorate %47 RelaxedPrecision + OpDecorate %foo RelaxedPrecision + OpDecorate %61 RelaxedPrecision + OpDecorate %66 RelaxedPrecision + OpDecorate %68 RelaxedPrecision + OpDecorate %71 RelaxedPrecision + OpDecorate %72 RelaxedPrecision + OpDecorate %73 RelaxedPrecision + OpDecorate %75 RelaxedPrecision + OpDecorate %76 RelaxedPrecision + OpDecorate %79 RelaxedPrecision + OpDecorate %80 RelaxedPrecision + OpDecorate %81 RelaxedPrecision + OpDecorate %foobar RelaxedPrecision + OpDecorate %83 RelaxedPrecision + OpDecorate %90 RelaxedPrecision + OpDecorate %91 RelaxedPrecision + OpDecorate %93 RelaxedPrecision + OpDecorate %94 RelaxedPrecision + OpDecorate %95 RelaxedPrecision + OpDecorate %baz RelaxedPrecision + OpDecorate %105 RelaxedPrecision + OpDecorate %106 RelaxedPrecision + OpDecorate %108 RelaxedPrecision + OpDecorate %109 RelaxedPrecision + OpDecorate %110 RelaxedPrecision + OpDecorate %16 RelaxedPrecision + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 +%_ptr_Output_float = OpTypePointer Output %float + %FragColor = OpVariable %_ptr_Output_float Output + %uint = OpTypeInt 32 0 + %uint_16 = OpConstant %uint 16 +%_arr_float_uint_16 = OpTypeArray %float %uint_16 + %float_1 = OpConstant %float 1 + %float_2 = OpConstant %float 2 + %float_3 = OpConstant %float 3 + %float_4 = OpConstant %float 4 + %16 = OpConstantComposite %_arr_float_uint_16 %float_1 %float_2 %float_3 %float_4 %float_1 %float_2 %float_3 %float_4 %float_1 %float_2 %float_3 %float_4 %float_1 %float_2 %float_3 %float_4 + %int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int + %index = OpVariable %_ptr_Input_int Input +%_ptr_Function__arr_float_uint_16 = OpTypePointer Function %_arr_float_uint_16 +%_ptr_Function_float = OpTypePointer Function %float + %int_10 = OpConstant %int 10 + %bool = OpTypeBool + %int_1 = OpConstant %int 1 + %v4float = OpTypeVector %float 4 + %uint_4 = OpConstant %uint 4 +%_arr_v4float_uint_4 = OpTypeArray %v4float 
%uint_4 +%_ptr_Function__arr_v4float_uint_4 = OpTypePointer Function %_arr_v4float_uint_4 + %float_0 = OpConstant %float 0 + %54 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 + %55 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 + %float_8 = OpConstant %float 8 + %57 = OpConstantComposite %v4float %float_8 %float_8 %float_8 %float_8 + %float_5 = OpConstant %float 5 + %59 = OpConstantComposite %v4float %float_5 %float_5 %float_5 %float_5 + %60 = OpConstantComposite %_arr_v4float_uint_4 %54 %55 %57 %59 + %int_30 = OpConstant %int 30 + %int_3 = OpConstant %int 3 + %uint_1 = OpConstant %uint 1 + %uint_0 = OpConstant %uint 0 + %float_20 = OpConstant %float 20 + %uint_2 = OpConstant %uint 2 + %97 = OpConstantComposite %v4float %float_20 %float_20 %float_20 %float_20 + %float_30 = OpConstant %float 30 + %99 = OpConstantComposite %v4float %float_30 %float_30 %float_30 %float_30 + %float_50 = OpConstant %float 50 + %101 = OpConstantComposite %v4float %float_50 %float_50 %float_50 %float_50 + %float_60 = OpConstant %float 60 + %103 = OpConstantComposite %v4float %float_60 %float_60 %float_60 %float_60 + %104 = OpConstantComposite %_arr_v4float_uint_4 %97 %99 %101 %103 + %main = OpFunction %void None %3 + %5 = OpLabel + %indexable = OpVariable %_ptr_Function__arr_float_uint_16 Function %16 +%indexable_0 = OpVariable %_ptr_Function__arr_float_uint_16 Function %16 +%indexable_1 = OpVariable %_ptr_Function__arr_float_uint_16 Function %16 + %foo = OpVariable %_ptr_Function__arr_v4float_uint_4 Function %60 + %foobar = OpVariable %_ptr_Function__arr_v4float_uint_4 Function %60 + %baz = OpVariable %_ptr_Function__arr_v4float_uint_4 Function %60 + %20 = OpLoad %int %index + %24 = OpAccessChain %_ptr_Function_float %indexable %20 + %25 = OpLoad %float %24 + OpStore %FragColor %25 + %26 = OpLoad %int %index + %29 = OpSLessThan %bool %26 %int_10 + OpSelectionMerge %31 None + OpBranchConditional %29 %30 %40 + %30 = OpLabel + %32 = OpLoad %int %index + %34 = OpBitwiseXor %int %32 %int_1 + %36 = OpAccessChain %_ptr_Function_float %indexable_0 %34 + %37 = OpLoad %float %36 + %38 = OpLoad %float %FragColor + %39 = OpFAdd %float %38 %37 + OpStore %FragColor %39 + OpBranch %31 + %40 = OpLabel + %41 = OpLoad %int %index + %42 = OpBitwiseAnd %int %41 %int_1 + %44 = OpAccessChain %_ptr_Function_float %indexable_1 %42 + %45 = OpLoad %float %44 + %46 = OpLoad %float %FragColor + %47 = OpFAdd %float %46 %45 + OpStore %FragColor %47 + OpBranch %31 + %31 = OpLabel + %61 = OpLoad %int %index + %63 = OpSGreaterThan %bool %61 %int_30 + OpSelectionMerge %65 None + OpBranchConditional %63 %64 %74 + %64 = OpLabel + %66 = OpLoad %int %index + %68 = OpBitwiseAnd %int %66 %int_3 + %70 = OpAccessChain %_ptr_Function_float %foo %68 %uint_1 + %71 = OpLoad %float %70 + %72 = OpLoad %float %FragColor + %73 = OpFAdd %float %72 %71 + OpStore %FragColor %73 + OpBranch %65 + %74 = OpLabel + %75 = OpLoad %int %index + %76 = OpBitwiseAnd %int %75 %int_1 + %78 = OpAccessChain %_ptr_Function_float %foo %76 %uint_0 + %79 = OpLoad %float %78 + %80 = OpLoad %float %FragColor + %81 = OpFAdd %float %80 %79 + OpStore %FragColor %81 + OpBranch %65 + %65 = OpLabel + %83 = OpLoad %int %index + %84 = OpSGreaterThan %bool %83 %int_30 + OpSelectionMerge %86 None + OpBranchConditional %84 %85 %86 + %85 = OpLabel + %89 = OpAccessChain %_ptr_Function_float %foobar %int_1 %uint_2 + OpStore %89 %float_20 + OpBranch %86 + %86 = OpLabel + %90 = OpLoad %int %index + %91 = OpBitwiseAnd %int %90 %int_3 + %92 = OpAccessChain 
%_ptr_Function_float %foobar %91 %uint_2 + %93 = OpLoad %float %92 + %94 = OpLoad %float %FragColor + %95 = OpFAdd %float %94 %93 + OpStore %FragColor %95 + OpStore %baz %104 + %105 = OpLoad %int %index + %106 = OpBitwiseAnd %int %105 %int_3 + %107 = OpAccessChain %_ptr_Function_float %baz %106 %uint_2 + %108 = OpLoad %float %107 + %109 = OpLoad %float %FragColor + %110 = OpFAdd %float %109 %108 + OpStore %FragColor %110 + OpReturn + OpFunctionEnd diff --git a/shaders-msl/frag/lut-promotion.frag b/shaders-msl/frag/lut-promotion.frag new file mode 100644 index 0000000000..0cdc8148f9 --- /dev/null +++ b/shaders-msl/frag/lut-promotion.frag @@ -0,0 +1,44 @@ +#version 310 es +precision mediump float; +layout(location = 0) out float FragColor; +layout(location = 0) flat in int index; + +const float LUT[16] = float[]( + 1.0, 2.0, 3.0, 4.0, + 1.0, 2.0, 3.0, 4.0, + 1.0, 2.0, 3.0, 4.0, + 1.0, 2.0, 3.0, 4.0); + +void main() +{ + // Try reading LUTs, both in branches and not branch. + FragColor = LUT[index]; + if (index < 10) + FragColor += LUT[index ^ 1]; + else + FragColor += LUT[index & 1]; + + // Not declared as a LUT, but can be promoted to one. + vec4 foo[4] = vec4[](vec4(0.0), vec4(1.0), vec4(8.0), vec4(5.0)); + if (index > 30) + { + FragColor += foo[index & 3].y; + } + else + { + FragColor += foo[index & 1].x; + } + + // Not declared as a LUT, but this cannot be promoted, because we have a partial write. + vec4 foobar[4] = vec4[](vec4(0.0), vec4(1.0), vec4(8.0), vec4(5.0)); + if (index > 30) + { + foobar[1].z = 20.0; + } + FragColor += foobar[index & 3].z; + + // Not declared as a LUT, but this cannot be promoted, because we have two complete writes. + vec4 baz[4] = vec4[](vec4(0.0), vec4(1.0), vec4(8.0), vec4(5.0)); + baz = vec4[](vec4(20.0), vec4(30.0), vec4(50.0), vec4(60.0)); + FragColor += baz[index & 3].z; +} diff --git a/shaders/asm/frag/lut-promotion-initializer.asm.frag b/shaders/asm/frag/lut-promotion-initializer.asm.frag new file mode 100644 index 0000000000..320e5ebfbd --- /dev/null +++ b/shaders/asm/frag/lut-promotion-initializer.asm.frag @@ -0,0 +1,195 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 6 +; Bound: 111 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor %index + OpExecutionMode %main OriginUpperLeft + OpSource ESSL 310 + OpName %main "main" + OpName %FragColor "FragColor" + OpName %index "index" + OpName %indexable "indexable" + OpName %indexable_0 "indexable" + OpName %indexable_1 "indexable" + OpName %foo "foo" + OpName %foobar "foobar" + OpName %baz "baz" + OpDecorate %FragColor RelaxedPrecision + OpDecorate %FragColor Location 0 + OpDecorate %index RelaxedPrecision + OpDecorate %index Flat + OpDecorate %index Location 0 + OpDecorate %20 RelaxedPrecision + OpDecorate %25 RelaxedPrecision + OpDecorate %26 RelaxedPrecision + OpDecorate %32 RelaxedPrecision + OpDecorate %34 RelaxedPrecision + OpDecorate %37 RelaxedPrecision + OpDecorate %38 RelaxedPrecision + OpDecorate %39 RelaxedPrecision + OpDecorate %41 RelaxedPrecision + OpDecorate %42 RelaxedPrecision + OpDecorate %45 RelaxedPrecision + OpDecorate %46 RelaxedPrecision + OpDecorate %47 RelaxedPrecision + OpDecorate %foo RelaxedPrecision + OpDecorate %61 RelaxedPrecision + OpDecorate %66 RelaxedPrecision + OpDecorate %68 RelaxedPrecision + OpDecorate %71 RelaxedPrecision + OpDecorate %72 RelaxedPrecision + OpDecorate %73 RelaxedPrecision + OpDecorate %75 RelaxedPrecision + 
OpDecorate %76 RelaxedPrecision + OpDecorate %79 RelaxedPrecision + OpDecorate %80 RelaxedPrecision + OpDecorate %81 RelaxedPrecision + OpDecorate %foobar RelaxedPrecision + OpDecorate %83 RelaxedPrecision + OpDecorate %90 RelaxedPrecision + OpDecorate %91 RelaxedPrecision + OpDecorate %93 RelaxedPrecision + OpDecorate %94 RelaxedPrecision + OpDecorate %95 RelaxedPrecision + OpDecorate %baz RelaxedPrecision + OpDecorate %105 RelaxedPrecision + OpDecorate %106 RelaxedPrecision + OpDecorate %108 RelaxedPrecision + OpDecorate %109 RelaxedPrecision + OpDecorate %110 RelaxedPrecision + OpDecorate %16 RelaxedPrecision + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 +%_ptr_Output_float = OpTypePointer Output %float + %FragColor = OpVariable %_ptr_Output_float Output + %uint = OpTypeInt 32 0 + %uint_16 = OpConstant %uint 16 +%_arr_float_uint_16 = OpTypeArray %float %uint_16 + %float_1 = OpConstant %float 1 + %float_2 = OpConstant %float 2 + %float_3 = OpConstant %float 3 + %float_4 = OpConstant %float 4 + %16 = OpConstantComposite %_arr_float_uint_16 %float_1 %float_2 %float_3 %float_4 %float_1 %float_2 %float_3 %float_4 %float_1 %float_2 %float_3 %float_4 %float_1 %float_2 %float_3 %float_4 + %int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int + %index = OpVariable %_ptr_Input_int Input +%_ptr_Function__arr_float_uint_16 = OpTypePointer Function %_arr_float_uint_16 +%_ptr_Function_float = OpTypePointer Function %float + %int_10 = OpConstant %int 10 + %bool = OpTypeBool + %int_1 = OpConstant %int 1 + %v4float = OpTypeVector %float 4 + %uint_4 = OpConstant %uint 4 +%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4 +%_ptr_Function__arr_v4float_uint_4 = OpTypePointer Function %_arr_v4float_uint_4 + %float_0 = OpConstant %float 0 + %54 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 + %55 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 + %float_8 = OpConstant %float 8 + %57 = OpConstantComposite %v4float %float_8 %float_8 %float_8 %float_8 + %float_5 = OpConstant %float 5 + %59 = OpConstantComposite %v4float %float_5 %float_5 %float_5 %float_5 + %60 = OpConstantComposite %_arr_v4float_uint_4 %54 %55 %57 %59 + %int_30 = OpConstant %int 30 + %int_3 = OpConstant %int 3 + %uint_1 = OpConstant %uint 1 + %uint_0 = OpConstant %uint 0 + %float_20 = OpConstant %float 20 + %uint_2 = OpConstant %uint 2 + %97 = OpConstantComposite %v4float %float_20 %float_20 %float_20 %float_20 + %float_30 = OpConstant %float 30 + %99 = OpConstantComposite %v4float %float_30 %float_30 %float_30 %float_30 + %float_50 = OpConstant %float 50 + %101 = OpConstantComposite %v4float %float_50 %float_50 %float_50 %float_50 + %float_60 = OpConstant %float 60 + %103 = OpConstantComposite %v4float %float_60 %float_60 %float_60 %float_60 + %104 = OpConstantComposite %_arr_v4float_uint_4 %97 %99 %101 %103 + %main = OpFunction %void None %3 + %5 = OpLabel + %indexable = OpVariable %_ptr_Function__arr_float_uint_16 Function %16 +%indexable_0 = OpVariable %_ptr_Function__arr_float_uint_16 Function %16 +%indexable_1 = OpVariable %_ptr_Function__arr_float_uint_16 Function %16 + %foo = OpVariable %_ptr_Function__arr_v4float_uint_4 Function %60 + %foobar = OpVariable %_ptr_Function__arr_v4float_uint_4 Function %60 + %baz = OpVariable %_ptr_Function__arr_v4float_uint_4 Function %60 + %20 = OpLoad %int %index + %24 = OpAccessChain %_ptr_Function_float %indexable %20 + %25 = OpLoad %float %24 + OpStore %FragColor %25 + %26 = OpLoad %int %index + %29 = OpSLessThan %bool 
%26 %int_10 + OpSelectionMerge %31 None + OpBranchConditional %29 %30 %40 + %30 = OpLabel + %32 = OpLoad %int %index + %34 = OpBitwiseXor %int %32 %int_1 + %36 = OpAccessChain %_ptr_Function_float %indexable_0 %34 + %37 = OpLoad %float %36 + %38 = OpLoad %float %FragColor + %39 = OpFAdd %float %38 %37 + OpStore %FragColor %39 + OpBranch %31 + %40 = OpLabel + %41 = OpLoad %int %index + %42 = OpBitwiseAnd %int %41 %int_1 + %44 = OpAccessChain %_ptr_Function_float %indexable_1 %42 + %45 = OpLoad %float %44 + %46 = OpLoad %float %FragColor + %47 = OpFAdd %float %46 %45 + OpStore %FragColor %47 + OpBranch %31 + %31 = OpLabel + %61 = OpLoad %int %index + %63 = OpSGreaterThan %bool %61 %int_30 + OpSelectionMerge %65 None + OpBranchConditional %63 %64 %74 + %64 = OpLabel + %66 = OpLoad %int %index + %68 = OpBitwiseAnd %int %66 %int_3 + %70 = OpAccessChain %_ptr_Function_float %foo %68 %uint_1 + %71 = OpLoad %float %70 + %72 = OpLoad %float %FragColor + %73 = OpFAdd %float %72 %71 + OpStore %FragColor %73 + OpBranch %65 + %74 = OpLabel + %75 = OpLoad %int %index + %76 = OpBitwiseAnd %int %75 %int_1 + %78 = OpAccessChain %_ptr_Function_float %foo %76 %uint_0 + %79 = OpLoad %float %78 + %80 = OpLoad %float %FragColor + %81 = OpFAdd %float %80 %79 + OpStore %FragColor %81 + OpBranch %65 + %65 = OpLabel + %83 = OpLoad %int %index + %84 = OpSGreaterThan %bool %83 %int_30 + OpSelectionMerge %86 None + OpBranchConditional %84 %85 %86 + %85 = OpLabel + %89 = OpAccessChain %_ptr_Function_float %foobar %int_1 %uint_2 + OpStore %89 %float_20 + OpBranch %86 + %86 = OpLabel + %90 = OpLoad %int %index + %91 = OpBitwiseAnd %int %90 %int_3 + %92 = OpAccessChain %_ptr_Function_float %foobar %91 %uint_2 + %93 = OpLoad %float %92 + %94 = OpLoad %float %FragColor + %95 = OpFAdd %float %94 %93 + OpStore %FragColor %95 + OpStore %baz %104 + %105 = OpLoad %int %index + %106 = OpBitwiseAnd %int %105 %int_3 + %107 = OpAccessChain %_ptr_Function_float %baz %106 %uint_2 + %108 = OpLoad %float %107 + %109 = OpLoad %float %FragColor + %110 = OpFAdd %float %109 %108 + OpStore %FragColor %110 + OpReturn + OpFunctionEnd diff --git a/shaders/frag/lut-promotion.frag b/shaders/frag/lut-promotion.frag new file mode 100644 index 0000000000..0cdc8148f9 --- /dev/null +++ b/shaders/frag/lut-promotion.frag @@ -0,0 +1,44 @@ +#version 310 es +precision mediump float; +layout(location = 0) out float FragColor; +layout(location = 0) flat in int index; + +const float LUT[16] = float[]( + 1.0, 2.0, 3.0, 4.0, + 1.0, 2.0, 3.0, 4.0, + 1.0, 2.0, 3.0, 4.0, + 1.0, 2.0, 3.0, 4.0); + +void main() +{ + // Try reading LUTs, both in branches and not branch. + FragColor = LUT[index]; + if (index < 10) + FragColor += LUT[index ^ 1]; + else + FragColor += LUT[index & 1]; + + // Not declared as a LUT, but can be promoted to one. + vec4 foo[4] = vec4[](vec4(0.0), vec4(1.0), vec4(8.0), vec4(5.0)); + if (index > 30) + { + FragColor += foo[index & 3].y; + } + else + { + FragColor += foo[index & 1].x; + } + + // Not declared as a LUT, but this cannot be promoted, because we have a partial write. + vec4 foobar[4] = vec4[](vec4(0.0), vec4(1.0), vec4(8.0), vec4(5.0)); + if (index > 30) + { + foobar[1].z = 20.0; + } + FragColor += foobar[index & 3].z; + + // Not declared as a LUT, but this cannot be promoted, because we have two complete writes. 
+ vec4 baz[4] = vec4[](vec4(0.0), vec4(1.0), vec4(8.0), vec4(5.0)); + baz = vec4[](vec4(20.0), vec4(30.0), vec4(50.0), vec4(60.0)); + FragColor += baz[index & 3].z; +} diff --git a/spirv_common.hpp b/spirv_common.hpp index e3fb2b7238..0ea94fcb0c 100644 --- a/spirv_common.hpp +++ b/spirv_common.hpp @@ -764,7 +764,6 @@ struct SPIRFunction : IVariant bool active = false; bool flush_undeclared = true; bool do_combined_parameters = true; - bool analyzed_variable_scope = false; }; struct SPIRAccessChain : IVariant @@ -1102,6 +1101,9 @@ struct SPIRConstant : IVariant // If this constant is used as an array length which creates specialization restrictions on some backends. bool is_used_as_array_length = false; + // If true, this is a LUT, and should always be declared in the outer scope. + bool is_used_as_lut = false; + // For composites which are constant arrays, etc. std::vector subconstants; }; diff --git a/spirv_cpp.cpp b/spirv_cpp.cpp index 9302a07441..7d781e9172 100644 --- a/spirv_cpp.cpp +++ b/spirv_cpp.cpp @@ -300,6 +300,7 @@ string CompilerCPP::compile() backend.explicit_struct_type = true; backend.use_initializer_list = true; + build_function_control_flow_graphs_and_analyze(); update_active_builtins(); uint32_t pass_count = 0; diff --git a/spirv_cross.cpp b/spirv_cross.cpp index 3e460e9f61..dd2a44ca33 100644 --- a/spirv_cross.cpp +++ b/spirv_cross.cpp @@ -3647,303 +3647,449 @@ void Compiler::analyze_parameter_preservation( } } -void Compiler::analyze_variable_scope(SPIRFunction &entry) +Compiler::AnalyzeVariableScopeAccessHandler::AnalyzeVariableScopeAccessHandler(Compiler &compiler_, + SPIRFunction &entry_) + : compiler(compiler_) + , entry(entry_) { - struct AccessHandler : OpcodeHandler +} + +bool Compiler::AnalyzeVariableScopeAccessHandler::follow_function_call(const SPIRFunction &) +{ + // Only analyze within this function. + return false; +} + +void Compiler::AnalyzeVariableScopeAccessHandler::set_current_block(const SPIRBlock &block) +{ + current_block = █ + + // If we're branching to a block which uses OpPhi, in GLSL + // this will be a variable write when we branch, + // so we need to track access to these variables as well to + // have a complete picture. + const auto test_phi = [this, &block](uint32_t to) { + auto &next = compiler.get(to); + for (auto &phi : next.phi_variables) + { + if (phi.parent == block.self) + { + accessed_variables_to_block[phi.function_variable].insert(block.self); + // Phi variables are also accessed in our target branch block. + accessed_variables_to_block[phi.function_variable].insert(next.self); + + notify_variable_access(phi.local_variable, block.self); + } + } + }; + + switch (block.terminator) { - public: - AccessHandler(Compiler &compiler_, SPIRFunction &entry_) - : compiler(compiler_) - , entry(entry_) - { - } + case SPIRBlock::Direct: + notify_variable_access(block.condition, block.self); + test_phi(block.next_block); + break; - bool follow_function_call(const SPIRFunction &) - { - // Only analyze within this function. 
+ case SPIRBlock::Select: + notify_variable_access(block.condition, block.self); + test_phi(block.true_block); + test_phi(block.false_block); + break; + + case SPIRBlock::MultiSelect: + notify_variable_access(block.condition, block.self); + for (auto &target : block.cases) + test_phi(target.block); + if (block.default_block) + test_phi(block.default_block); + break; + + default: + break; + } +} + +void Compiler::AnalyzeVariableScopeAccessHandler::notify_variable_access(uint32_t id, uint32_t block) +{ + if (id_is_phi_variable(id)) + accessed_variables_to_block[id].insert(block); + else if (id_is_potential_temporary(id)) + accessed_temporaries_to_block[id].insert(block); +} + +bool Compiler::AnalyzeVariableScopeAccessHandler::id_is_phi_variable(uint32_t id) const +{ + if (id >= compiler.get_current_id_bound()) + return false; + auto *var = compiler.maybe_get(id); + return var && var->phi_variable; +} + +bool Compiler::AnalyzeVariableScopeAccessHandler::id_is_potential_temporary(uint32_t id) const +{ + if (id >= compiler.get_current_id_bound()) + return false; + + // Temporaries are not created before we start emitting code. + return compiler.ids[id].empty() || (compiler.ids[id].get_type() == TypeExpression); +} + +bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint32_t *args, uint32_t length) +{ + // Keep track of the types of temporaries, so we can hoist them out as necessary. + uint32_t result_type, result_id; + if (compiler.instruction_to_result_type(result_type, result_id, op, args, length)) + result_id_to_type[result_id] = result_type; + + switch (op) + { + case OpStore: + { + if (length < 2) return false; - } - void set_current_block(const SPIRBlock &block) + uint32_t ptr = args[0]; + auto *var = compiler.maybe_get_backing_variable(ptr); + + // If we store through an access chain, we have a partial write. + if (var) { - current_block = █ - - // If we're branching to a block which uses OpPhi, in GLSL - // this will be a variable write when we branch, - // so we need to track access to these variables as well to - // have a complete picture. - const auto test_phi = [this, &block](uint32_t to) { - auto &next = compiler.get(to); - for (auto &phi : next.phi_variables) - { - if (phi.parent == block.self) - { - accessed_variables_to_block[phi.function_variable].insert(block.self); - // Phi variables are also accessed in our target branch block. - accessed_variables_to_block[phi.function_variable].insert(next.self); - - notify_variable_access(phi.local_variable, block.self); - } - } - }; - - switch (block.terminator) - { - case SPIRBlock::Direct: - notify_variable_access(block.condition, block.self); - test_phi(block.next_block); - break; - - case SPIRBlock::Select: - notify_variable_access(block.condition, block.self); - test_phi(block.true_block); - test_phi(block.false_block); - break; - - case SPIRBlock::MultiSelect: - notify_variable_access(block.condition, block.self); - for (auto &target : block.cases) - test_phi(target.block); - if (block.default_block) - test_phi(block.default_block); - break; - - default: - break; - } + accessed_variables_to_block[var->self].insert(current_block->self); + if (var->self == ptr) + complete_write_variables_to_block[var->self].insert(current_block->self); + else + partial_write_variables_to_block[var->self].insert(current_block->self); } - void notify_variable_access(uint32_t id, uint32_t block) + // Might try to store a Phi variable here. 
+ notify_variable_access(args[1], current_block->self); + break; + } + + case OpAccessChain: + case OpInBoundsAccessChain: + { + if (length < 3) + return false; + + uint32_t ptr = args[2]; + auto *var = compiler.maybe_get(ptr); + if (var) + accessed_variables_to_block[var->self].insert(current_block->self); + + for (uint32_t i = 3; i < length; i++) + notify_variable_access(args[i], current_block->self); + + // The result of an access chain is a fixed expression and is not really considered a temporary. + auto &e = compiler.set(args[1], "", args[0], true); + auto *backing_variable = compiler.maybe_get_backing_variable(ptr); + e.loaded_from = backing_variable ? backing_variable->self : 0; + + // Other backends might use SPIRAccessChain for this later. + compiler.ids[args[1]].set_allow_type_rewrite(); + break; + } + + case OpCopyMemory: + { + if (length < 2) + return false; + + uint32_t lhs = args[0]; + uint32_t rhs = args[1]; + auto *var = compiler.maybe_get_backing_variable(lhs); + + // If we store through an access chain, we have a partial write. + if (var) { - if (id_is_phi_variable(id)) - accessed_variables_to_block[id].insert(block); - else if (id_is_potential_temporary(id)) - accessed_temporaries_to_block[id].insert(block); + accessed_variables_to_block[var->self].insert(current_block->self); + if (var->self == lhs) + complete_write_variables_to_block[var->self].insert(current_block->self); + else + partial_write_variables_to_block[var->self].insert(current_block->self); } - bool id_is_phi_variable(uint32_t id) + var = compiler.maybe_get_backing_variable(rhs); + if (var) + accessed_variables_to_block[var->self].insert(current_block->self); + break; + } + + case OpCopyObject: + { + if (length < 3) + return false; + + auto *var = compiler.maybe_get_backing_variable(args[2]); + if (var) + accessed_variables_to_block[var->self].insert(current_block->self); + + // Might try to copy a Phi variable here. + notify_variable_access(args[2], current_block->self); + break; + } + + case OpLoad: + { + if (length < 3) + return false; + uint32_t ptr = args[2]; + auto *var = compiler.maybe_get_backing_variable(ptr); + if (var) + accessed_variables_to_block[var->self].insert(current_block->self); + + // Loaded value is a temporary. + notify_variable_access(args[1], current_block->self); + break; + } + + case OpFunctionCall: + { + if (length < 3) + return false; + + length -= 3; + args += 3; + + for (uint32_t i = 0; i < length; i++) { - if (id >= compiler.get_current_id_bound()) - return false; - auto *var = compiler.maybe_get(id); - return var && var->phi_variable; + auto *var = compiler.maybe_get_backing_variable(args[i]); + if (var) + { + accessed_variables_to_block[var->self].insert(current_block->self); + // Assume we can get partial writes to this variable. + partial_write_variables_to_block[var->self].insert(current_block->self); + } + + // Cannot easily prove if argument we pass to a function is completely written. + // Usually, functions write to a dummy variable, + // which is then copied to in full to the real argument. + + // Might try to copy a Phi variable here. + notify_variable_access(args[i], current_block->self); } - bool id_is_potential_temporary(uint32_t id) + // Return value may be a temporary. 
+ notify_variable_access(args[1], current_block->self); + break; + } + + case OpExtInst: + { + for (uint32_t i = 4; i < length; i++) + notify_variable_access(args[i], current_block->self); + notify_variable_access(args[1], current_block->self); + break; + } + + case OpArrayLength: + // Uses literals, but cannot be a phi variable, so ignore. + break; + + // Atomics shouldn't be able to access function-local variables. + // Some GLSL builtins access a pointer. + + case OpCompositeInsert: + case OpVectorShuffle: + // Specialize for opcode which contains literals. + for (uint32_t i = 1; i < 4; i++) + notify_variable_access(args[i], current_block->self); + break; + + case OpCompositeExtract: + // Specialize for opcode which contains literals. + for (uint32_t i = 1; i < 3; i++) + notify_variable_access(args[i], current_block->self); + break; + + default: + { + // Rather dirty way of figuring out where Phi variables are used. + // As long as only IDs are used, we can scan through instructions and try to find any evidence that + // the ID of a variable has been used. + // There are potential false positives here where a literal is used in-place of an ID, + // but worst case, it does not affect the correctness of the compile. + // Exhaustive analysis would be better here, but it's not worth it for now. + for (uint32_t i = 0; i < length; i++) + notify_variable_access(args[i], current_block->self); + break; + } + } + return true; +} + +Compiler::StaticExpressionAccessHandler::StaticExpressionAccessHandler(Compiler &compiler_, uint32_t variable_id_) + : compiler(compiler_) + , variable_id(variable_id_) +{ +} + +bool Compiler::StaticExpressionAccessHandler::follow_function_call(const SPIRFunction &) +{ + return false; +} + +bool Compiler::StaticExpressionAccessHandler::handle(spv::Op op, const uint32_t *args, uint32_t length) +{ + switch (op) + { + case OpStore: + if (length < 2) + return false; + if (args[0] == variable_id) { - if (id >= compiler.get_current_id_bound()) - return false; - - // Temporaries are not created before we start emitting code. - return compiler.ids[id].empty() || (compiler.ids[id].get_type() == TypeExpression); + static_expression = args[1]; + write_count++; } + break; - bool handle(spv::Op op, const uint32_t *args, uint32_t length) + case OpLoad: + if (length < 3) + return false; + if (args[2] == variable_id && static_expression == 0) // Tried to read from variable before it was initialized. + return false; + break; + + case OpAccessChain: + case OpInBoundsAccessChain: + if (length < 3) + return false; + if (args[2] == variable_id) // If we try to access chain our candidate variable before we store to it, bail. + return false; + break; + + default: + break; + } + + return true; +} + +void Compiler::find_function_local_luts(SPIRFunction &entry, const AnalyzeVariableScopeAccessHandler &handler) +{ + auto &cfg = *function_cfgs.find(entry.self)->second; + + // For each variable which is statically accessed. + for (auto &accessed_var : handler.accessed_variables_to_block) + { + auto &blocks = accessed_var.second; + auto &var = get(accessed_var.first); + auto &type = expression_type(accessed_var.first); + + // Only consider function local variables here. + if (var.storage != StorageClassFunction) + continue; + + // We cannot be a phi variable. + if (var.phi_variable) + continue; + + // Only consider arrays here. + if (type.array.empty()) + continue; + + // HACK: Do not consider structs. This is a quirk with how types are currently being emitted. 
+ // Structs are emitted after specialization constants and composite constants. + // FIXME: Fix declaration order so declared constants can have struct types. + if (type.basetype == SPIRType::Struct) + continue; + + // If the variable has an initializer, make sure it is a constant expression. + uint32_t static_constant_expression = 0; + if (var.initializer) { - // Keep track of the types of temporaries, so we can hoist them out as necessary. - uint32_t result_type, result_id; - if (compiler.instruction_to_result_type(result_type, result_id, op, args, length)) - result_id_to_type[result_id] = result_type; + if (ids[var.initializer].get_type() != TypeConstant) + continue; + static_constant_expression = var.initializer; - switch (op) - { - case OpStore: - { - if (length < 2) - return false; + // There can be no stores to this variable, we have now proved we have a LUT. + if (handler.complete_write_variables_to_block.count(var.self) != 0 || + handler.partial_write_variables_to_block.count(var.self) != 0) + continue; + } + else + { + // We can have one, and only one write to the variable, and that write needs to be a constant. - uint32_t ptr = args[0]; - auto *var = compiler.maybe_get_backing_variable(ptr); - if (var && var->storage == StorageClassFunction) - accessed_variables_to_block[var->self].insert(current_block->self); + // No partial writes allowed. + if (handler.partial_write_variables_to_block.count(var.self) != 0) + continue; - // If we store through an access chain, we have a partial write. - if (var && var->self == ptr && var->storage == StorageClassFunction) - complete_write_variables_to_block[var->self].insert(current_block->self); + auto itr = handler.complete_write_variables_to_block.find(var.self); - // Might try to store a Phi variable here. - notify_variable_access(args[1], current_block->self); - break; - } + // No writes? + if (itr == end(handler.complete_write_variables_to_block)) + continue; - case OpAccessChain: - case OpInBoundsAccessChain: - { - if (length < 3) - return false; + // We write to the variable in more than one block. + auto &write_blocks = itr->second; + if (write_blocks.size() != 1) + continue; - uint32_t ptr = args[2]; - auto *var = compiler.maybe_get(ptr); - if (var && var->storage == StorageClassFunction) - accessed_variables_to_block[var->self].insert(current_block->self); + // The write needs to happen in the dominating block. + DominatorBuilder builder(cfg); + for (auto &block : blocks) + builder.add_block(block); + uint32_t dominator = builder.get_dominator(); - for (uint32_t i = 3; i < length; i++) - notify_variable_access(args[i], current_block->self); + // The complete write happened in a branch or similar, cannot deduce static expression. + if (write_blocks.count(dominator) == 0) + continue; - // The result of an access chain is a fixed expression and is not really considered a temporary. - auto &e = compiler.set(args[1], "", args[0], true); - auto *backing_variable = compiler.maybe_get_backing_variable(ptr); - e.loaded_from = backing_variable ? backing_variable->self : 0; + // Find the static expression for this variable. + StaticExpressionAccessHandler static_expression_handler(*this, var.self); + traverse_all_reachable_opcodes(get(dominator), static_expression_handler); - // Other backends might use SPIRAccessChain for this later. 
- compiler.ids[args[1]].set_allow_type_rewrite(); - break; - } + // We want one, and exactly one write + if (static_expression_handler.write_count != 1 || static_expression_handler.static_expression == 0) + continue; - case OpCopyMemory: - { - if (length < 2) - return false; + // Is it a constant expression? + if (ids[static_expression_handler.static_expression].get_type() != TypeConstant) + continue; - uint32_t lhs = args[0]; - uint32_t rhs = args[1]; - auto *var = compiler.maybe_get_backing_variable(lhs); - if (var && var->storage == StorageClassFunction) - accessed_variables_to_block[var->self].insert(current_block->self); - - // If we store through an access chain, we have a partial write. - if (var && var->self == lhs) - complete_write_variables_to_block[var->self].insert(current_block->self); - - var = compiler.maybe_get_backing_variable(rhs); - if (var && var->storage == StorageClassFunction) - accessed_variables_to_block[var->self].insert(current_block->self); - break; - } - - case OpCopyObject: - { - if (length < 3) - return false; - - auto *var = compiler.maybe_get_backing_variable(args[2]); - if (var && var->storage == StorageClassFunction) - accessed_variables_to_block[var->self].insert(current_block->self); - - // Might try to copy a Phi variable here. - notify_variable_access(args[2], current_block->self); - break; - } - - case OpLoad: - { - if (length < 3) - return false; - uint32_t ptr = args[2]; - auto *var = compiler.maybe_get_backing_variable(ptr); - if (var && var->storage == StorageClassFunction) - accessed_variables_to_block[var->self].insert(current_block->self); - - // Loaded value is a temporary. - notify_variable_access(args[1], current_block->self); - break; - } - - case OpFunctionCall: - { - if (length < 3) - return false; - - length -= 3; - args += 3; - for (uint32_t i = 0; i < length; i++) - { - auto *var = compiler.maybe_get_backing_variable(args[i]); - if (var && var->storage == StorageClassFunction) - accessed_variables_to_block[var->self].insert(current_block->self); - - // Cannot easily prove if argument we pass to a function is completely written. - // Usually, functions write to a dummy variable, - // which is then copied to in full to the real argument. - - // Might try to copy a Phi variable here. - notify_variable_access(args[i], current_block->self); - } - - // Return value may be a temporary. - notify_variable_access(args[1], current_block->self); - break; - } - - case OpExtInst: - { - for (uint32_t i = 4; i < length; i++) - notify_variable_access(args[i], current_block->self); - notify_variable_access(args[1], current_block->self); - break; - } - - case OpArrayLength: - // Uses literals, but cannot be a phi variable, so ignore. - break; - - // Atomics shouldn't be able to access function-local variables. - // Some GLSL builtins access a pointer. - - case OpCompositeInsert: - case OpVectorShuffle: - // Specialize for opcode which contains literals. - for (uint32_t i = 1; i < 4; i++) - notify_variable_access(args[i], current_block->self); - break; - - case OpCompositeExtract: - // Specialize for opcode which contains literals. - for (uint32_t i = 1; i < 3; i++) - notify_variable_access(args[i], current_block->self); - break; - - default: - { - // Rather dirty way of figuring out where Phi variables are used. - // As long as only IDs are used, we can scan through instructions and try to find any evidence that - // the ID of a variable has been used. 
- // There are potential false positives here where a literal is used in-place of an ID, - // but worst case, it does not affect the correctness of the compile. - // Exhaustive analysis would be better here, but it's not worth it for now. - for (uint32_t i = 0; i < length; i++) - notify_variable_access(args[i], current_block->self); - break; - } - } - return true; + // We found a LUT! + static_constant_expression = static_expression_handler.static_expression; } - Compiler &compiler; - SPIRFunction &entry; - std::unordered_map> accessed_variables_to_block; - std::unordered_map> accessed_temporaries_to_block; - std::unordered_map result_id_to_type; - std::unordered_map> complete_write_variables_to_block; - const SPIRBlock *current_block = nullptr; - } handler(*this, entry); + get(static_constant_expression).is_used_as_lut = true; + var.static_expression = static_constant_expression; + var.statically_assigned = true; + var.remapped_variable = true; + } +} +void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeAccessHandler &handler) +{ // First, we map out all variable access within a function. // Essentially a map of block -> { variables accessed in the basic block } - this->traverse_all_reachable_opcodes(entry, handler); + traverse_all_reachable_opcodes(entry, handler); - // Compute the control flow graph for this function. - CFG cfg(*this, entry); + auto &cfg = *function_cfgs.find(entry.self)->second; // Analyze if there are parameters which need to be implicitly preserved with an "in" qualifier. - this->analyze_parameter_preservation(entry, cfg, handler.accessed_variables_to_block, - handler.complete_write_variables_to_block); + analyze_parameter_preservation(entry, cfg, handler.accessed_variables_to_block, + handler.complete_write_variables_to_block); unordered_map potential_loop_variables; // For each variable which is statically accessed. for (auto &var : handler.accessed_variables_to_block) { + // Only deal with variables which are considered local variables in this function. + if (find(begin(entry.local_variables), end(entry.local_variables), var.first) == end(entry.local_variables)) + continue; + DominatorBuilder builder(cfg); auto &blocks = var.second; - auto &type = this->expression_type(var.first); + auto &type = expression_type(var.first); // Figure out which block is dominating all accesses of those variables. for (auto &block : blocks) { // If we're accessing a variable inside a continue block, this variable might be a loop variable. // We can only use loop variables with scalars, as we cannot track static expressions for vectors. - if (this->is_continue(block)) + if (is_continue(block)) { // Potentially awkward case to check for. // We might have a variable inside a loop, which is touched by the continue block, @@ -3951,7 +4097,7 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry) // The continue block is dominated by the inner part of the loop, which does not make sense in high-level // language output because it will be declared before the body, // so we will have to lift the dominator up to the relevant loop header instead. - builder.add_block(this->continue_block_to_loop_header[block]); + builder.add_block(continue_block_to_loop_header[block]); // Arrays or structs cannot be loop variables. if (type.vecsize == 1 && type.columns == 1 && type.basetype != SPIRType::Struct && type.array.empty()) @@ -3978,9 +4124,9 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry) // will be completely eliminated. 
if (dominating_block) { - auto &block = this->get(dominating_block); + auto &block = get(dominating_block); block.dominated_variables.push_back(var.first); - this->get(var.first).dominator = dominating_block; + get(var.first).dominator = dominating_block; } } @@ -4006,9 +4152,9 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry) // If a temporary is used in more than one block, we might have to lift continue block // access up to loop header like we did for variables. - if (blocks.size() != 1 && this->is_continue(block)) - builder.add_block(this->continue_block_to_loop_header[block]); - else if (blocks.size() != 1 && this->is_single_block_loop(block)) + if (blocks.size() != 1 && is_continue(block)) + builder.add_block(continue_block_to_loop_header[block]); + else if (blocks.size() != 1 && is_single_block_loop(block)) { // Awkward case, because the loop header is also the continue block. force_temporary = true; @@ -4027,10 +4173,10 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry) // This should be very rare, but if we try to declare a temporary inside a loop, // and that temporary is used outside the loop as well (spirv-opt inliner likes this) // we should actually emit the temporary outside the loop. - this->hoisted_temporaries.insert(var.first); - this->forced_temporaries.insert(var.first); + hoisted_temporaries.insert(var.first); + forced_temporaries.insert(var.first); - auto &block_temporaries = this->get(dominating_block).declare_temporary; + auto &block_temporaries = get(dominating_block).declare_temporary; block_temporaries.emplace_back(handler.result_id_to_type[var.first], var.first); } else if (blocks.size() > 1) @@ -4040,7 +4186,7 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry) // In this case, the header is actually inside the for (;;) {} block, and we have problems. // What we need to do is hoist the temporaries outside the for (;;) {} block in case the header block // declares the temporary. - auto &block_temporaries = this->get(dominating_block).potential_declare_temporary; + auto &block_temporaries = get(dominating_block).potential_declare_temporary; block_temporaries.emplace_back(handler.result_id_to_type[var.first], var.first); } } @@ -4051,7 +4197,7 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry) // Now, try to analyze whether or not these variables are actually loop variables. for (auto &loop_variable : potential_loop_variables) { - auto &var = this->get(loop_variable.first); + auto &var = get(loop_variable.first); auto dominator = var.dominator; auto block = loop_variable.second; @@ -4066,9 +4212,9 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry) uint32_t header = 0; // Find the loop header for this block. - for (auto b : this->loop_blocks) + for (auto b : loop_blocks) { - auto &potential_header = this->get(b); + auto &potential_header = get(b); if (potential_header.continue_block == block) { header = b; @@ -4077,7 +4223,7 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry) } assert(header); - auto &header_block = this->get(header); + auto &header_block = get(header); auto &blocks = handler.accessed_variables_to_block[loop_variable.first]; // If a loop variable is not used before the loop, it's probably not a loop variable. @@ -4133,7 +4279,7 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry) // Need to sort here as variables come from an unordered container, and pushing stuff in wrong order // will break reproducability in regression runs. 
sort(begin(header_block.loop_variables), end(header_block.loop_variables)); - this->get(loop_variable.first).loop_variable = true; + get(loop_variable.first).loop_variable = true; } } @@ -4406,6 +4552,72 @@ bool Compiler::CombinedImageSamplerDrefHandler::handle(spv::Op opcode, const uin return true; } +void Compiler::build_function_control_flow_graphs_and_analyze() +{ + CFGBuilder handler(*this); + handler.function_cfgs[entry_point].reset(new CFG(*this, get(entry_point))); + traverse_all_reachable_opcodes(get(entry_point), handler); + function_cfgs = move(handler.function_cfgs); + + for (auto &f : function_cfgs) + { + auto &func = get(f.first); + AnalyzeVariableScopeAccessHandler scope_handler(*this, func); + analyze_variable_scope(func, scope_handler); + find_function_local_luts(func, scope_handler); + + // Check if we can actually use the loop variables we found in analyze_variable_scope. + // To use multiple initializers, we need the same type and qualifiers. + for (auto block : func.blocks) + { + auto &b = get(block); + if (b.loop_variables.size() < 2) + continue; + + auto &flags = get_decoration_bitset(b.loop_variables.front()); + uint32_t type = get(b.loop_variables.front()).basetype; + bool invalid_initializers = false; + for (auto loop_variable : b.loop_variables) + { + if (flags != get_decoration_bitset(loop_variable) || + type != get(b.loop_variables.front()).basetype) + { + invalid_initializers = true; + break; + } + } + + if (invalid_initializers) + { + for (auto loop_variable : b.loop_variables) + get(loop_variable).loop_variable = false; + b.loop_variables.clear(); + } + } + } +} + +Compiler::CFGBuilder::CFGBuilder(spirv_cross::Compiler &compiler_) + : compiler(compiler_) +{ +} + +bool Compiler::CFGBuilder::handle(spv::Op, const uint32_t *, uint32_t) +{ + return true; +} + +bool Compiler::CFGBuilder::follow_function_call(const SPIRFunction &func) +{ + if (function_cfgs.find(func.self) == end(function_cfgs)) + { + function_cfgs[func.self].reset(new CFG(compiler, func)); + return true; + } + else + return false; +} + bool Compiler::CombinedImageSamplerUsageHandler::begin_function_scope(const uint32_t *args, uint32_t length) { if (length < 3) diff --git a/spirv_cross.hpp b/spirv_cross.hpp index b25e1c19cb..159e1988be 100644 --- a/spirv_cross.hpp +++ b/spirv_cross.hpp @@ -18,11 +18,11 @@ #define SPIRV_CROSS_HPP #include "spirv.hpp" +#include "spirv_cfg.hpp" #include "spirv_common.hpp" namespace spirv_cross { -class CFG; struct Resource { // Resources are identified with their SPIR-V ID. 
@@ -676,8 +676,6 @@ protected: variable_remap_callback(type, var_name, type_name); } - void analyze_variable_scope(SPIRFunction &function); - void parse(); void parse(const Instruction &i); @@ -869,6 +867,55 @@ protected: bool need_subpass_input = false; }; + void build_function_control_flow_graphs_and_analyze(); + std::unordered_map> function_cfgs; + struct CFGBuilder : OpcodeHandler + { + CFGBuilder(Compiler &compiler_); + + bool follow_function_call(const SPIRFunction &func) override; + bool handle(spv::Op op, const uint32_t *args, uint32_t length) override; + Compiler &compiler; + std::unordered_map> function_cfgs; + }; + + struct AnalyzeVariableScopeAccessHandler : OpcodeHandler + { + AnalyzeVariableScopeAccessHandler(Compiler &compiler_, SPIRFunction &entry_); + + bool follow_function_call(const SPIRFunction &) override; + void set_current_block(const SPIRBlock &block) override; + + void notify_variable_access(uint32_t id, uint32_t block); + bool id_is_phi_variable(uint32_t id) const; + bool id_is_potential_temporary(uint32_t id) const; + bool handle(spv::Op op, const uint32_t *args, uint32_t length) override; + + Compiler &compiler; + SPIRFunction &entry; + std::unordered_map> accessed_variables_to_block; + std::unordered_map> accessed_temporaries_to_block; + std::unordered_map result_id_to_type; + std::unordered_map> complete_write_variables_to_block; + std::unordered_map> partial_write_variables_to_block; + const SPIRBlock *current_block = nullptr; + }; + + struct StaticExpressionAccessHandler : OpcodeHandler + { + StaticExpressionAccessHandler(Compiler &compiler_, uint32_t variable_id_); + bool follow_function_call(const SPIRFunction &) override; + bool handle(spv::Op op, const uint32_t *args, uint32_t length) override; + + Compiler &compiler; + uint32_t variable_id; + uint32_t static_expression = 0; + uint32_t write_count = 0; + }; + + void analyze_variable_scope(SPIRFunction &function, AnalyzeVariableScopeAccessHandler &handler); + void find_function_local_luts(SPIRFunction &function, const AnalyzeVariableScopeAccessHandler &handler); + void make_constant_null(uint32_t id, uint32_t type); std::vector declared_capabilities; diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp index 5b71ab1d55..a973274616 100644 --- a/spirv_glsl.cpp +++ b/spirv_glsl.cpp @@ -418,6 +418,7 @@ string CompilerGLSL::compile() backend.supports_extensions = true; // Scan the SPIR-V to find trivial uses of extensions. + build_function_control_flow_graphs_and_analyze(); find_static_extensions(); fixup_image_load_store_access(); update_active_builtins(); @@ -1650,7 +1651,7 @@ void CompilerGLSL::emit_specialization_constant_op(const SPIRConstantOp &constan statement("const ", variable_decl(type, name), " = ", constant_op_expression(constant), ";"); } -void CompilerGLSL::emit_specialization_constant(const SPIRConstant &constant) +void CompilerGLSL::emit_constant(const SPIRConstant &constant) { auto &type = get(constant.constant_type); auto name = to_name(constant.self); @@ -2071,25 +2072,25 @@ void CompilerGLSL::emit_resources() // // TODO: If we have the fringe case that we create a spec constant which depends on a struct type, // we'll have to deal with that, but there's currently no known way to express that. 
- if (options.vulkan_semantics) + for (auto &id : ids) { - for (auto &id : ids) + if (id.get_type() == TypeConstant) { - if (id.get_type() == TypeConstant) - { - auto &c = id.get(); - if (!c.specialization) - continue; + auto &c = id.get(); - emit_specialization_constant(c); - emitted = true; - } - else if (id.get_type() == TypeConstantOp) + bool needs_declaration = (c.specialization && options.vulkan_semantics) || c.is_used_as_lut; + + if (needs_declaration) { - emit_specialization_constant_op(id.get()); + emit_constant(c); emitted = true; } } + else if (options.vulkan_semantics && id.get_type() == TypeConstantOp) + { + emit_specialization_constant_op(id.get()); + emitted = true; + } } if (emitted) @@ -2298,9 +2299,9 @@ string CompilerGLSL::enclose_expression(const string &expr) uint32_t paren_count = 0; for (auto c : expr) { - if (c == '(') + if (c == '(' || c == '[') paren_count++; - else if (c == ')') + else if (c == ')' || c == ']') { assert(paren_count); paren_count--; @@ -2424,6 +2425,8 @@ string CompilerGLSL::to_expression(uint32_t id) return builtin_to_glsl(dec.builtin_type, StorageClassGeneric); else if (c.specialization && options.vulkan_semantics) return to_name(id); + else if (c.is_used_as_lut) + return to_name(id); else if (type.basetype == SPIRType::Struct && !backend.can_declare_struct_inline) return to_name(id); else if (!type.array.empty() && !backend.can_declare_arrays_inline) @@ -6190,6 +6193,10 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) var->static_expression = ops[1]; else if (var && var->loop_variable && !var->loop_variable_enable) var->static_expression = ops[1]; + else if (var && var->remapped_variable) + { + // Skip the write. + } else if (var && flattened_structs.count(ops[0])) { store_flattened_struct(*var, ops[1]); @@ -8289,6 +8296,11 @@ string CompilerGLSL::argument_decl(const SPIRFunction::Parameter &arg) return join(direction, to_qualifiers_glsl(arg.id), variable_decl(type, to_name(arg.id), arg.id)); } +string CompilerGLSL::to_initializer_expression(const SPIRVariable &var) +{ + return to_expression(var.initializer); +} + string CompilerGLSL::variable_decl(const SPIRVariable &variable) { // Ignore the pointer type since GLSL doesn't have pointers. @@ -8306,7 +8318,7 @@ string CompilerGLSL::variable_decl(const SPIRVariable &variable) { uint32_t expr = variable.initializer; if (ids[expr].get_type() != TypeUndef) - res += join(" = ", to_expression(variable.initializer)); + res += join(" = ", to_initializer_expression(variable)); } return res; } @@ -8908,41 +8920,6 @@ void CompilerGLSL::emit_function(SPIRFunction &func, const Bitset &return_flags) current_function = &func; auto &entry_block = get(func.entry_block); - if (!func.analyzed_variable_scope) - { - analyze_variable_scope(func); - - // Check if we can actually use the loop variables we found in analyze_variable_scope. - // To use multiple initializers, we need the same type and qualifiers. 
- for (auto block : func.blocks) - { - auto &b = get(block); - if (b.loop_variables.size() < 2) - continue; - - auto &flags = get_decoration_bitset(b.loop_variables.front()); - uint32_t type = get(b.loop_variables.front()).basetype; - bool invalid_initializers = false; - for (auto loop_variable : b.loop_variables) - { - if (flags != get_decoration_bitset(loop_variable) || - type != get(b.loop_variables.front()).basetype) - { - invalid_initializers = true; - break; - } - } - - if (invalid_initializers) - { - for (auto loop_variable : b.loop_variables) - get(loop_variable).loop_variable = false; - b.loop_variables.clear(); - } - } - func.analyzed_variable_scope = true; - } - for (auto &v : func.local_variables) { auto &var = get(v); @@ -8969,6 +8946,11 @@ void CompilerGLSL::emit_function(SPIRFunction &func, const Bitset &return_flags) entry_block.dominated_variables.push_back(var.self); var.deferred_declaration = true; } + else if (var.storage == StorageClassFunction && var.remapped_variable && var.static_expression) + { + // No need to declare this variable, it has a static expression. + var.deferred_declaration = false; + } else if (expression_is_lvalue(v)) { add_local_variable_name(var.self); diff --git a/spirv_glsl.hpp b/spirv_glsl.hpp index 7205def759..f5cc6ceb86 100644 --- a/spirv_glsl.hpp +++ b/spirv_glsl.hpp @@ -374,7 +374,7 @@ protected: void emit_flattened_io_block(const SPIRVariable &var, const char *qual); void emit_block_chain(SPIRBlock &block); void emit_hoisted_temporaries(std::vector> &temporaries); - void emit_specialization_constant(const SPIRConstant &constant); + void emit_constant(const SPIRConstant &constant); void emit_specialization_constant_op(const SPIRConstantOp &constant); std::string emit_continue_block(uint32_t continue_block); bool attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method method); @@ -463,6 +463,7 @@ protected: virtual bool skip_argument(uint32_t id) const; virtual void emit_array_copy(const std::string &lhs, uint32_t rhs_id); virtual void emit_block_hints(const SPIRBlock &block); + virtual std::string to_initializer_expression(const SPIRVariable &var); bool buffer_is_packing_standard(const SPIRType &type, BufferPackingStandard packing, uint32_t start_offset = 0, uint32_t end_offset = std::numeric_limits::max()); diff --git a/spirv_hlsl.cpp b/spirv_hlsl.cpp index aaa05cc07e..34e1d19fb5 100644 --- a/spirv_hlsl.cpp +++ b/spirv_hlsl.cpp @@ -4572,6 +4572,7 @@ string CompilerHLSL::compile() backend.can_declare_arrays_inline = false; backend.can_return_array = false; + build_function_control_flow_graphs_and_analyze(); update_active_builtins(); analyze_image_and_sampler_usage(); diff --git a/spirv_msl.cpp b/spirv_msl.cpp index 4f69c57150..3f8de118b1 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -280,6 +280,7 @@ string CompilerMSL::compile() struct_member_padding.clear(); + build_function_control_flow_graphs_and_analyze(); update_active_builtins(); analyze_image_and_sampler_usage(); build_implicit_builtins(); @@ -2109,6 +2110,11 @@ bool CompilerMSL::maybe_emit_array_assignment(uint32_t id_lhs, uint32_t id_rhs) return false; auto *var = maybe_get(id_lhs); + + // Is this a remapped, static constant? Don't do anything. 
+ if (var->remapped_variable && var->statically_assigned) + return true; + if (ids[id_rhs].get_type() == TypeConstant && var && var->deferred_declaration) { // Special case, if we end up declaring a variable when assigning the constant array, @@ -4072,6 +4078,7 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o { // Get the result type of the RHS. Since this is run as a pre-processing stage, // we must extract the result type directly from the Instruction, rather than the ID. + uint32_t id_lhs = args[0]; uint32_t id_rhs = args[1]; const SPIRType *type = nullptr; @@ -4088,7 +4095,13 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o type = &compiler.get(tid); } - if (type && compiler.is_array(*type)) + auto *var = compiler.maybe_get(id_lhs); + + // Are we simply assigning to a statically assigned variable which takes a constant? + // Don't bother emitting this function. + bool static_expression_lhs = + var && var->storage == StorageClassFunction && var->statically_assigned && var->remapped_variable; + if (type && compiler.is_array(*type) && !static_expression_lhs) return SPVFuncImplArrayCopy; break; @@ -4209,7 +4222,7 @@ CompilerMSL::MemberSorter::MemberSorter(SPIRType &t, Meta &m, SortAspect sa) meta.members.resize(max(type.member_types.size(), meta.members.size())); } -void CompilerMSL::remap_constexpr_sampler(uint32_t id, const spirv_cross::MSLConstexprSampler &sampler) +void CompilerMSL::remap_constexpr_sampler(uint32_t id, const MSLConstexprSampler &sampler) { auto &type = get(get(id).basetype); if (type.basetype != SPIRType::SampledImage && type.basetype != SPIRType::Sampler) @@ -4220,10 +4233,22 @@ void CompilerMSL::remap_constexpr_sampler(uint32_t id, const spirv_cross::MSLCon } // MSL always declares builtins with their SPIR-V type. -void CompilerMSL::bitcast_from_builtin_load(uint32_t, std::string &, const spirv_cross::SPIRType &) +void CompilerMSL::bitcast_from_builtin_load(uint32_t, std::string &, const SPIRType &) { } -void CompilerMSL::bitcast_to_builtin_store(uint32_t, std::string &, const spirv_cross::SPIRType &) +void CompilerMSL::bitcast_to_builtin_store(uint32_t, std::string &, const SPIRType &) { } + +std::string CompilerMSL::to_initializer_expression(const SPIRVariable &var) +{ + // We risk getting an array initializer here with MSL. If we have an array. + // FIXME: We cannot handle non-constant arrays being initialized. + // We will need to inject spvArrayCopy here somehow ... + auto &type = get(var.basetype); + if (ids[var.initializer].get_type() == TypeConstant && (!type.array.empty() || type.basetype == SPIRType::Struct)) + return constant_expression(get(var.initializer)); + else + return CompilerGLSL::to_initializer_expression(var); +} diff --git a/spirv_msl.hpp b/spirv_msl.hpp index fe3401d91b..7997cd30fb 100644 --- a/spirv_msl.hpp +++ b/spirv_msl.hpp @@ -293,6 +293,7 @@ protected: uint32_t coord, uint32_t coord_components, uint32_t dref, uint32_t grad_x, uint32_t grad_y, uint32_t lod, uint32_t coffset, uint32_t offset, uint32_t bias, uint32_t comp, uint32_t sample, bool *p_forward) override; + std::string to_initializer_expression(const SPIRVariable &var) override; std::string unpack_expression_type(std::string expr_str, const SPIRType &type) override; std::string bitcast_glsl_op(const SPIRType &result_type, const SPIRType &argument_type) override; bool skip_argument(uint32_t id) const override;