diff --git a/reference/opt/shaders-hlsl/asm/frag/lut-promotion-initializer.asm.frag b/reference/opt/shaders-hlsl/asm/frag/lut-promotion-initializer.asm.frag new file mode 100644 index 0000000000..5deae3a569 --- /dev/null +++ b/reference/opt/shaders-hlsl/asm/frag/lut-promotion-initializer.asm.frag @@ -0,0 +1,57 @@ +static const float _46[16] = { 1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f }; +static const float4 _76[4] = { 0.0f.xxxx, 1.0f.xxxx, 8.0f.xxxx, 5.0f.xxxx }; +static const float4 _90[4] = { 20.0f.xxxx, 30.0f.xxxx, 50.0f.xxxx, 60.0f.xxxx }; + +static float FragColor; +static int index; + +struct SPIRV_Cross_Input +{ + nointerpolation int index : TEXCOORD0; +}; + +struct SPIRV_Cross_Output +{ + float FragColor : SV_Target0; +}; + +void frag_main() +{ + float4 foobar[4] = _76; + float4 baz[4] = _76; + FragColor = _46[index]; + if (index < 10) + { + FragColor += _46[index ^ 1]; + } + else + { + FragColor += _46[index & 1]; + } + bool _99 = index > 30; + if (_99) + { + FragColor += _76[index & 3].y; + } + else + { + FragColor += _76[index & 1].x; + } + if (_99) + { + foobar[1].z = 20.0f; + } + int _37 = index & 3; + FragColor += foobar[_37].z; + baz = _90; + FragColor += baz[_37].z; +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + index = stage_input.index; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/opt/shaders-hlsl/frag/array-lut-no-loop-variable.frag b/reference/opt/shaders-hlsl/frag/array-lut-no-loop-variable.frag index 8cb52f0a4d..3adf7d9852 100644 --- a/reference/opt/shaders-hlsl/frag/array-lut-no-loop-variable.frag +++ b/reference/opt/shaders-hlsl/frag/array-lut-no-loop-variable.frag @@ -15,11 +15,10 @@ struct SPIRV_Cross_Output void frag_main() { - float lut[5] = _17; for (int _46 = 0; _46 < 4; ) { int _33 = _46 + 1; - FragColor += lut[_33].xxxx; + FragColor += _17[_33].xxxx; _46 = _33; continue; } diff --git a/reference/opt/shaders-hlsl/frag/constant-composites.frag b/reference/opt/shaders-hlsl/frag/constant-composites.frag index 0514eef1ee..2613e1c2c5 100644 --- a/reference/opt/shaders-hlsl/frag/constant-composites.frag +++ b/reference/opt/shaders-hlsl/frag/constant-composites.frag @@ -30,7 +30,7 @@ void frag_main() lut = _16; foos = _28; FragColor = lut[_line].xxxx; - FragColor += (foos[_line].a * (foos[1 - _line].a)).xxxx; + FragColor += (foos[_line].a * foos[1 - _line].a).xxxx; } SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) diff --git a/reference/opt/shaders-hlsl/frag/lut-promotion.frag b/reference/opt/shaders-hlsl/frag/lut-promotion.frag new file mode 100644 index 0000000000..aae0d39de2 --- /dev/null +++ b/reference/opt/shaders-hlsl/frag/lut-promotion.frag @@ -0,0 +1,57 @@ +static const float _16[16] = { 1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f }; +static const float4 _60[4] = { 0.0f.xxxx, 1.0f.xxxx, 8.0f.xxxx, 5.0f.xxxx }; +static const float4 _104[4] = { 20.0f.xxxx, 30.0f.xxxx, 50.0f.xxxx, 60.0f.xxxx }; + +static float FragColor; +static int index; + +struct SPIRV_Cross_Input +{ + nointerpolation int index : TEXCOORD0; +}; + +struct SPIRV_Cross_Output +{ + float FragColor : SV_Target0; +}; + +void frag_main() +{ + FragColor = _16[index]; + if (index < 10) + { + FragColor += _16[index ^ 1]; + } + else + { + FragColor += _16[index & 1]; + } + bool _63 = index > 30; + if (_63) + { + FragColor += _60[index & 3].y; + } + else + { + FragColor += _60[index & 1].x; + } + float4 foobar[4] = _60; + if (_63) + { + foobar[1].z = 20.0f; + } + int _91 = index & 3; + FragColor += foobar[_91].z; + float4 baz[4] = _60; + baz = _104; + FragColor += baz[_91].z; +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + index = stage_input.index; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/opt/shaders-msl/asm/frag/lut-promotion-initializer.asm.frag b/reference/opt/shaders-msl/asm/frag/lut-promotion-initializer.asm.frag new file mode 100644 index 0000000000..9db6b5470d --- /dev/null +++ b/reference/opt/shaders-msl/asm/frag/lut-promotion-initializer.asm.frag @@ -0,0 +1,69 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +constant float _46[16] = {1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0}; +constant float4 _76[4] = {float4(0.0), float4(1.0), float4(8.0), float4(5.0)}; +constant float4 _90[4] = {float4(20.0), float4(30.0), float4(50.0), float4(60.0)}; + +struct main0_out +{ + float FragColor [[color(0)]]; +}; + +struct main0_in +{ + int index [[user(locn0)]]; +}; + +// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment. +template +void spvArrayCopy(thread T (&dst)[N], thread const T (&src)[N]) +{ + for (uint i = 0; i < N; dst[i] = src[i], i++); +} + +// An overload for constant arrays. +template +void spvArrayCopyConstant(thread T (&dst)[N], constant T (&src)[N]) +{ + for (uint i = 0; i < N; dst[i] = src[i], i++); +} + +fragment main0_out main0(main0_in in [[stage_in]]) +{ + float4 foobar[4] = {float4(0.0), float4(1.0), float4(8.0), float4(5.0)}; + float4 baz[4] = {float4(0.0), float4(1.0), float4(8.0), float4(5.0)}; + main0_out out = {}; + out.FragColor = _46[in.index]; + if (in.index < 10) + { + out.FragColor += _46[in.index ^ 1]; + } + else + { + out.FragColor += _46[in.index & 1]; + } + bool _99 = in.index > 30; + if (_99) + { + out.FragColor += _76[in.index & 3].y; + } + else + { + out.FragColor += _76[in.index & 1].x; + } + if (_99) + { + foobar[1].z = 20.0; + } + int _37 = in.index & 3; + out.FragColor += foobar[_37].z; + spvArrayCopyConstant(baz, _90); + out.FragColor += baz[_37].z; + return out; +} + diff --git a/reference/opt/shaders-msl/frag/array-lut-no-loop-variable.frag b/reference/opt/shaders-msl/frag/array-lut-no-loop-variable.frag index 6c8299fa91..60868acd87 100644 --- a/reference/opt/shaders-msl/frag/array-lut-no-loop-variable.frag +++ b/reference/opt/shaders-msl/frag/array-lut-no-loop-variable.frag @@ -1,5 +1,3 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" - #include #include @@ -12,28 +10,13 @@ struct main0_out float4 FragColor [[color(0)]]; }; -// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment. -template -void spvArrayCopy(thread T (&dst)[N], thread const T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - -// An overload for constant arrays. -template -void spvArrayCopyConstant(thread T (&dst)[N], constant T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - fragment main0_out main0() { main0_out out = {}; - float lut[5] = {1.0, 2.0, 3.0, 4.0, 5.0}; for (int _46 = 0; _46 < 4; ) { int _33 = _46 + 1; - out.FragColor += float4(lut[_33]); + out.FragColor += float4(_17[_33]); _46 = _33; continue; } diff --git a/reference/opt/shaders-msl/frag/constant-array.frag b/reference/opt/shaders-msl/frag/constant-array.frag index 63576f109c..bb55934c6d 100644 --- a/reference/opt/shaders-msl/frag/constant-array.frag +++ b/reference/opt/shaders-msl/frag/constant-array.frag @@ -44,10 +44,8 @@ void spvArrayCopyConstant(thread T (&dst)[N], constant T (&src)[N]) fragment main0_out main0(main0_in in [[stage_in]]) { main0_out out = {}; - float4 indexable[3] = {float4(1.0), float4(2.0), float4(3.0)}; - float4 indexable_1[2][2] = {{float4(1.0), float4(2.0)}, {float4(8.0), float4(10.0)}}; - Foobar indexable_2[2] = {{10.0, 40.0}, {90.0, 70.0}}; - out.FragColor = ((indexable[in.index] + (indexable_1[in.index][in.index + 1])) + float4(30.0)) + float4(indexable_2[in.index].a + indexable_2[in.index].b); + Foobar indexable[2] = {{10.0, 40.0}, {90.0, 70.0}}; + out.FragColor = ((_37[in.index] + _55[in.index][in.index + 1]) + float4(30.0)) + float4(indexable[in.index].a + indexable[in.index].b); return out; } diff --git a/reference/opt/shaders-msl/frag/constant-composites.frag b/reference/opt/shaders-msl/frag/constant-composites.frag index ec5d66e86d..cb3e592337 100644 --- a/reference/opt/shaders-msl/frag/constant-composites.frag +++ b/reference/opt/shaders-msl/frag/constant-composites.frag @@ -44,7 +44,7 @@ fragment main0_out main0(main0_in in [[stage_in]]) float lut[4] = {1.0, 4.0, 3.0, 2.0}; Foo foos[2] = {{10.0, 20.0}, {30.0, 40.0}}; out.FragColor = float4(lut[in.line]); - out.FragColor += float4(foos[in.line].a * (foos[1 - in.line].a)); + out.FragColor += float4(foos[in.line].a * foos[1 - in.line].a); return out; } diff --git a/reference/opt/shaders-msl/frag/lut-promotion.frag b/reference/opt/shaders-msl/frag/lut-promotion.frag new file mode 100644 index 0000000000..4efdf4a196 --- /dev/null +++ b/reference/opt/shaders-msl/frag/lut-promotion.frag @@ -0,0 +1,69 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +constant float _16[16] = {1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0}; +constant float4 _60[4] = {float4(0.0), float4(1.0), float4(8.0), float4(5.0)}; +constant float4 _104[4] = {float4(20.0), float4(30.0), float4(50.0), float4(60.0)}; + +struct main0_out +{ + float FragColor [[color(0)]]; +}; + +struct main0_in +{ + int index [[user(locn0)]]; +}; + +// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment. +template +void spvArrayCopy(thread T (&dst)[N], thread const T (&src)[N]) +{ + for (uint i = 0; i < N; dst[i] = src[i], i++); +} + +// An overload for constant arrays. +template +void spvArrayCopyConstant(thread T (&dst)[N], constant T (&src)[N]) +{ + for (uint i = 0; i < N; dst[i] = src[i], i++); +} + +fragment main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + out.FragColor = _16[in.index]; + if (in.index < 10) + { + out.FragColor += _16[in.index ^ 1]; + } + else + { + out.FragColor += _16[in.index & 1]; + } + bool _63 = in.index > 30; + if (_63) + { + out.FragColor += _60[in.index & 3].y; + } + else + { + out.FragColor += _60[in.index & 1].x; + } + float4 foobar[4] = {float4(0.0), float4(1.0), float4(8.0), float4(5.0)}; + if (_63) + { + foobar[1].z = 20.0; + } + int _91 = in.index & 3; + out.FragColor += foobar[_91].z; + float4 baz[4] = {float4(0.0), float4(1.0), float4(8.0), float4(5.0)}; + spvArrayCopyConstant(baz, _104); + out.FragColor += baz[_91].z; + return out; +} + diff --git a/reference/opt/shaders/asm/frag/lut-promotion-initializer.asm.frag b/reference/opt/shaders/asm/frag/lut-promotion-initializer.asm.frag new file mode 100644 index 0000000000..d88c0e36d9 --- /dev/null +++ b/reference/opt/shaders/asm/frag/lut-promotion-initializer.asm.frag @@ -0,0 +1,42 @@ +#version 310 es +precision mediump float; +precision highp int; + +const float _46[16] = float[](1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0); +const vec4 _76[4] = vec4[](vec4(0.0), vec4(1.0), vec4(8.0), vec4(5.0)); + +layout(location = 0) out float FragColor; +layout(location = 0) flat in mediump int index; + +void main() +{ + vec4 foobar[4] = _76; + vec4 baz[4] = _76; + FragColor = _46[index]; + if (index < 10) + { + FragColor += _46[index ^ 1]; + } + else + { + FragColor += _46[index & 1]; + } + bool _99 = index > 30; + if (_99) + { + FragColor += _76[index & 3].y; + } + else + { + FragColor += _76[index & 1].x; + } + if (_99) + { + foobar[1].z = 20.0; + } + mediump int _37 = index & 3; + FragColor += foobar[_37].z; + baz = vec4[](vec4(20.0), vec4(30.0), vec4(50.0), vec4(60.0)); + FragColor += baz[_37].z; +} + diff --git a/reference/opt/shaders/comp/generate_height.comp b/reference/opt/shaders/comp/generate_height.comp index 17d3073d2e..ff96e7505a 100644 --- a/reference/opt/shaders/comp/generate_height.comp +++ b/reference/opt/shaders/comp/generate_height.comp @@ -48,8 +48,8 @@ void main() vec2 _387 = _316.xx; vec2 _392 = _316.yy; vec2 _395 = _392 * _137.distribution[_280].yx; - vec2 _421 = _392 * (_137.distribution[(_476 * _448) + _475]).yx; - vec2 _429 = ((_137.distribution[(_476 * _448) + _475]) * _387) + vec2(-_421.x, _421.y); + vec2 _421 = _392 * _137.distribution[(_476 * _448) + _475].yx; + vec2 _429 = (_137.distribution[(_476 * _448) + _475] * _387) + vec2(-_421.x, _421.y); _225.heights[_280] = packHalf2x16(((_137.distribution[_280] * _387) + vec2(-_395.x, _395.y)) + vec2(_429.x, -_429.y)); } diff --git a/reference/opt/shaders/flatten/copy.flatten.vert b/reference/opt/shaders/flatten/copy.flatten.vert index 27ce450d62..33caec4f78 100644 --- a/reference/opt/shaders/flatten/copy.flatten.vert +++ b/reference/opt/shaders/flatten/copy.flatten.vert @@ -19,7 +19,7 @@ void main() for (int _96 = 0; _96 < 4; ) { vec3 _68 = aVertex.xyz - Light(UBO[_96 * 2 + 4].xyz, UBO[_96 * 2 + 4].w, UBO[_96 * 2 + 5]).Position; - vColor += (((UBO[_96 * 2 + 5]) * clamp(1.0 - (length(_68) / Light(UBO[_96 * 2 + 4].xyz, UBO[_96 * 2 + 4].w, UBO[_96 * 2 + 5]).Radius), 0.0, 1.0)) * dot(aNormal, normalize(_68))); + vColor += ((UBO[_96 * 2 + 5] * clamp(1.0 - (length(_68) / Light(UBO[_96 * 2 + 4].xyz, UBO[_96 * 2 + 4].w, UBO[_96 * 2 + 5]).Radius), 0.0, 1.0)) * dot(aNormal, normalize(_68))); _96++; continue; } diff --git a/reference/opt/shaders/flatten/dynamic.flatten.vert b/reference/opt/shaders/flatten/dynamic.flatten.vert index 8fc8ff6eed..7129af2869 100644 --- a/reference/opt/shaders/flatten/dynamic.flatten.vert +++ b/reference/opt/shaders/flatten/dynamic.flatten.vert @@ -18,8 +18,8 @@ void main() vColor = vec4(0.0); for (int _82 = 0; _82 < 4; ) { - vec3 _54 = aVertex.xyz - (UBO[_82 * 2 + 4].xyz); - vColor += (((UBO[_82 * 2 + 5]) * clamp(1.0 - (length(_54) / (UBO[_82 * 2 + 4].w)), 0.0, 1.0)) * dot(aNormal, normalize(_54))); + vec3 _54 = aVertex.xyz - UBO[_82 * 2 + 4].xyz; + vColor += ((UBO[_82 * 2 + 5] * clamp(1.0 - (length(_54) / UBO[_82 * 2 + 4].w), 0.0, 1.0)) * dot(aNormal, normalize(_54))); _82++; continue; } diff --git a/reference/opt/shaders/flatten/multi-dimensional.desktop.flatten_dim.frag b/reference/opt/shaders/flatten/multi-dimensional.desktop.flatten_dim.frag index 3657298f18..2ecee89377 100644 --- a/reference/opt/shaders/flatten/multi-dimensional.desktop.flatten_dim.frag +++ b/reference/opt/shaders/flatten/multi-dimensional.desktop.flatten_dim.frag @@ -25,6 +25,6 @@ void main() } } } - FragColor = ((values3[1 * 3 * 1 + 2 * 1 + 0]) + (values3[0 * 3 * 1 + 2 * 1 + 0])) + (values3[(vIndex + 1) * 3 * 1 + 2 * 1 + vIndex]); + FragColor = (values3[1 * 3 * 1 + 2 * 1 + 0] + values3[0 * 3 * 1 + 2 * 1 + 0]) + values3[(vIndex + 1) * 3 * 1 + 2 * 1 + vIndex]; } diff --git a/reference/opt/shaders/frag/array-lut-no-loop-variable.frag b/reference/opt/shaders/frag/array-lut-no-loop-variable.frag index 7bdfe543e4..869a76e4e7 100644 --- a/reference/opt/shaders/frag/array-lut-no-loop-variable.frag +++ b/reference/opt/shaders/frag/array-lut-no-loop-variable.frag @@ -2,15 +2,16 @@ precision mediump float; precision highp int; +const float _17[5] = float[](1.0, 2.0, 3.0, 4.0, 5.0); + layout(location = 0) out vec4 FragColor; void main() { - float lut[5] = float[](1.0, 2.0, 3.0, 4.0, 5.0); for (int _46 = 0; _46 < 4; ) { mediump int _33 = _46 + 1; - FragColor += vec4(lut[_33]); + FragColor += vec4(_17[_33]); _46 = _33; continue; } diff --git a/reference/opt/shaders/frag/constant-array.frag b/reference/opt/shaders/frag/constant-array.frag index 2af87ad80d..749fc80980 100644 --- a/reference/opt/shaders/frag/constant-array.frag +++ b/reference/opt/shaders/frag/constant-array.frag @@ -2,6 +2,9 @@ precision mediump float; precision highp int; +const vec4 _37[3] = vec4[](vec4(1.0), vec4(2.0), vec4(3.0)); +const vec4 _55[2][2] = vec4[][](vec4[](vec4(1.0), vec4(2.0)), vec4[](vec4(8.0), vec4(10.0))); + struct Foobar { float a; @@ -13,9 +16,7 @@ layout(location = 0) flat in mediump int index; void main() { - highp vec4 indexable[3] = vec4[](vec4(1.0), vec4(2.0), vec4(3.0)); - highp vec4 indexable_1[2][2] = vec4[][](vec4[](vec4(1.0), vec4(2.0)), vec4[](vec4(8.0), vec4(10.0))); - Foobar indexable_2[2] = Foobar[](Foobar(10.0, 40.0), Foobar(90.0, 70.0)); - FragColor = ((indexable[index] + (indexable_1[index][index + 1])) + vec4(30.0)) + vec4(indexable_2[index].a + indexable_2[index].b); + Foobar indexable[2] = Foobar[](Foobar(10.0, 40.0), Foobar(90.0, 70.0)); + FragColor = ((_37[index] + _55[index][index + 1]) + vec4(30.0)) + vec4(indexable[index].a + indexable[index].b); } diff --git a/reference/opt/shaders/frag/constant-composites.frag b/reference/opt/shaders/frag/constant-composites.frag index b105dbd26c..c65c60613d 100644 --- a/reference/opt/shaders/frag/constant-composites.frag +++ b/reference/opt/shaders/frag/constant-composites.frag @@ -18,6 +18,6 @@ void main() lut = float[](1.0, 4.0, 3.0, 2.0); foos = Foo[](Foo(10.0, 20.0), Foo(30.0, 40.0)); FragColor = vec4(lut[line]); - FragColor += vec4(foos[line].a * (foos[1 - line].a)); + FragColor += vec4(foos[line].a * foos[1 - line].a); } diff --git a/reference/opt/shaders/frag/lut-promotion.frag b/reference/opt/shaders/frag/lut-promotion.frag new file mode 100644 index 0000000000..21c925796e --- /dev/null +++ b/reference/opt/shaders/frag/lut-promotion.frag @@ -0,0 +1,42 @@ +#version 310 es +precision mediump float; +precision highp int; + +const float _16[16] = float[](1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0); +const vec4 _60[4] = vec4[](vec4(0.0), vec4(1.0), vec4(8.0), vec4(5.0)); + +layout(location = 0) out float FragColor; +layout(location = 0) flat in mediump int index; + +void main() +{ + FragColor = _16[index]; + if (index < 10) + { + FragColor += _16[index ^ 1]; + } + else + { + FragColor += _16[index & 1]; + } + bool _63 = index > 30; + if (_63) + { + FragColor += _60[index & 3].y; + } + else + { + FragColor += _60[index & 1].x; + } + vec4 foobar[4] = _60; + if (_63) + { + foobar[1].z = 20.0; + } + mediump int _91 = index & 3; + FragColor += foobar[_91].z; + vec4 baz[4] = _60; + baz = vec4[](vec4(20.0), vec4(30.0), vec4(50.0), vec4(60.0)); + FragColor += baz[_91].z; +} + diff --git a/reference/shaders-hlsl/asm/frag/lut-promotion-initializer.asm.frag b/reference/shaders-hlsl/asm/frag/lut-promotion-initializer.asm.frag new file mode 100644 index 0000000000..a7aec01bad --- /dev/null +++ b/reference/shaders-hlsl/asm/frag/lut-promotion-initializer.asm.frag @@ -0,0 +1,55 @@ +static const float _46[16] = { 1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f }; +static const float4 _76[4] = { 0.0f.xxxx, 1.0f.xxxx, 8.0f.xxxx, 5.0f.xxxx }; +static const float4 _90[4] = { 20.0f.xxxx, 30.0f.xxxx, 50.0f.xxxx, 60.0f.xxxx }; + +static float FragColor; +static int index; + +struct SPIRV_Cross_Input +{ + nointerpolation int index : TEXCOORD0; +}; + +struct SPIRV_Cross_Output +{ + float FragColor : SV_Target0; +}; + +void frag_main() +{ + float4 foobar[4] = _76; + float4 baz[4] = _76; + FragColor = _46[index]; + if (index < 10) + { + FragColor += _46[index ^ 1]; + } + else + { + FragColor += _46[index & 1]; + } + if (index > 30) + { + FragColor += _76[index & 3].y; + } + else + { + FragColor += _76[index & 1].x; + } + if (index > 30) + { + foobar[1].z = 20.0f; + } + FragColor += foobar[index & 3].z; + baz = _90; + FragColor += baz[index & 3].z; +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + index = stage_input.index; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-hlsl/frag/array-lut-no-loop-variable.frag b/reference/shaders-hlsl/frag/array-lut-no-loop-variable.frag index 04d4d7fa75..407fa2bda4 100644 --- a/reference/shaders-hlsl/frag/array-lut-no-loop-variable.frag +++ b/reference/shaders-hlsl/frag/array-lut-no-loop-variable.frag @@ -15,8 +15,7 @@ struct SPIRV_Cross_Output void frag_main() { - float lut[5] = _17; - for (int i = 0; i < 4; i++, FragColor += lut[i].xxxx) + for (int i = 0; i < 4; i++, FragColor += _17[i].xxxx) { } } diff --git a/reference/shaders-hlsl/frag/constant-composites.frag b/reference/shaders-hlsl/frag/constant-composites.frag index 0514eef1ee..2613e1c2c5 100644 --- a/reference/shaders-hlsl/frag/constant-composites.frag +++ b/reference/shaders-hlsl/frag/constant-composites.frag @@ -30,7 +30,7 @@ void frag_main() lut = _16; foos = _28; FragColor = lut[_line].xxxx; - FragColor += (foos[_line].a * (foos[1 - _line].a)).xxxx; + FragColor += (foos[_line].a * foos[1 - _line].a).xxxx; } SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) diff --git a/reference/shaders-hlsl/frag/lut-promotion.frag b/reference/shaders-hlsl/frag/lut-promotion.frag new file mode 100644 index 0000000000..d148bc12cb --- /dev/null +++ b/reference/shaders-hlsl/frag/lut-promotion.frag @@ -0,0 +1,55 @@ +static const float _16[16] = { 1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f }; +static const float4 _60[4] = { 0.0f.xxxx, 1.0f.xxxx, 8.0f.xxxx, 5.0f.xxxx }; +static const float4 _104[4] = { 20.0f.xxxx, 30.0f.xxxx, 50.0f.xxxx, 60.0f.xxxx }; + +static float FragColor; +static int index; + +struct SPIRV_Cross_Input +{ + nointerpolation int index : TEXCOORD0; +}; + +struct SPIRV_Cross_Output +{ + float FragColor : SV_Target0; +}; + +void frag_main() +{ + FragColor = _16[index]; + if (index < 10) + { + FragColor += _16[index ^ 1]; + } + else + { + FragColor += _16[index & 1]; + } + if (index > 30) + { + FragColor += _60[index & 3].y; + } + else + { + FragColor += _60[index & 1].x; + } + float4 foobar[4] = _60; + if (index > 30) + { + foobar[1].z = 20.0f; + } + FragColor += foobar[index & 3].z; + float4 baz[4] = _60; + baz = _104; + FragColor += baz[index & 3].z; +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + index = stage_input.index; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-msl/asm/frag/lut-promotion-initializer.asm.frag b/reference/shaders-msl/asm/frag/lut-promotion-initializer.asm.frag new file mode 100644 index 0000000000..72d41f3e70 --- /dev/null +++ b/reference/shaders-msl/asm/frag/lut-promotion-initializer.asm.frag @@ -0,0 +1,67 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +constant float _46[16] = {1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0}; +constant float4 _76[4] = {float4(0.0), float4(1.0), float4(8.0), float4(5.0)}; +constant float4 _90[4] = {float4(20.0), float4(30.0), float4(50.0), float4(60.0)}; + +struct main0_out +{ + float FragColor [[color(0)]]; +}; + +struct main0_in +{ + int index [[user(locn0)]]; +}; + +// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment. +template +void spvArrayCopy(thread T (&dst)[N], thread const T (&src)[N]) +{ + for (uint i = 0; i < N; dst[i] = src[i], i++); +} + +// An overload for constant arrays. +template +void spvArrayCopyConstant(thread T (&dst)[N], constant T (&src)[N]) +{ + for (uint i = 0; i < N; dst[i] = src[i], i++); +} + +fragment main0_out main0(main0_in in [[stage_in]]) +{ + float4 foobar[4] = {float4(0.0), float4(1.0), float4(8.0), float4(5.0)}; + float4 baz[4] = {float4(0.0), float4(1.0), float4(8.0), float4(5.0)}; + main0_out out = {}; + out.FragColor = _46[in.index]; + if (in.index < 10) + { + out.FragColor += _46[in.index ^ 1]; + } + else + { + out.FragColor += _46[in.index & 1]; + } + if (in.index > 30) + { + out.FragColor += _76[in.index & 3].y; + } + else + { + out.FragColor += _76[in.index & 1].x; + } + if (in.index > 30) + { + foobar[1].z = 20.0; + } + out.FragColor += foobar[in.index & 3].z; + spvArrayCopyConstant(baz, _90); + out.FragColor += baz[in.index & 3].z; + return out; +} + diff --git a/reference/shaders-msl/asm/frag/op-constant-null.asm.frag b/reference/shaders-msl/asm/frag/op-constant-null.asm.frag index 1d9d11c978..670cb5f371 100644 --- a/reference/shaders-msl/asm/frag/op-constant-null.asm.frag +++ b/reference/shaders-msl/asm/frag/op-constant-null.asm.frag @@ -1,5 +1,3 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" - #include #include @@ -18,20 +16,6 @@ struct main0_out float FragColor [[color(0)]]; }; -// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment. -template -void spvArrayCopy(thread T (&dst)[N], thread const T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - -// An overload for constant arrays. -template -void spvArrayCopyConstant(thread T (&dst)[N], constant T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - fragment main0_out main0() { main0_out out = {}; @@ -39,7 +23,6 @@ fragment main0_out main0() float4 b = float4(0.0); float2x3 c = float2x3(float3(0.0), float3(0.0)); D d = {float4(0.0), 0.0}; - float4 e[4] = {float4(0.0), float4(0.0), float4(0.0), float4(0.0)}; out.FragColor = a; return out; } diff --git a/reference/shaders-msl/comp/composite-construct.comp b/reference/shaders-msl/comp/composite-construct.comp index fb4ed1f101..ef0412b1dc 100644 --- a/reference/shaders-msl/comp/composite-construct.comp +++ b/reference/shaders-msl/comp/composite-construct.comp @@ -42,8 +42,7 @@ kernel void main0(device SSBO0& _16 [[buffer(0)]], device SSBO1& _32 [[buffer(1) float4 _37[2] = { _16.as[gl_GlobalInvocationID.x], _32.bs[gl_GlobalInvocationID.x] }; float4 values[2]; spvArrayCopy(values, _37); - float4 copy_values[2] = {float4(20.0), float4(40.0)}; - Composite c = Composite{ values[0], copy_values[1] }; + Composite c = Composite{ values[0], _43[1] }; _16.as[0] = values[gl_LocalInvocationIndex]; _32.bs[1] = c.b; } diff --git a/reference/shaders-msl/frag/array-lut-no-loop-variable.frag b/reference/shaders-msl/frag/array-lut-no-loop-variable.frag index 8943a8e5a1..37f83aae26 100644 --- a/reference/shaders-msl/frag/array-lut-no-loop-variable.frag +++ b/reference/shaders-msl/frag/array-lut-no-loop-variable.frag @@ -1,5 +1,3 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" - #include #include @@ -12,25 +10,10 @@ struct main0_out float4 FragColor [[color(0)]]; }; -// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment. -template -void spvArrayCopy(thread T (&dst)[N], thread const T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - -// An overload for constant arrays. -template -void spvArrayCopyConstant(thread T (&dst)[N], constant T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - fragment main0_out main0() { main0_out out = {}; - float lut[5] = {1.0, 2.0, 3.0, 4.0, 5.0}; - for (int i = 0; i < 4; i++, out.FragColor += float4(lut[i])) + for (int i = 0; i < 4; i++, out.FragColor += float4(_17[i])) { } return out; diff --git a/reference/shaders-msl/frag/constant-array.frag b/reference/shaders-msl/frag/constant-array.frag index 09f123b29a..212d4f0803 100644 --- a/reference/shaders-msl/frag/constant-array.frag +++ b/reference/shaders-msl/frag/constant-array.frag @@ -49,12 +49,10 @@ float4 resolve(thread const Foobar& f) fragment main0_out main0(main0_in in [[stage_in]]) { main0_out out = {}; - float4 indexable[3] = {float4(1.0), float4(2.0), float4(3.0)}; - float4 indexable_1[2][2] = {{float4(1.0), float4(2.0)}, {float4(8.0), float4(10.0)}}; Foobar param = {10.0, 20.0}; - Foobar indexable_2[2] = {{10.0, 40.0}, {90.0, 70.0}}; - Foobar param_1 = indexable_2[in.index]; - out.FragColor = ((indexable[in.index] + (indexable_1[in.index][in.index + 1])) + resolve(param)) + resolve(param_1); + Foobar indexable[2] = {{10.0, 40.0}, {90.0, 70.0}}; + Foobar param_1 = indexable[in.index]; + out.FragColor = ((_37[in.index] + _55[in.index][in.index + 1]) + resolve(param)) + resolve(param_1); return out; } diff --git a/reference/shaders-msl/frag/constant-composites.frag b/reference/shaders-msl/frag/constant-composites.frag index ec5d66e86d..cb3e592337 100644 --- a/reference/shaders-msl/frag/constant-composites.frag +++ b/reference/shaders-msl/frag/constant-composites.frag @@ -44,7 +44,7 @@ fragment main0_out main0(main0_in in [[stage_in]]) float lut[4] = {1.0, 4.0, 3.0, 2.0}; Foo foos[2] = {{10.0, 20.0}, {30.0, 40.0}}; out.FragColor = float4(lut[in.line]); - out.FragColor += float4(foos[in.line].a * (foos[1 - in.line].a)); + out.FragColor += float4(foos[in.line].a * foos[1 - in.line].a); return out; } diff --git a/reference/shaders-msl/frag/lut-promotion.frag b/reference/shaders-msl/frag/lut-promotion.frag new file mode 100644 index 0000000000..a8eda3e13a --- /dev/null +++ b/reference/shaders-msl/frag/lut-promotion.frag @@ -0,0 +1,67 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +constant float _16[16] = {1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0}; +constant float4 _60[4] = {float4(0.0), float4(1.0), float4(8.0), float4(5.0)}; +constant float4 _104[4] = {float4(20.0), float4(30.0), float4(50.0), float4(60.0)}; + +struct main0_out +{ + float FragColor [[color(0)]]; +}; + +struct main0_in +{ + int index [[user(locn0)]]; +}; + +// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment. +template +void spvArrayCopy(thread T (&dst)[N], thread const T (&src)[N]) +{ + for (uint i = 0; i < N; dst[i] = src[i], i++); +} + +// An overload for constant arrays. +template +void spvArrayCopyConstant(thread T (&dst)[N], constant T (&src)[N]) +{ + for (uint i = 0; i < N; dst[i] = src[i], i++); +} + +fragment main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + out.FragColor = _16[in.index]; + if (in.index < 10) + { + out.FragColor += _16[in.index ^ 1]; + } + else + { + out.FragColor += _16[in.index & 1]; + } + if (in.index > 30) + { + out.FragColor += _60[in.index & 3].y; + } + else + { + out.FragColor += _60[in.index & 1].x; + } + float4 foobar[4] = {float4(0.0), float4(1.0), float4(8.0), float4(5.0)}; + if (in.index > 30) + { + foobar[1].z = 20.0; + } + out.FragColor += foobar[in.index & 3].z; + float4 baz[4] = {float4(0.0), float4(1.0), float4(8.0), float4(5.0)}; + spvArrayCopyConstant(baz, _104); + out.FragColor += baz[in.index & 3].z; + return out; +} + diff --git a/reference/shaders/asm/frag/lut-promotion-initializer.asm.frag b/reference/shaders/asm/frag/lut-promotion-initializer.asm.frag new file mode 100644 index 0000000000..c08bc2c781 --- /dev/null +++ b/reference/shaders/asm/frag/lut-promotion-initializer.asm.frag @@ -0,0 +1,40 @@ +#version 310 es +precision mediump float; +precision highp int; + +const float _46[16] = float[](1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0); +const vec4 _76[4] = vec4[](vec4(0.0), vec4(1.0), vec4(8.0), vec4(5.0)); + +layout(location = 0) out float FragColor; +layout(location = 0) flat in mediump int index; + +void main() +{ + vec4 foobar[4] = _76; + vec4 baz[4] = _76; + FragColor = _46[index]; + if (index < 10) + { + FragColor += _46[index ^ 1]; + } + else + { + FragColor += _46[index & 1]; + } + if (index > 30) + { + FragColor += _76[index & 3].y; + } + else + { + FragColor += _76[index & 1].x; + } + if (index > 30) + { + foobar[1].z = 20.0; + } + FragColor += foobar[index & 3].z; + baz = vec4[](vec4(20.0), vec4(30.0), vec4(50.0), vec4(60.0)); + FragColor += baz[index & 3].z; +} + diff --git a/reference/shaders/asm/frag/op-constant-null.asm.frag b/reference/shaders/asm/frag/op-constant-null.asm.frag index c4ae981f64..970b4c4a66 100644 --- a/reference/shaders/asm/frag/op-constant-null.asm.frag +++ b/reference/shaders/asm/frag/op-constant-null.asm.frag @@ -2,6 +2,8 @@ precision mediump float; precision highp int; +const vec4 _14[4] = vec4[](vec4(0.0), vec4(0.0), vec4(0.0), vec4(0.0)); + struct D { vec4 a; @@ -16,7 +18,6 @@ void main() vec4 b = vec4(0.0); mat2x3 c = mat2x3(vec3(0.0), vec3(0.0)); D d = D(vec4(0.0), 0.0); - vec4 e[4] = vec4[](vec4(0.0), vec4(0.0), vec4(0.0), vec4(0.0)); FragColor = a; } diff --git a/reference/shaders/comp/composite-construct.comp b/reference/shaders/comp/composite-construct.comp index 91bb5348f5..3018be8f1b 100644 --- a/reference/shaders/comp/composite-construct.comp +++ b/reference/shaders/comp/composite-construct.comp @@ -1,6 +1,9 @@ #version 310 es layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; +const vec4 _66[2] = vec4[](vec4(10.0), vec4(30.0)); +const float _94[2][3] = float[][](float[](1.0, 1.0, 1.0), float[](2.0, 2.0, 2.0)); + struct Composite { vec4 a[2]; @@ -25,13 +28,11 @@ vec4 summe(vec4 values[3][2]) void main() { vec4 values[2] = vec4[](_41.as[gl_GlobalInvocationID.x], _55.bs[gl_GlobalInvocationID.x]); - vec4 const_values[2] = vec4[](vec4(10.0), vec4(30.0)); - vec4 copy_values[2] = const_values; + vec4 copy_values[2] = _66; vec4 copy_values2[2] = values; vec4 param[3][2] = vec4[][](values, copy_values, copy_values2); _41.as[gl_GlobalInvocationID.x] = summe(param); Composite c = Composite(values, copy_values); - float arrayofarray[2][3] = float[][](float[](1.0, 1.0, 1.0), float[](2.0, 2.0, 2.0)); float b = 10.0; float values_scalar[4] = float[](b, b, b, b); } diff --git a/reference/shaders/flatten/copy.flatten.vert b/reference/shaders/flatten/copy.flatten.vert index 2bdd723886..6416a4f425 100644 --- a/reference/shaders/flatten/copy.flatten.vert +++ b/reference/shaders/flatten/copy.flatten.vert @@ -23,7 +23,7 @@ void main() light.Radius = Light(UBO[i * 2 + 4].xyz, UBO[i * 2 + 4].w, UBO[i * 2 + 5]).Radius; light.Color = Light(UBO[i * 2 + 4].xyz, UBO[i * 2 + 4].w, UBO[i * 2 + 5]).Color; vec3 L = aVertex.xyz - light.Position; - vColor += (((UBO[i * 2 + 5]) * clamp(1.0 - (length(L) / light.Radius), 0.0, 1.0)) * dot(aNormal, normalize(L))); + vColor += ((UBO[i * 2 + 5] * clamp(1.0 - (length(L) / light.Radius), 0.0, 1.0)) * dot(aNormal, normalize(L))); } } diff --git a/reference/shaders/flatten/dynamic.flatten.vert b/reference/shaders/flatten/dynamic.flatten.vert index 6214ca450a..8be397ea3f 100644 --- a/reference/shaders/flatten/dynamic.flatten.vert +++ b/reference/shaders/flatten/dynamic.flatten.vert @@ -18,8 +18,8 @@ void main() vColor = vec4(0.0); for (int i = 0; i < 4; i++) { - vec3 L = aVertex.xyz - (UBO[i * 2 + 4].xyz); - vColor += (((UBO[i * 2 + 5]) * clamp(1.0 - (length(L) / (UBO[i * 2 + 4].w)), 0.0, 1.0)) * dot(aNormal, normalize(L))); + vec3 L = aVertex.xyz - UBO[i * 2 + 4].xyz; + vColor += ((UBO[i * 2 + 5] * clamp(1.0 - (length(L) / UBO[i * 2 + 4].w), 0.0, 1.0)) * dot(aNormal, normalize(L))); } } diff --git a/reference/shaders/flatten/multi-dimensional.desktop.flatten_dim.frag b/reference/shaders/flatten/multi-dimensional.desktop.flatten_dim.frag index 21c3363ca6..ef6bb526ab 100644 --- a/reference/shaders/flatten/multi-dimensional.desktop.flatten_dim.frag +++ b/reference/shaders/flatten/multi-dimensional.desktop.flatten_dim.frag @@ -19,6 +19,6 @@ void main() } } } - FragColor = ((values3[1 * 3 * 1 + 2 * 1 + 0]) + (values3[0 * 3 * 1 + 2 * 1 + 0])) + (values3[(vIndex + 1) * 3 * 1 + 2 * 1 + vIndex]); + FragColor = (values3[1 * 3 * 1 + 2 * 1 + 0] + values3[0 * 3 * 1 + 2 * 1 + 0]) + values3[(vIndex + 1) * 3 * 1 + 2 * 1 + vIndex]; } diff --git a/reference/shaders/frag/array-lut-no-loop-variable.frag b/reference/shaders/frag/array-lut-no-loop-variable.frag index 54d7bf774c..baf2302519 100644 --- a/reference/shaders/frag/array-lut-no-loop-variable.frag +++ b/reference/shaders/frag/array-lut-no-loop-variable.frag @@ -2,12 +2,13 @@ precision mediump float; precision highp int; +const float _17[5] = float[](1.0, 2.0, 3.0, 4.0, 5.0); + layout(location = 0) out vec4 FragColor; void main() { - float lut[5] = float[](1.0, 2.0, 3.0, 4.0, 5.0); - for (mediump int i = 0; i < 4; i++, FragColor += vec4(lut[i])) + for (mediump int i = 0; i < 4; i++, FragColor += vec4(_17[i])) { } } diff --git a/reference/shaders/frag/constant-array.frag b/reference/shaders/frag/constant-array.frag index 4da9b8948b..be033f3873 100644 --- a/reference/shaders/frag/constant-array.frag +++ b/reference/shaders/frag/constant-array.frag @@ -2,6 +2,9 @@ precision mediump float; precision highp int; +const vec4 _37[3] = vec4[](vec4(1.0), vec4(2.0), vec4(3.0)); +const vec4 _55[2][2] = vec4[][](vec4[](vec4(1.0), vec4(2.0)), vec4[](vec4(8.0), vec4(10.0))); + struct Foobar { float a; @@ -18,11 +21,9 @@ vec4 resolve(Foobar f) void main() { - highp vec4 indexable[3] = vec4[](vec4(1.0), vec4(2.0), vec4(3.0)); - highp vec4 indexable_1[2][2] = vec4[][](vec4[](vec4(1.0), vec4(2.0)), vec4[](vec4(8.0), vec4(10.0))); Foobar param = Foobar(10.0, 20.0); - Foobar indexable_2[2] = Foobar[](Foobar(10.0, 40.0), Foobar(90.0, 70.0)); - Foobar param_1 = indexable_2[index]; - FragColor = ((indexable[index] + (indexable_1[index][index + 1])) + resolve(param)) + resolve(param_1); + Foobar indexable[2] = Foobar[](Foobar(10.0, 40.0), Foobar(90.0, 70.0)); + Foobar param_1 = indexable[index]; + FragColor = ((_37[index] + _55[index][index + 1]) + resolve(param)) + resolve(param_1); } diff --git a/reference/shaders/frag/constant-composites.frag b/reference/shaders/frag/constant-composites.frag index b105dbd26c..c65c60613d 100644 --- a/reference/shaders/frag/constant-composites.frag +++ b/reference/shaders/frag/constant-composites.frag @@ -18,6 +18,6 @@ void main() lut = float[](1.0, 4.0, 3.0, 2.0); foos = Foo[](Foo(10.0, 20.0), Foo(30.0, 40.0)); FragColor = vec4(lut[line]); - FragColor += vec4(foos[line].a * (foos[1 - line].a)); + FragColor += vec4(foos[line].a * foos[1 - line].a); } diff --git a/reference/shaders/frag/lut-promotion.frag b/reference/shaders/frag/lut-promotion.frag new file mode 100644 index 0000000000..019393f9f3 --- /dev/null +++ b/reference/shaders/frag/lut-promotion.frag @@ -0,0 +1,40 @@ +#version 310 es +precision mediump float; +precision highp int; + +const float _16[16] = float[](1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0); +const vec4 _60[4] = vec4[](vec4(0.0), vec4(1.0), vec4(8.0), vec4(5.0)); + +layout(location = 0) out float FragColor; +layout(location = 0) flat in mediump int index; + +void main() +{ + FragColor = _16[index]; + if (index < 10) + { + FragColor += _16[index ^ 1]; + } + else + { + FragColor += _16[index & 1]; + } + if (index > 30) + { + FragColor += _60[index & 3].y; + } + else + { + FragColor += _60[index & 1].x; + } + vec4 foobar[4] = _60; + if (index > 30) + { + foobar[1].z = 20.0; + } + FragColor += foobar[index & 3].z; + vec4 baz[4] = _60; + baz = vec4[](vec4(20.0), vec4(30.0), vec4(50.0), vec4(60.0)); + FragColor += baz[index & 3].z; +} + diff --git a/shaders-hlsl/asm/frag/lut-promotion-initializer.asm.frag b/shaders-hlsl/asm/frag/lut-promotion-initializer.asm.frag new file mode 100644 index 0000000000..320e5ebfbd --- /dev/null +++ b/shaders-hlsl/asm/frag/lut-promotion-initializer.asm.frag @@ -0,0 +1,195 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 6 +; Bound: 111 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor %index + OpExecutionMode %main OriginUpperLeft + OpSource ESSL 310 + OpName %main "main" + OpName %FragColor "FragColor" + OpName %index "index" + OpName %indexable "indexable" + OpName %indexable_0 "indexable" + OpName %indexable_1 "indexable" + OpName %foo "foo" + OpName %foobar "foobar" + OpName %baz "baz" + OpDecorate %FragColor RelaxedPrecision + OpDecorate %FragColor Location 0 + OpDecorate %index RelaxedPrecision + OpDecorate %index Flat + OpDecorate %index Location 0 + OpDecorate %20 RelaxedPrecision + OpDecorate %25 RelaxedPrecision + OpDecorate %26 RelaxedPrecision + OpDecorate %32 RelaxedPrecision + OpDecorate %34 RelaxedPrecision + OpDecorate %37 RelaxedPrecision + OpDecorate %38 RelaxedPrecision + OpDecorate %39 RelaxedPrecision + OpDecorate %41 RelaxedPrecision + OpDecorate %42 RelaxedPrecision + OpDecorate %45 RelaxedPrecision + OpDecorate %46 RelaxedPrecision + OpDecorate %47 RelaxedPrecision + OpDecorate %foo RelaxedPrecision + OpDecorate %61 RelaxedPrecision + OpDecorate %66 RelaxedPrecision + OpDecorate %68 RelaxedPrecision + OpDecorate %71 RelaxedPrecision + OpDecorate %72 RelaxedPrecision + OpDecorate %73 RelaxedPrecision + OpDecorate %75 RelaxedPrecision + OpDecorate %76 RelaxedPrecision + OpDecorate %79 RelaxedPrecision + OpDecorate %80 RelaxedPrecision + OpDecorate %81 RelaxedPrecision + OpDecorate %foobar RelaxedPrecision + OpDecorate %83 RelaxedPrecision + OpDecorate %90 RelaxedPrecision + OpDecorate %91 RelaxedPrecision + OpDecorate %93 RelaxedPrecision + OpDecorate %94 RelaxedPrecision + OpDecorate %95 RelaxedPrecision + OpDecorate %baz RelaxedPrecision + OpDecorate %105 RelaxedPrecision + OpDecorate %106 RelaxedPrecision + OpDecorate %108 RelaxedPrecision + OpDecorate %109 RelaxedPrecision + OpDecorate %110 RelaxedPrecision + OpDecorate %16 RelaxedPrecision + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 +%_ptr_Output_float = OpTypePointer Output %float + %FragColor = OpVariable %_ptr_Output_float Output + %uint = OpTypeInt 32 0 + %uint_16 = OpConstant %uint 16 +%_arr_float_uint_16 = OpTypeArray %float %uint_16 + %float_1 = OpConstant %float 1 + %float_2 = OpConstant %float 2 + %float_3 = OpConstant %float 3 + %float_4 = OpConstant %float 4 + %16 = OpConstantComposite %_arr_float_uint_16 %float_1 %float_2 %float_3 %float_4 %float_1 %float_2 %float_3 %float_4 %float_1 %float_2 %float_3 %float_4 %float_1 %float_2 %float_3 %float_4 + %int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int + %index = OpVariable %_ptr_Input_int Input +%_ptr_Function__arr_float_uint_16 = OpTypePointer Function %_arr_float_uint_16 +%_ptr_Function_float = OpTypePointer Function %float + %int_10 = OpConstant %int 10 + %bool = OpTypeBool + %int_1 = OpConstant %int 1 + %v4float = OpTypeVector %float 4 + %uint_4 = OpConstant %uint 4 +%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4 +%_ptr_Function__arr_v4float_uint_4 = OpTypePointer Function %_arr_v4float_uint_4 + %float_0 = OpConstant %float 0 + %54 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 + %55 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 + %float_8 = OpConstant %float 8 + %57 = OpConstantComposite %v4float %float_8 %float_8 %float_8 %float_8 + %float_5 = OpConstant %float 5 + %59 = OpConstantComposite %v4float %float_5 %float_5 %float_5 %float_5 + %60 = OpConstantComposite %_arr_v4float_uint_4 %54 %55 %57 %59 + %int_30 = OpConstant %int 30 + %int_3 = OpConstant %int 3 + %uint_1 = OpConstant %uint 1 + %uint_0 = OpConstant %uint 0 + %float_20 = OpConstant %float 20 + %uint_2 = OpConstant %uint 2 + %97 = OpConstantComposite %v4float %float_20 %float_20 %float_20 %float_20 + %float_30 = OpConstant %float 30 + %99 = OpConstantComposite %v4float %float_30 %float_30 %float_30 %float_30 + %float_50 = OpConstant %float 50 + %101 = OpConstantComposite %v4float %float_50 %float_50 %float_50 %float_50 + %float_60 = OpConstant %float 60 + %103 = OpConstantComposite %v4float %float_60 %float_60 %float_60 %float_60 + %104 = OpConstantComposite %_arr_v4float_uint_4 %97 %99 %101 %103 + %main = OpFunction %void None %3 + %5 = OpLabel + %indexable = OpVariable %_ptr_Function__arr_float_uint_16 Function %16 +%indexable_0 = OpVariable %_ptr_Function__arr_float_uint_16 Function %16 +%indexable_1 = OpVariable %_ptr_Function__arr_float_uint_16 Function %16 + %foo = OpVariable %_ptr_Function__arr_v4float_uint_4 Function %60 + %foobar = OpVariable %_ptr_Function__arr_v4float_uint_4 Function %60 + %baz = OpVariable %_ptr_Function__arr_v4float_uint_4 Function %60 + %20 = OpLoad %int %index + %24 = OpAccessChain %_ptr_Function_float %indexable %20 + %25 = OpLoad %float %24 + OpStore %FragColor %25 + %26 = OpLoad %int %index + %29 = OpSLessThan %bool %26 %int_10 + OpSelectionMerge %31 None + OpBranchConditional %29 %30 %40 + %30 = OpLabel + %32 = OpLoad %int %index + %34 = OpBitwiseXor %int %32 %int_1 + %36 = OpAccessChain %_ptr_Function_float %indexable_0 %34 + %37 = OpLoad %float %36 + %38 = OpLoad %float %FragColor + %39 = OpFAdd %float %38 %37 + OpStore %FragColor %39 + OpBranch %31 + %40 = OpLabel + %41 = OpLoad %int %index + %42 = OpBitwiseAnd %int %41 %int_1 + %44 = OpAccessChain %_ptr_Function_float %indexable_1 %42 + %45 = OpLoad %float %44 + %46 = OpLoad %float %FragColor + %47 = OpFAdd %float %46 %45 + OpStore %FragColor %47 + OpBranch %31 + %31 = OpLabel + %61 = OpLoad %int %index + %63 = OpSGreaterThan %bool %61 %int_30 + OpSelectionMerge %65 None + OpBranchConditional %63 %64 %74 + %64 = OpLabel + %66 = OpLoad %int %index + %68 = OpBitwiseAnd %int %66 %int_3 + %70 = OpAccessChain %_ptr_Function_float %foo %68 %uint_1 + %71 = OpLoad %float %70 + %72 = OpLoad %float %FragColor + %73 = OpFAdd %float %72 %71 + OpStore %FragColor %73 + OpBranch %65 + %74 = OpLabel + %75 = OpLoad %int %index + %76 = OpBitwiseAnd %int %75 %int_1 + %78 = OpAccessChain %_ptr_Function_float %foo %76 %uint_0 + %79 = OpLoad %float %78 + %80 = OpLoad %float %FragColor + %81 = OpFAdd %float %80 %79 + OpStore %FragColor %81 + OpBranch %65 + %65 = OpLabel + %83 = OpLoad %int %index + %84 = OpSGreaterThan %bool %83 %int_30 + OpSelectionMerge %86 None + OpBranchConditional %84 %85 %86 + %85 = OpLabel + %89 = OpAccessChain %_ptr_Function_float %foobar %int_1 %uint_2 + OpStore %89 %float_20 + OpBranch %86 + %86 = OpLabel + %90 = OpLoad %int %index + %91 = OpBitwiseAnd %int %90 %int_3 + %92 = OpAccessChain %_ptr_Function_float %foobar %91 %uint_2 + %93 = OpLoad %float %92 + %94 = OpLoad %float %FragColor + %95 = OpFAdd %float %94 %93 + OpStore %FragColor %95 + OpStore %baz %104 + %105 = OpLoad %int %index + %106 = OpBitwiseAnd %int %105 %int_3 + %107 = OpAccessChain %_ptr_Function_float %baz %106 %uint_2 + %108 = OpLoad %float %107 + %109 = OpLoad %float %FragColor + %110 = OpFAdd %float %109 %108 + OpStore %FragColor %110 + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl/frag/lut-promotion.frag b/shaders-hlsl/frag/lut-promotion.frag new file mode 100644 index 0000000000..0cdc8148f9 --- /dev/null +++ b/shaders-hlsl/frag/lut-promotion.frag @@ -0,0 +1,44 @@ +#version 310 es +precision mediump float; +layout(location = 0) out float FragColor; +layout(location = 0) flat in int index; + +const float LUT[16] = float[]( + 1.0, 2.0, 3.0, 4.0, + 1.0, 2.0, 3.0, 4.0, + 1.0, 2.0, 3.0, 4.0, + 1.0, 2.0, 3.0, 4.0); + +void main() +{ + // Try reading LUTs, both in branches and not branch. + FragColor = LUT[index]; + if (index < 10) + FragColor += LUT[index ^ 1]; + else + FragColor += LUT[index & 1]; + + // Not declared as a LUT, but can be promoted to one. + vec4 foo[4] = vec4[](vec4(0.0), vec4(1.0), vec4(8.0), vec4(5.0)); + if (index > 30) + { + FragColor += foo[index & 3].y; + } + else + { + FragColor += foo[index & 1].x; + } + + // Not declared as a LUT, but this cannot be promoted, because we have a partial write. + vec4 foobar[4] = vec4[](vec4(0.0), vec4(1.0), vec4(8.0), vec4(5.0)); + if (index > 30) + { + foobar[1].z = 20.0; + } + FragColor += foobar[index & 3].z; + + // Not declared as a LUT, but this cannot be promoted, because we have two complete writes. + vec4 baz[4] = vec4[](vec4(0.0), vec4(1.0), vec4(8.0), vec4(5.0)); + baz = vec4[](vec4(20.0), vec4(30.0), vec4(50.0), vec4(60.0)); + FragColor += baz[index & 3].z; +} diff --git a/shaders-msl/asm/frag/lut-promotion-initializer.asm.frag b/shaders-msl/asm/frag/lut-promotion-initializer.asm.frag new file mode 100644 index 0000000000..320e5ebfbd --- /dev/null +++ b/shaders-msl/asm/frag/lut-promotion-initializer.asm.frag @@ -0,0 +1,195 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 6 +; Bound: 111 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor %index + OpExecutionMode %main OriginUpperLeft + OpSource ESSL 310 + OpName %main "main" + OpName %FragColor "FragColor" + OpName %index "index" + OpName %indexable "indexable" + OpName %indexable_0 "indexable" + OpName %indexable_1 "indexable" + OpName %foo "foo" + OpName %foobar "foobar" + OpName %baz "baz" + OpDecorate %FragColor RelaxedPrecision + OpDecorate %FragColor Location 0 + OpDecorate %index RelaxedPrecision + OpDecorate %index Flat + OpDecorate %index Location 0 + OpDecorate %20 RelaxedPrecision + OpDecorate %25 RelaxedPrecision + OpDecorate %26 RelaxedPrecision + OpDecorate %32 RelaxedPrecision + OpDecorate %34 RelaxedPrecision + OpDecorate %37 RelaxedPrecision + OpDecorate %38 RelaxedPrecision + OpDecorate %39 RelaxedPrecision + OpDecorate %41 RelaxedPrecision + OpDecorate %42 RelaxedPrecision + OpDecorate %45 RelaxedPrecision + OpDecorate %46 RelaxedPrecision + OpDecorate %47 RelaxedPrecision + OpDecorate %foo RelaxedPrecision + OpDecorate %61 RelaxedPrecision + OpDecorate %66 RelaxedPrecision + OpDecorate %68 RelaxedPrecision + OpDecorate %71 RelaxedPrecision + OpDecorate %72 RelaxedPrecision + OpDecorate %73 RelaxedPrecision + OpDecorate %75 RelaxedPrecision + OpDecorate %76 RelaxedPrecision + OpDecorate %79 RelaxedPrecision + OpDecorate %80 RelaxedPrecision + OpDecorate %81 RelaxedPrecision + OpDecorate %foobar RelaxedPrecision + OpDecorate %83 RelaxedPrecision + OpDecorate %90 RelaxedPrecision + OpDecorate %91 RelaxedPrecision + OpDecorate %93 RelaxedPrecision + OpDecorate %94 RelaxedPrecision + OpDecorate %95 RelaxedPrecision + OpDecorate %baz RelaxedPrecision + OpDecorate %105 RelaxedPrecision + OpDecorate %106 RelaxedPrecision + OpDecorate %108 RelaxedPrecision + OpDecorate %109 RelaxedPrecision + OpDecorate %110 RelaxedPrecision + OpDecorate %16 RelaxedPrecision + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 +%_ptr_Output_float = OpTypePointer Output %float + %FragColor = OpVariable %_ptr_Output_float Output + %uint = OpTypeInt 32 0 + %uint_16 = OpConstant %uint 16 +%_arr_float_uint_16 = OpTypeArray %float %uint_16 + %float_1 = OpConstant %float 1 + %float_2 = OpConstant %float 2 + %float_3 = OpConstant %float 3 + %float_4 = OpConstant %float 4 + %16 = OpConstantComposite %_arr_float_uint_16 %float_1 %float_2 %float_3 %float_4 %float_1 %float_2 %float_3 %float_4 %float_1 %float_2 %float_3 %float_4 %float_1 %float_2 %float_3 %float_4 + %int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int + %index = OpVariable %_ptr_Input_int Input +%_ptr_Function__arr_float_uint_16 = OpTypePointer Function %_arr_float_uint_16 +%_ptr_Function_float = OpTypePointer Function %float + %int_10 = OpConstant %int 10 + %bool = OpTypeBool + %int_1 = OpConstant %int 1 + %v4float = OpTypeVector %float 4 + %uint_4 = OpConstant %uint 4 +%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4 +%_ptr_Function__arr_v4float_uint_4 = OpTypePointer Function %_arr_v4float_uint_4 + %float_0 = OpConstant %float 0 + %54 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 + %55 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 + %float_8 = OpConstant %float 8 + %57 = OpConstantComposite %v4float %float_8 %float_8 %float_8 %float_8 + %float_5 = OpConstant %float 5 + %59 = OpConstantComposite %v4float %float_5 %float_5 %float_5 %float_5 + %60 = OpConstantComposite %_arr_v4float_uint_4 %54 %55 %57 %59 + %int_30 = OpConstant %int 30 + %int_3 = OpConstant %int 3 + %uint_1 = OpConstant %uint 1 + %uint_0 = OpConstant %uint 0 + %float_20 = OpConstant %float 20 + %uint_2 = OpConstant %uint 2 + %97 = OpConstantComposite %v4float %float_20 %float_20 %float_20 %float_20 + %float_30 = OpConstant %float 30 + %99 = OpConstantComposite %v4float %float_30 %float_30 %float_30 %float_30 + %float_50 = OpConstant %float 50 + %101 = OpConstantComposite %v4float %float_50 %float_50 %float_50 %float_50 + %float_60 = OpConstant %float 60 + %103 = OpConstantComposite %v4float %float_60 %float_60 %float_60 %float_60 + %104 = OpConstantComposite %_arr_v4float_uint_4 %97 %99 %101 %103 + %main = OpFunction %void None %3 + %5 = OpLabel + %indexable = OpVariable %_ptr_Function__arr_float_uint_16 Function %16 +%indexable_0 = OpVariable %_ptr_Function__arr_float_uint_16 Function %16 +%indexable_1 = OpVariable %_ptr_Function__arr_float_uint_16 Function %16 + %foo = OpVariable %_ptr_Function__arr_v4float_uint_4 Function %60 + %foobar = OpVariable %_ptr_Function__arr_v4float_uint_4 Function %60 + %baz = OpVariable %_ptr_Function__arr_v4float_uint_4 Function %60 + %20 = OpLoad %int %index + %24 = OpAccessChain %_ptr_Function_float %indexable %20 + %25 = OpLoad %float %24 + OpStore %FragColor %25 + %26 = OpLoad %int %index + %29 = OpSLessThan %bool %26 %int_10 + OpSelectionMerge %31 None + OpBranchConditional %29 %30 %40 + %30 = OpLabel + %32 = OpLoad %int %index + %34 = OpBitwiseXor %int %32 %int_1 + %36 = OpAccessChain %_ptr_Function_float %indexable_0 %34 + %37 = OpLoad %float %36 + %38 = OpLoad %float %FragColor + %39 = OpFAdd %float %38 %37 + OpStore %FragColor %39 + OpBranch %31 + %40 = OpLabel + %41 = OpLoad %int %index + %42 = OpBitwiseAnd %int %41 %int_1 + %44 = OpAccessChain %_ptr_Function_float %indexable_1 %42 + %45 = OpLoad %float %44 + %46 = OpLoad %float %FragColor + %47 = OpFAdd %float %46 %45 + OpStore %FragColor %47 + OpBranch %31 + %31 = OpLabel + %61 = OpLoad %int %index + %63 = OpSGreaterThan %bool %61 %int_30 + OpSelectionMerge %65 None + OpBranchConditional %63 %64 %74 + %64 = OpLabel + %66 = OpLoad %int %index + %68 = OpBitwiseAnd %int %66 %int_3 + %70 = OpAccessChain %_ptr_Function_float %foo %68 %uint_1 + %71 = OpLoad %float %70 + %72 = OpLoad %float %FragColor + %73 = OpFAdd %float %72 %71 + OpStore %FragColor %73 + OpBranch %65 + %74 = OpLabel + %75 = OpLoad %int %index + %76 = OpBitwiseAnd %int %75 %int_1 + %78 = OpAccessChain %_ptr_Function_float %foo %76 %uint_0 + %79 = OpLoad %float %78 + %80 = OpLoad %float %FragColor + %81 = OpFAdd %float %80 %79 + OpStore %FragColor %81 + OpBranch %65 + %65 = OpLabel + %83 = OpLoad %int %index + %84 = OpSGreaterThan %bool %83 %int_30 + OpSelectionMerge %86 None + OpBranchConditional %84 %85 %86 + %85 = OpLabel + %89 = OpAccessChain %_ptr_Function_float %foobar %int_1 %uint_2 + OpStore %89 %float_20 + OpBranch %86 + %86 = OpLabel + %90 = OpLoad %int %index + %91 = OpBitwiseAnd %int %90 %int_3 + %92 = OpAccessChain %_ptr_Function_float %foobar %91 %uint_2 + %93 = OpLoad %float %92 + %94 = OpLoad %float %FragColor + %95 = OpFAdd %float %94 %93 + OpStore %FragColor %95 + OpStore %baz %104 + %105 = OpLoad %int %index + %106 = OpBitwiseAnd %int %105 %int_3 + %107 = OpAccessChain %_ptr_Function_float %baz %106 %uint_2 + %108 = OpLoad %float %107 + %109 = OpLoad %float %FragColor + %110 = OpFAdd %float %109 %108 + OpStore %FragColor %110 + OpReturn + OpFunctionEnd diff --git a/shaders-msl/frag/lut-promotion.frag b/shaders-msl/frag/lut-promotion.frag new file mode 100644 index 0000000000..0cdc8148f9 --- /dev/null +++ b/shaders-msl/frag/lut-promotion.frag @@ -0,0 +1,44 @@ +#version 310 es +precision mediump float; +layout(location = 0) out float FragColor; +layout(location = 0) flat in int index; + +const float LUT[16] = float[]( + 1.0, 2.0, 3.0, 4.0, + 1.0, 2.0, 3.0, 4.0, + 1.0, 2.0, 3.0, 4.0, + 1.0, 2.0, 3.0, 4.0); + +void main() +{ + // Try reading LUTs, both in branches and not branch. + FragColor = LUT[index]; + if (index < 10) + FragColor += LUT[index ^ 1]; + else + FragColor += LUT[index & 1]; + + // Not declared as a LUT, but can be promoted to one. + vec4 foo[4] = vec4[](vec4(0.0), vec4(1.0), vec4(8.0), vec4(5.0)); + if (index > 30) + { + FragColor += foo[index & 3].y; + } + else + { + FragColor += foo[index & 1].x; + } + + // Not declared as a LUT, but this cannot be promoted, because we have a partial write. + vec4 foobar[4] = vec4[](vec4(0.0), vec4(1.0), vec4(8.0), vec4(5.0)); + if (index > 30) + { + foobar[1].z = 20.0; + } + FragColor += foobar[index & 3].z; + + // Not declared as a LUT, but this cannot be promoted, because we have two complete writes. + vec4 baz[4] = vec4[](vec4(0.0), vec4(1.0), vec4(8.0), vec4(5.0)); + baz = vec4[](vec4(20.0), vec4(30.0), vec4(50.0), vec4(60.0)); + FragColor += baz[index & 3].z; +} diff --git a/shaders/asm/frag/lut-promotion-initializer.asm.frag b/shaders/asm/frag/lut-promotion-initializer.asm.frag new file mode 100644 index 0000000000..320e5ebfbd --- /dev/null +++ b/shaders/asm/frag/lut-promotion-initializer.asm.frag @@ -0,0 +1,195 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 6 +; Bound: 111 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor %index + OpExecutionMode %main OriginUpperLeft + OpSource ESSL 310 + OpName %main "main" + OpName %FragColor "FragColor" + OpName %index "index" + OpName %indexable "indexable" + OpName %indexable_0 "indexable" + OpName %indexable_1 "indexable" + OpName %foo "foo" + OpName %foobar "foobar" + OpName %baz "baz" + OpDecorate %FragColor RelaxedPrecision + OpDecorate %FragColor Location 0 + OpDecorate %index RelaxedPrecision + OpDecorate %index Flat + OpDecorate %index Location 0 + OpDecorate %20 RelaxedPrecision + OpDecorate %25 RelaxedPrecision + OpDecorate %26 RelaxedPrecision + OpDecorate %32 RelaxedPrecision + OpDecorate %34 RelaxedPrecision + OpDecorate %37 RelaxedPrecision + OpDecorate %38 RelaxedPrecision + OpDecorate %39 RelaxedPrecision + OpDecorate %41 RelaxedPrecision + OpDecorate %42 RelaxedPrecision + OpDecorate %45 RelaxedPrecision + OpDecorate %46 RelaxedPrecision + OpDecorate %47 RelaxedPrecision + OpDecorate %foo RelaxedPrecision + OpDecorate %61 RelaxedPrecision + OpDecorate %66 RelaxedPrecision + OpDecorate %68 RelaxedPrecision + OpDecorate %71 RelaxedPrecision + OpDecorate %72 RelaxedPrecision + OpDecorate %73 RelaxedPrecision + OpDecorate %75 RelaxedPrecision + OpDecorate %76 RelaxedPrecision + OpDecorate %79 RelaxedPrecision + OpDecorate %80 RelaxedPrecision + OpDecorate %81 RelaxedPrecision + OpDecorate %foobar RelaxedPrecision + OpDecorate %83 RelaxedPrecision + OpDecorate %90 RelaxedPrecision + OpDecorate %91 RelaxedPrecision + OpDecorate %93 RelaxedPrecision + OpDecorate %94 RelaxedPrecision + OpDecorate %95 RelaxedPrecision + OpDecorate %baz RelaxedPrecision + OpDecorate %105 RelaxedPrecision + OpDecorate %106 RelaxedPrecision + OpDecorate %108 RelaxedPrecision + OpDecorate %109 RelaxedPrecision + OpDecorate %110 RelaxedPrecision + OpDecorate %16 RelaxedPrecision + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 +%_ptr_Output_float = OpTypePointer Output %float + %FragColor = OpVariable %_ptr_Output_float Output + %uint = OpTypeInt 32 0 + %uint_16 = OpConstant %uint 16 +%_arr_float_uint_16 = OpTypeArray %float %uint_16 + %float_1 = OpConstant %float 1 + %float_2 = OpConstant %float 2 + %float_3 = OpConstant %float 3 + %float_4 = OpConstant %float 4 + %16 = OpConstantComposite %_arr_float_uint_16 %float_1 %float_2 %float_3 %float_4 %float_1 %float_2 %float_3 %float_4 %float_1 %float_2 %float_3 %float_4 %float_1 %float_2 %float_3 %float_4 + %int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int + %index = OpVariable %_ptr_Input_int Input +%_ptr_Function__arr_float_uint_16 = OpTypePointer Function %_arr_float_uint_16 +%_ptr_Function_float = OpTypePointer Function %float + %int_10 = OpConstant %int 10 + %bool = OpTypeBool + %int_1 = OpConstant %int 1 + %v4float = OpTypeVector %float 4 + %uint_4 = OpConstant %uint 4 +%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4 +%_ptr_Function__arr_v4float_uint_4 = OpTypePointer Function %_arr_v4float_uint_4 + %float_0 = OpConstant %float 0 + %54 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 + %55 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 + %float_8 = OpConstant %float 8 + %57 = OpConstantComposite %v4float %float_8 %float_8 %float_8 %float_8 + %float_5 = OpConstant %float 5 + %59 = OpConstantComposite %v4float %float_5 %float_5 %float_5 %float_5 + %60 = OpConstantComposite %_arr_v4float_uint_4 %54 %55 %57 %59 + %int_30 = OpConstant %int 30 + %int_3 = OpConstant %int 3 + %uint_1 = OpConstant %uint 1 + %uint_0 = OpConstant %uint 0 + %float_20 = OpConstant %float 20 + %uint_2 = OpConstant %uint 2 + %97 = OpConstantComposite %v4float %float_20 %float_20 %float_20 %float_20 + %float_30 = OpConstant %float 30 + %99 = OpConstantComposite %v4float %float_30 %float_30 %float_30 %float_30 + %float_50 = OpConstant %float 50 + %101 = OpConstantComposite %v4float %float_50 %float_50 %float_50 %float_50 + %float_60 = OpConstant %float 60 + %103 = OpConstantComposite %v4float %float_60 %float_60 %float_60 %float_60 + %104 = OpConstantComposite %_arr_v4float_uint_4 %97 %99 %101 %103 + %main = OpFunction %void None %3 + %5 = OpLabel + %indexable = OpVariable %_ptr_Function__arr_float_uint_16 Function %16 +%indexable_0 = OpVariable %_ptr_Function__arr_float_uint_16 Function %16 +%indexable_1 = OpVariable %_ptr_Function__arr_float_uint_16 Function %16 + %foo = OpVariable %_ptr_Function__arr_v4float_uint_4 Function %60 + %foobar = OpVariable %_ptr_Function__arr_v4float_uint_4 Function %60 + %baz = OpVariable %_ptr_Function__arr_v4float_uint_4 Function %60 + %20 = OpLoad %int %index + %24 = OpAccessChain %_ptr_Function_float %indexable %20 + %25 = OpLoad %float %24 + OpStore %FragColor %25 + %26 = OpLoad %int %index + %29 = OpSLessThan %bool %26 %int_10 + OpSelectionMerge %31 None + OpBranchConditional %29 %30 %40 + %30 = OpLabel + %32 = OpLoad %int %index + %34 = OpBitwiseXor %int %32 %int_1 + %36 = OpAccessChain %_ptr_Function_float %indexable_0 %34 + %37 = OpLoad %float %36 + %38 = OpLoad %float %FragColor + %39 = OpFAdd %float %38 %37 + OpStore %FragColor %39 + OpBranch %31 + %40 = OpLabel + %41 = OpLoad %int %index + %42 = OpBitwiseAnd %int %41 %int_1 + %44 = OpAccessChain %_ptr_Function_float %indexable_1 %42 + %45 = OpLoad %float %44 + %46 = OpLoad %float %FragColor + %47 = OpFAdd %float %46 %45 + OpStore %FragColor %47 + OpBranch %31 + %31 = OpLabel + %61 = OpLoad %int %index + %63 = OpSGreaterThan %bool %61 %int_30 + OpSelectionMerge %65 None + OpBranchConditional %63 %64 %74 + %64 = OpLabel + %66 = OpLoad %int %index + %68 = OpBitwiseAnd %int %66 %int_3 + %70 = OpAccessChain %_ptr_Function_float %foo %68 %uint_1 + %71 = OpLoad %float %70 + %72 = OpLoad %float %FragColor + %73 = OpFAdd %float %72 %71 + OpStore %FragColor %73 + OpBranch %65 + %74 = OpLabel + %75 = OpLoad %int %index + %76 = OpBitwiseAnd %int %75 %int_1 + %78 = OpAccessChain %_ptr_Function_float %foo %76 %uint_0 + %79 = OpLoad %float %78 + %80 = OpLoad %float %FragColor + %81 = OpFAdd %float %80 %79 + OpStore %FragColor %81 + OpBranch %65 + %65 = OpLabel + %83 = OpLoad %int %index + %84 = OpSGreaterThan %bool %83 %int_30 + OpSelectionMerge %86 None + OpBranchConditional %84 %85 %86 + %85 = OpLabel + %89 = OpAccessChain %_ptr_Function_float %foobar %int_1 %uint_2 + OpStore %89 %float_20 + OpBranch %86 + %86 = OpLabel + %90 = OpLoad %int %index + %91 = OpBitwiseAnd %int %90 %int_3 + %92 = OpAccessChain %_ptr_Function_float %foobar %91 %uint_2 + %93 = OpLoad %float %92 + %94 = OpLoad %float %FragColor + %95 = OpFAdd %float %94 %93 + OpStore %FragColor %95 + OpStore %baz %104 + %105 = OpLoad %int %index + %106 = OpBitwiseAnd %int %105 %int_3 + %107 = OpAccessChain %_ptr_Function_float %baz %106 %uint_2 + %108 = OpLoad %float %107 + %109 = OpLoad %float %FragColor + %110 = OpFAdd %float %109 %108 + OpStore %FragColor %110 + OpReturn + OpFunctionEnd diff --git a/shaders/frag/lut-promotion.frag b/shaders/frag/lut-promotion.frag new file mode 100644 index 0000000000..0cdc8148f9 --- /dev/null +++ b/shaders/frag/lut-promotion.frag @@ -0,0 +1,44 @@ +#version 310 es +precision mediump float; +layout(location = 0) out float FragColor; +layout(location = 0) flat in int index; + +const float LUT[16] = float[]( + 1.0, 2.0, 3.0, 4.0, + 1.0, 2.0, 3.0, 4.0, + 1.0, 2.0, 3.0, 4.0, + 1.0, 2.0, 3.0, 4.0); + +void main() +{ + // Try reading LUTs, both in branches and not branch. + FragColor = LUT[index]; + if (index < 10) + FragColor += LUT[index ^ 1]; + else + FragColor += LUT[index & 1]; + + // Not declared as a LUT, but can be promoted to one. + vec4 foo[4] = vec4[](vec4(0.0), vec4(1.0), vec4(8.0), vec4(5.0)); + if (index > 30) + { + FragColor += foo[index & 3].y; + } + else + { + FragColor += foo[index & 1].x; + } + + // Not declared as a LUT, but this cannot be promoted, because we have a partial write. + vec4 foobar[4] = vec4[](vec4(0.0), vec4(1.0), vec4(8.0), vec4(5.0)); + if (index > 30) + { + foobar[1].z = 20.0; + } + FragColor += foobar[index & 3].z; + + // Not declared as a LUT, but this cannot be promoted, because we have two complete writes. + vec4 baz[4] = vec4[](vec4(0.0), vec4(1.0), vec4(8.0), vec4(5.0)); + baz = vec4[](vec4(20.0), vec4(30.0), vec4(50.0), vec4(60.0)); + FragColor += baz[index & 3].z; +} diff --git a/spirv_common.hpp b/spirv_common.hpp index e3fb2b7238..0ea94fcb0c 100644 --- a/spirv_common.hpp +++ b/spirv_common.hpp @@ -764,7 +764,6 @@ struct SPIRFunction : IVariant bool active = false; bool flush_undeclared = true; bool do_combined_parameters = true; - bool analyzed_variable_scope = false; }; struct SPIRAccessChain : IVariant @@ -1102,6 +1101,9 @@ struct SPIRConstant : IVariant // If this constant is used as an array length which creates specialization restrictions on some backends. bool is_used_as_array_length = false; + // If true, this is a LUT, and should always be declared in the outer scope. + bool is_used_as_lut = false; + // For composites which are constant arrays, etc. std::vector subconstants; }; diff --git a/spirv_cpp.cpp b/spirv_cpp.cpp index 9302a07441..7d781e9172 100644 --- a/spirv_cpp.cpp +++ b/spirv_cpp.cpp @@ -300,6 +300,7 @@ string CompilerCPP::compile() backend.explicit_struct_type = true; backend.use_initializer_list = true; + build_function_control_flow_graphs_and_analyze(); update_active_builtins(); uint32_t pass_count = 0; diff --git a/spirv_cross.cpp b/spirv_cross.cpp index 3e460e9f61..dd2a44ca33 100644 --- a/spirv_cross.cpp +++ b/spirv_cross.cpp @@ -3647,303 +3647,449 @@ void Compiler::analyze_parameter_preservation( } } -void Compiler::analyze_variable_scope(SPIRFunction &entry) +Compiler::AnalyzeVariableScopeAccessHandler::AnalyzeVariableScopeAccessHandler(Compiler &compiler_, + SPIRFunction &entry_) + : compiler(compiler_) + , entry(entry_) { - struct AccessHandler : OpcodeHandler +} + +bool Compiler::AnalyzeVariableScopeAccessHandler::follow_function_call(const SPIRFunction &) +{ + // Only analyze within this function. + return false; +} + +void Compiler::AnalyzeVariableScopeAccessHandler::set_current_block(const SPIRBlock &block) +{ + current_block = █ + + // If we're branching to a block which uses OpPhi, in GLSL + // this will be a variable write when we branch, + // so we need to track access to these variables as well to + // have a complete picture. + const auto test_phi = [this, &block](uint32_t to) { + auto &next = compiler.get(to); + for (auto &phi : next.phi_variables) + { + if (phi.parent == block.self) + { + accessed_variables_to_block[phi.function_variable].insert(block.self); + // Phi variables are also accessed in our target branch block. + accessed_variables_to_block[phi.function_variable].insert(next.self); + + notify_variable_access(phi.local_variable, block.self); + } + } + }; + + switch (block.terminator) { - public: - AccessHandler(Compiler &compiler_, SPIRFunction &entry_) - : compiler(compiler_) - , entry(entry_) - { - } + case SPIRBlock::Direct: + notify_variable_access(block.condition, block.self); + test_phi(block.next_block); + break; - bool follow_function_call(const SPIRFunction &) - { - // Only analyze within this function. + case SPIRBlock::Select: + notify_variable_access(block.condition, block.self); + test_phi(block.true_block); + test_phi(block.false_block); + break; + + case SPIRBlock::MultiSelect: + notify_variable_access(block.condition, block.self); + for (auto &target : block.cases) + test_phi(target.block); + if (block.default_block) + test_phi(block.default_block); + break; + + default: + break; + } +} + +void Compiler::AnalyzeVariableScopeAccessHandler::notify_variable_access(uint32_t id, uint32_t block) +{ + if (id_is_phi_variable(id)) + accessed_variables_to_block[id].insert(block); + else if (id_is_potential_temporary(id)) + accessed_temporaries_to_block[id].insert(block); +} + +bool Compiler::AnalyzeVariableScopeAccessHandler::id_is_phi_variable(uint32_t id) const +{ + if (id >= compiler.get_current_id_bound()) + return false; + auto *var = compiler.maybe_get(id); + return var && var->phi_variable; +} + +bool Compiler::AnalyzeVariableScopeAccessHandler::id_is_potential_temporary(uint32_t id) const +{ + if (id >= compiler.get_current_id_bound()) + return false; + + // Temporaries are not created before we start emitting code. + return compiler.ids[id].empty() || (compiler.ids[id].get_type() == TypeExpression); +} + +bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint32_t *args, uint32_t length) +{ + // Keep track of the types of temporaries, so we can hoist them out as necessary. + uint32_t result_type, result_id; + if (compiler.instruction_to_result_type(result_type, result_id, op, args, length)) + result_id_to_type[result_id] = result_type; + + switch (op) + { + case OpStore: + { + if (length < 2) return false; - } - void set_current_block(const SPIRBlock &block) + uint32_t ptr = args[0]; + auto *var = compiler.maybe_get_backing_variable(ptr); + + // If we store through an access chain, we have a partial write. + if (var) { - current_block = █ - - // If we're branching to a block which uses OpPhi, in GLSL - // this will be a variable write when we branch, - // so we need to track access to these variables as well to - // have a complete picture. - const auto test_phi = [this, &block](uint32_t to) { - auto &next = compiler.get(to); - for (auto &phi : next.phi_variables) - { - if (phi.parent == block.self) - { - accessed_variables_to_block[phi.function_variable].insert(block.self); - // Phi variables are also accessed in our target branch block. - accessed_variables_to_block[phi.function_variable].insert(next.self); - - notify_variable_access(phi.local_variable, block.self); - } - } - }; - - switch (block.terminator) - { - case SPIRBlock::Direct: - notify_variable_access(block.condition, block.self); - test_phi(block.next_block); - break; - - case SPIRBlock::Select: - notify_variable_access(block.condition, block.self); - test_phi(block.true_block); - test_phi(block.false_block); - break; - - case SPIRBlock::MultiSelect: - notify_variable_access(block.condition, block.self); - for (auto &target : block.cases) - test_phi(target.block); - if (block.default_block) - test_phi(block.default_block); - break; - - default: - break; - } + accessed_variables_to_block[var->self].insert(current_block->self); + if (var->self == ptr) + complete_write_variables_to_block[var->self].insert(current_block->self); + else + partial_write_variables_to_block[var->self].insert(current_block->self); } - void notify_variable_access(uint32_t id, uint32_t block) + // Might try to store a Phi variable here. + notify_variable_access(args[1], current_block->self); + break; + } + + case OpAccessChain: + case OpInBoundsAccessChain: + { + if (length < 3) + return false; + + uint32_t ptr = args[2]; + auto *var = compiler.maybe_get(ptr); + if (var) + accessed_variables_to_block[var->self].insert(current_block->self); + + for (uint32_t i = 3; i < length; i++) + notify_variable_access(args[i], current_block->self); + + // The result of an access chain is a fixed expression and is not really considered a temporary. + auto &e = compiler.set(args[1], "", args[0], true); + auto *backing_variable = compiler.maybe_get_backing_variable(ptr); + e.loaded_from = backing_variable ? backing_variable->self : 0; + + // Other backends might use SPIRAccessChain for this later. + compiler.ids[args[1]].set_allow_type_rewrite(); + break; + } + + case OpCopyMemory: + { + if (length < 2) + return false; + + uint32_t lhs = args[0]; + uint32_t rhs = args[1]; + auto *var = compiler.maybe_get_backing_variable(lhs); + + // If we store through an access chain, we have a partial write. + if (var) { - if (id_is_phi_variable(id)) - accessed_variables_to_block[id].insert(block); - else if (id_is_potential_temporary(id)) - accessed_temporaries_to_block[id].insert(block); + accessed_variables_to_block[var->self].insert(current_block->self); + if (var->self == lhs) + complete_write_variables_to_block[var->self].insert(current_block->self); + else + partial_write_variables_to_block[var->self].insert(current_block->self); } - bool id_is_phi_variable(uint32_t id) + var = compiler.maybe_get_backing_variable(rhs); + if (var) + accessed_variables_to_block[var->self].insert(current_block->self); + break; + } + + case OpCopyObject: + { + if (length < 3) + return false; + + auto *var = compiler.maybe_get_backing_variable(args[2]); + if (var) + accessed_variables_to_block[var->self].insert(current_block->self); + + // Might try to copy a Phi variable here. + notify_variable_access(args[2], current_block->self); + break; + } + + case OpLoad: + { + if (length < 3) + return false; + uint32_t ptr = args[2]; + auto *var = compiler.maybe_get_backing_variable(ptr); + if (var) + accessed_variables_to_block[var->self].insert(current_block->self); + + // Loaded value is a temporary. + notify_variable_access(args[1], current_block->self); + break; + } + + case OpFunctionCall: + { + if (length < 3) + return false; + + length -= 3; + args += 3; + + for (uint32_t i = 0; i < length; i++) { - if (id >= compiler.get_current_id_bound()) - return false; - auto *var = compiler.maybe_get(id); - return var && var->phi_variable; + auto *var = compiler.maybe_get_backing_variable(args[i]); + if (var) + { + accessed_variables_to_block[var->self].insert(current_block->self); + // Assume we can get partial writes to this variable. + partial_write_variables_to_block[var->self].insert(current_block->self); + } + + // Cannot easily prove if argument we pass to a function is completely written. + // Usually, functions write to a dummy variable, + // which is then copied to in full to the real argument. + + // Might try to copy a Phi variable here. + notify_variable_access(args[i], current_block->self); } - bool id_is_potential_temporary(uint32_t id) + // Return value may be a temporary. + notify_variable_access(args[1], current_block->self); + break; + } + + case OpExtInst: + { + for (uint32_t i = 4; i < length; i++) + notify_variable_access(args[i], current_block->self); + notify_variable_access(args[1], current_block->self); + break; + } + + case OpArrayLength: + // Uses literals, but cannot be a phi variable, so ignore. + break; + + // Atomics shouldn't be able to access function-local variables. + // Some GLSL builtins access a pointer. + + case OpCompositeInsert: + case OpVectorShuffle: + // Specialize for opcode which contains literals. + for (uint32_t i = 1; i < 4; i++) + notify_variable_access(args[i], current_block->self); + break; + + case OpCompositeExtract: + // Specialize for opcode which contains literals. + for (uint32_t i = 1; i < 3; i++) + notify_variable_access(args[i], current_block->self); + break; + + default: + { + // Rather dirty way of figuring out where Phi variables are used. + // As long as only IDs are used, we can scan through instructions and try to find any evidence that + // the ID of a variable has been used. + // There are potential false positives here where a literal is used in-place of an ID, + // but worst case, it does not affect the correctness of the compile. + // Exhaustive analysis would be better here, but it's not worth it for now. + for (uint32_t i = 0; i < length; i++) + notify_variable_access(args[i], current_block->self); + break; + } + } + return true; +} + +Compiler::StaticExpressionAccessHandler::StaticExpressionAccessHandler(Compiler &compiler_, uint32_t variable_id_) + : compiler(compiler_) + , variable_id(variable_id_) +{ +} + +bool Compiler::StaticExpressionAccessHandler::follow_function_call(const SPIRFunction &) +{ + return false; +} + +bool Compiler::StaticExpressionAccessHandler::handle(spv::Op op, const uint32_t *args, uint32_t length) +{ + switch (op) + { + case OpStore: + if (length < 2) + return false; + if (args[0] == variable_id) { - if (id >= compiler.get_current_id_bound()) - return false; - - // Temporaries are not created before we start emitting code. - return compiler.ids[id].empty() || (compiler.ids[id].get_type() == TypeExpression); + static_expression = args[1]; + write_count++; } + break; - bool handle(spv::Op op, const uint32_t *args, uint32_t length) + case OpLoad: + if (length < 3) + return false; + if (args[2] == variable_id && static_expression == 0) // Tried to read from variable before it was initialized. + return false; + break; + + case OpAccessChain: + case OpInBoundsAccessChain: + if (length < 3) + return false; + if (args[2] == variable_id) // If we try to access chain our candidate variable before we store to it, bail. + return false; + break; + + default: + break; + } + + return true; +} + +void Compiler::find_function_local_luts(SPIRFunction &entry, const AnalyzeVariableScopeAccessHandler &handler) +{ + auto &cfg = *function_cfgs.find(entry.self)->second; + + // For each variable which is statically accessed. + for (auto &accessed_var : handler.accessed_variables_to_block) + { + auto &blocks = accessed_var.second; + auto &var = get(accessed_var.first); + auto &type = expression_type(accessed_var.first); + + // Only consider function local variables here. + if (var.storage != StorageClassFunction) + continue; + + // We cannot be a phi variable. + if (var.phi_variable) + continue; + + // Only consider arrays here. + if (type.array.empty()) + continue; + + // HACK: Do not consider structs. This is a quirk with how types are currently being emitted. + // Structs are emitted after specialization constants and composite constants. + // FIXME: Fix declaration order so declared constants can have struct types. + if (type.basetype == SPIRType::Struct) + continue; + + // If the variable has an initializer, make sure it is a constant expression. + uint32_t static_constant_expression = 0; + if (var.initializer) { - // Keep track of the types of temporaries, so we can hoist them out as necessary. - uint32_t result_type, result_id; - if (compiler.instruction_to_result_type(result_type, result_id, op, args, length)) - result_id_to_type[result_id] = result_type; + if (ids[var.initializer].get_type() != TypeConstant) + continue; + static_constant_expression = var.initializer; - switch (op) - { - case OpStore: - { - if (length < 2) - return false; + // There can be no stores to this variable, we have now proved we have a LUT. + if (handler.complete_write_variables_to_block.count(var.self) != 0 || + handler.partial_write_variables_to_block.count(var.self) != 0) + continue; + } + else + { + // We can have one, and only one write to the variable, and that write needs to be a constant. - uint32_t ptr = args[0]; - auto *var = compiler.maybe_get_backing_variable(ptr); - if (var && var->storage == StorageClassFunction) - accessed_variables_to_block[var->self].insert(current_block->self); + // No partial writes allowed. + if (handler.partial_write_variables_to_block.count(var.self) != 0) + continue; - // If we store through an access chain, we have a partial write. - if (var && var->self == ptr && var->storage == StorageClassFunction) - complete_write_variables_to_block[var->self].insert(current_block->self); + auto itr = handler.complete_write_variables_to_block.find(var.self); - // Might try to store a Phi variable here. - notify_variable_access(args[1], current_block->self); - break; - } + // No writes? + if (itr == end(handler.complete_write_variables_to_block)) + continue; - case OpAccessChain: - case OpInBoundsAccessChain: - { - if (length < 3) - return false; + // We write to the variable in more than one block. + auto &write_blocks = itr->second; + if (write_blocks.size() != 1) + continue; - uint32_t ptr = args[2]; - auto *var = compiler.maybe_get(ptr); - if (var && var->storage == StorageClassFunction) - accessed_variables_to_block[var->self].insert(current_block->self); + // The write needs to happen in the dominating block. + DominatorBuilder builder(cfg); + for (auto &block : blocks) + builder.add_block(block); + uint32_t dominator = builder.get_dominator(); - for (uint32_t i = 3; i < length; i++) - notify_variable_access(args[i], current_block->self); + // The complete write happened in a branch or similar, cannot deduce static expression. + if (write_blocks.count(dominator) == 0) + continue; - // The result of an access chain is a fixed expression and is not really considered a temporary. - auto &e = compiler.set(args[1], "", args[0], true); - auto *backing_variable = compiler.maybe_get_backing_variable(ptr); - e.loaded_from = backing_variable ? backing_variable->self : 0; + // Find the static expression for this variable. + StaticExpressionAccessHandler static_expression_handler(*this, var.self); + traverse_all_reachable_opcodes(get(dominator), static_expression_handler); - // Other backends might use SPIRAccessChain for this later. - compiler.ids[args[1]].set_allow_type_rewrite(); - break; - } + // We want one, and exactly one write + if (static_expression_handler.write_count != 1 || static_expression_handler.static_expression == 0) + continue; - case OpCopyMemory: - { - if (length < 2) - return false; + // Is it a constant expression? + if (ids[static_expression_handler.static_expression].get_type() != TypeConstant) + continue; - uint32_t lhs = args[0]; - uint32_t rhs = args[1]; - auto *var = compiler.maybe_get_backing_variable(lhs); - if (var && var->storage == StorageClassFunction) - accessed_variables_to_block[var->self].insert(current_block->self); - - // If we store through an access chain, we have a partial write. - if (var && var->self == lhs) - complete_write_variables_to_block[var->self].insert(current_block->self); - - var = compiler.maybe_get_backing_variable(rhs); - if (var && var->storage == StorageClassFunction) - accessed_variables_to_block[var->self].insert(current_block->self); - break; - } - - case OpCopyObject: - { - if (length < 3) - return false; - - auto *var = compiler.maybe_get_backing_variable(args[2]); - if (var && var->storage == StorageClassFunction) - accessed_variables_to_block[var->self].insert(current_block->self); - - // Might try to copy a Phi variable here. - notify_variable_access(args[2], current_block->self); - break; - } - - case OpLoad: - { - if (length < 3) - return false; - uint32_t ptr = args[2]; - auto *var = compiler.maybe_get_backing_variable(ptr); - if (var && var->storage == StorageClassFunction) - accessed_variables_to_block[var->self].insert(current_block->self); - - // Loaded value is a temporary. - notify_variable_access(args[1], current_block->self); - break; - } - - case OpFunctionCall: - { - if (length < 3) - return false; - - length -= 3; - args += 3; - for (uint32_t i = 0; i < length; i++) - { - auto *var = compiler.maybe_get_backing_variable(args[i]); - if (var && var->storage == StorageClassFunction) - accessed_variables_to_block[var->self].insert(current_block->self); - - // Cannot easily prove if argument we pass to a function is completely written. - // Usually, functions write to a dummy variable, - // which is then copied to in full to the real argument. - - // Might try to copy a Phi variable here. - notify_variable_access(args[i], current_block->self); - } - - // Return value may be a temporary. - notify_variable_access(args[1], current_block->self); - break; - } - - case OpExtInst: - { - for (uint32_t i = 4; i < length; i++) - notify_variable_access(args[i], current_block->self); - notify_variable_access(args[1], current_block->self); - break; - } - - case OpArrayLength: - // Uses literals, but cannot be a phi variable, so ignore. - break; - - // Atomics shouldn't be able to access function-local variables. - // Some GLSL builtins access a pointer. - - case OpCompositeInsert: - case OpVectorShuffle: - // Specialize for opcode which contains literals. - for (uint32_t i = 1; i < 4; i++) - notify_variable_access(args[i], current_block->self); - break; - - case OpCompositeExtract: - // Specialize for opcode which contains literals. - for (uint32_t i = 1; i < 3; i++) - notify_variable_access(args[i], current_block->self); - break; - - default: - { - // Rather dirty way of figuring out where Phi variables are used. - // As long as only IDs are used, we can scan through instructions and try to find any evidence that - // the ID of a variable has been used. - // There are potential false positives here where a literal is used in-place of an ID, - // but worst case, it does not affect the correctness of the compile. - // Exhaustive analysis would be better here, but it's not worth it for now. - for (uint32_t i = 0; i < length; i++) - notify_variable_access(args[i], current_block->self); - break; - } - } - return true; + // We found a LUT! + static_constant_expression = static_expression_handler.static_expression; } - Compiler &compiler; - SPIRFunction &entry; - std::unordered_map> accessed_variables_to_block; - std::unordered_map> accessed_temporaries_to_block; - std::unordered_map result_id_to_type; - std::unordered_map> complete_write_variables_to_block; - const SPIRBlock *current_block = nullptr; - } handler(*this, entry); + get(static_constant_expression).is_used_as_lut = true; + var.static_expression = static_constant_expression; + var.statically_assigned = true; + var.remapped_variable = true; + } +} +void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeAccessHandler &handler) +{ // First, we map out all variable access within a function. // Essentially a map of block -> { variables accessed in the basic block } - this->traverse_all_reachable_opcodes(entry, handler); + traverse_all_reachable_opcodes(entry, handler); - // Compute the control flow graph for this function. - CFG cfg(*this, entry); + auto &cfg = *function_cfgs.find(entry.self)->second; // Analyze if there are parameters which need to be implicitly preserved with an "in" qualifier. - this->analyze_parameter_preservation(entry, cfg, handler.accessed_variables_to_block, - handler.complete_write_variables_to_block); + analyze_parameter_preservation(entry, cfg, handler.accessed_variables_to_block, + handler.complete_write_variables_to_block); unordered_map potential_loop_variables; // For each variable which is statically accessed. for (auto &var : handler.accessed_variables_to_block) { + // Only deal with variables which are considered local variables in this function. + if (find(begin(entry.local_variables), end(entry.local_variables), var.first) == end(entry.local_variables)) + continue; + DominatorBuilder builder(cfg); auto &blocks = var.second; - auto &type = this->expression_type(var.first); + auto &type = expression_type(var.first); // Figure out which block is dominating all accesses of those variables. for (auto &block : blocks) { // If we're accessing a variable inside a continue block, this variable might be a loop variable. // We can only use loop variables with scalars, as we cannot track static expressions for vectors. - if (this->is_continue(block)) + if (is_continue(block)) { // Potentially awkward case to check for. // We might have a variable inside a loop, which is touched by the continue block, @@ -3951,7 +4097,7 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry) // The continue block is dominated by the inner part of the loop, which does not make sense in high-level // language output because it will be declared before the body, // so we will have to lift the dominator up to the relevant loop header instead. - builder.add_block(this->continue_block_to_loop_header[block]); + builder.add_block(continue_block_to_loop_header[block]); // Arrays or structs cannot be loop variables. if (type.vecsize == 1 && type.columns == 1 && type.basetype != SPIRType::Struct && type.array.empty()) @@ -3978,9 +4124,9 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry) // will be completely eliminated. if (dominating_block) { - auto &block = this->get(dominating_block); + auto &block = get(dominating_block); block.dominated_variables.push_back(var.first); - this->get(var.first).dominator = dominating_block; + get(var.first).dominator = dominating_block; } } @@ -4006,9 +4152,9 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry) // If a temporary is used in more than one block, we might have to lift continue block // access up to loop header like we did for variables. - if (blocks.size() != 1 && this->is_continue(block)) - builder.add_block(this->continue_block_to_loop_header[block]); - else if (blocks.size() != 1 && this->is_single_block_loop(block)) + if (blocks.size() != 1 && is_continue(block)) + builder.add_block(continue_block_to_loop_header[block]); + else if (blocks.size() != 1 && is_single_block_loop(block)) { // Awkward case, because the loop header is also the continue block. force_temporary = true; @@ -4027,10 +4173,10 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry) // This should be very rare, but if we try to declare a temporary inside a loop, // and that temporary is used outside the loop as well (spirv-opt inliner likes this) // we should actually emit the temporary outside the loop. - this->hoisted_temporaries.insert(var.first); - this->forced_temporaries.insert(var.first); + hoisted_temporaries.insert(var.first); + forced_temporaries.insert(var.first); - auto &block_temporaries = this->get(dominating_block).declare_temporary; + auto &block_temporaries = get(dominating_block).declare_temporary; block_temporaries.emplace_back(handler.result_id_to_type[var.first], var.first); } else if (blocks.size() > 1) @@ -4040,7 +4186,7 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry) // In this case, the header is actually inside the for (;;) {} block, and we have problems. // What we need to do is hoist the temporaries outside the for (;;) {} block in case the header block // declares the temporary. - auto &block_temporaries = this->get(dominating_block).potential_declare_temporary; + auto &block_temporaries = get(dominating_block).potential_declare_temporary; block_temporaries.emplace_back(handler.result_id_to_type[var.first], var.first); } } @@ -4051,7 +4197,7 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry) // Now, try to analyze whether or not these variables are actually loop variables. for (auto &loop_variable : potential_loop_variables) { - auto &var = this->get(loop_variable.first); + auto &var = get(loop_variable.first); auto dominator = var.dominator; auto block = loop_variable.second; @@ -4066,9 +4212,9 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry) uint32_t header = 0; // Find the loop header for this block. - for (auto b : this->loop_blocks) + for (auto b : loop_blocks) { - auto &potential_header = this->get(b); + auto &potential_header = get(b); if (potential_header.continue_block == block) { header = b; @@ -4077,7 +4223,7 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry) } assert(header); - auto &header_block = this->get(header); + auto &header_block = get(header); auto &blocks = handler.accessed_variables_to_block[loop_variable.first]; // If a loop variable is not used before the loop, it's probably not a loop variable. @@ -4133,7 +4279,7 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry) // Need to sort here as variables come from an unordered container, and pushing stuff in wrong order // will break reproducability in regression runs. sort(begin(header_block.loop_variables), end(header_block.loop_variables)); - this->get(loop_variable.first).loop_variable = true; + get(loop_variable.first).loop_variable = true; } } @@ -4406,6 +4552,72 @@ bool Compiler::CombinedImageSamplerDrefHandler::handle(spv::Op opcode, const uin return true; } +void Compiler::build_function_control_flow_graphs_and_analyze() +{ + CFGBuilder handler(*this); + handler.function_cfgs[entry_point].reset(new CFG(*this, get(entry_point))); + traverse_all_reachable_opcodes(get(entry_point), handler); + function_cfgs = move(handler.function_cfgs); + + for (auto &f : function_cfgs) + { + auto &func = get(f.first); + AnalyzeVariableScopeAccessHandler scope_handler(*this, func); + analyze_variable_scope(func, scope_handler); + find_function_local_luts(func, scope_handler); + + // Check if we can actually use the loop variables we found in analyze_variable_scope. + // To use multiple initializers, we need the same type and qualifiers. + for (auto block : func.blocks) + { + auto &b = get(block); + if (b.loop_variables.size() < 2) + continue; + + auto &flags = get_decoration_bitset(b.loop_variables.front()); + uint32_t type = get(b.loop_variables.front()).basetype; + bool invalid_initializers = false; + for (auto loop_variable : b.loop_variables) + { + if (flags != get_decoration_bitset(loop_variable) || + type != get(b.loop_variables.front()).basetype) + { + invalid_initializers = true; + break; + } + } + + if (invalid_initializers) + { + for (auto loop_variable : b.loop_variables) + get(loop_variable).loop_variable = false; + b.loop_variables.clear(); + } + } + } +} + +Compiler::CFGBuilder::CFGBuilder(spirv_cross::Compiler &compiler_) + : compiler(compiler_) +{ +} + +bool Compiler::CFGBuilder::handle(spv::Op, const uint32_t *, uint32_t) +{ + return true; +} + +bool Compiler::CFGBuilder::follow_function_call(const SPIRFunction &func) +{ + if (function_cfgs.find(func.self) == end(function_cfgs)) + { + function_cfgs[func.self].reset(new CFG(compiler, func)); + return true; + } + else + return false; +} + bool Compiler::CombinedImageSamplerUsageHandler::begin_function_scope(const uint32_t *args, uint32_t length) { if (length < 3) diff --git a/spirv_cross.hpp b/spirv_cross.hpp index b25e1c19cb..159e1988be 100644 --- a/spirv_cross.hpp +++ b/spirv_cross.hpp @@ -18,11 +18,11 @@ #define SPIRV_CROSS_HPP #include "spirv.hpp" +#include "spirv_cfg.hpp" #include "spirv_common.hpp" namespace spirv_cross { -class CFG; struct Resource { // Resources are identified with their SPIR-V ID. @@ -676,8 +676,6 @@ protected: variable_remap_callback(type, var_name, type_name); } - void analyze_variable_scope(SPIRFunction &function); - void parse(); void parse(const Instruction &i); @@ -869,6 +867,55 @@ protected: bool need_subpass_input = false; }; + void build_function_control_flow_graphs_and_analyze(); + std::unordered_map> function_cfgs; + struct CFGBuilder : OpcodeHandler + { + CFGBuilder(Compiler &compiler_); + + bool follow_function_call(const SPIRFunction &func) override; + bool handle(spv::Op op, const uint32_t *args, uint32_t length) override; + Compiler &compiler; + std::unordered_map> function_cfgs; + }; + + struct AnalyzeVariableScopeAccessHandler : OpcodeHandler + { + AnalyzeVariableScopeAccessHandler(Compiler &compiler_, SPIRFunction &entry_); + + bool follow_function_call(const SPIRFunction &) override; + void set_current_block(const SPIRBlock &block) override; + + void notify_variable_access(uint32_t id, uint32_t block); + bool id_is_phi_variable(uint32_t id) const; + bool id_is_potential_temporary(uint32_t id) const; + bool handle(spv::Op op, const uint32_t *args, uint32_t length) override; + + Compiler &compiler; + SPIRFunction &entry; + std::unordered_map> accessed_variables_to_block; + std::unordered_map> accessed_temporaries_to_block; + std::unordered_map result_id_to_type; + std::unordered_map> complete_write_variables_to_block; + std::unordered_map> partial_write_variables_to_block; + const SPIRBlock *current_block = nullptr; + }; + + struct StaticExpressionAccessHandler : OpcodeHandler + { + StaticExpressionAccessHandler(Compiler &compiler_, uint32_t variable_id_); + bool follow_function_call(const SPIRFunction &) override; + bool handle(spv::Op op, const uint32_t *args, uint32_t length) override; + + Compiler &compiler; + uint32_t variable_id; + uint32_t static_expression = 0; + uint32_t write_count = 0; + }; + + void analyze_variable_scope(SPIRFunction &function, AnalyzeVariableScopeAccessHandler &handler); + void find_function_local_luts(SPIRFunction &function, const AnalyzeVariableScopeAccessHandler &handler); + void make_constant_null(uint32_t id, uint32_t type); std::vector declared_capabilities; diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp index 5b71ab1d55..a973274616 100644 --- a/spirv_glsl.cpp +++ b/spirv_glsl.cpp @@ -418,6 +418,7 @@ string CompilerGLSL::compile() backend.supports_extensions = true; // Scan the SPIR-V to find trivial uses of extensions. + build_function_control_flow_graphs_and_analyze(); find_static_extensions(); fixup_image_load_store_access(); update_active_builtins(); @@ -1650,7 +1651,7 @@ void CompilerGLSL::emit_specialization_constant_op(const SPIRConstantOp &constan statement("const ", variable_decl(type, name), " = ", constant_op_expression(constant), ";"); } -void CompilerGLSL::emit_specialization_constant(const SPIRConstant &constant) +void CompilerGLSL::emit_constant(const SPIRConstant &constant) { auto &type = get(constant.constant_type); auto name = to_name(constant.self); @@ -2071,25 +2072,25 @@ void CompilerGLSL::emit_resources() // // TODO: If we have the fringe case that we create a spec constant which depends on a struct type, // we'll have to deal with that, but there's currently no known way to express that. - if (options.vulkan_semantics) + for (auto &id : ids) { - for (auto &id : ids) + if (id.get_type() == TypeConstant) { - if (id.get_type() == TypeConstant) - { - auto &c = id.get(); - if (!c.specialization) - continue; + auto &c = id.get(); - emit_specialization_constant(c); - emitted = true; - } - else if (id.get_type() == TypeConstantOp) + bool needs_declaration = (c.specialization && options.vulkan_semantics) || c.is_used_as_lut; + + if (needs_declaration) { - emit_specialization_constant_op(id.get()); + emit_constant(c); emitted = true; } } + else if (options.vulkan_semantics && id.get_type() == TypeConstantOp) + { + emit_specialization_constant_op(id.get()); + emitted = true; + } } if (emitted) @@ -2298,9 +2299,9 @@ string CompilerGLSL::enclose_expression(const string &expr) uint32_t paren_count = 0; for (auto c : expr) { - if (c == '(') + if (c == '(' || c == '[') paren_count++; - else if (c == ')') + else if (c == ')' || c == ']') { assert(paren_count); paren_count--; @@ -2424,6 +2425,8 @@ string CompilerGLSL::to_expression(uint32_t id) return builtin_to_glsl(dec.builtin_type, StorageClassGeneric); else if (c.specialization && options.vulkan_semantics) return to_name(id); + else if (c.is_used_as_lut) + return to_name(id); else if (type.basetype == SPIRType::Struct && !backend.can_declare_struct_inline) return to_name(id); else if (!type.array.empty() && !backend.can_declare_arrays_inline) @@ -6190,6 +6193,10 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) var->static_expression = ops[1]; else if (var && var->loop_variable && !var->loop_variable_enable) var->static_expression = ops[1]; + else if (var && var->remapped_variable) + { + // Skip the write. + } else if (var && flattened_structs.count(ops[0])) { store_flattened_struct(*var, ops[1]); @@ -8289,6 +8296,11 @@ string CompilerGLSL::argument_decl(const SPIRFunction::Parameter &arg) return join(direction, to_qualifiers_glsl(arg.id), variable_decl(type, to_name(arg.id), arg.id)); } +string CompilerGLSL::to_initializer_expression(const SPIRVariable &var) +{ + return to_expression(var.initializer); +} + string CompilerGLSL::variable_decl(const SPIRVariable &variable) { // Ignore the pointer type since GLSL doesn't have pointers. @@ -8306,7 +8318,7 @@ string CompilerGLSL::variable_decl(const SPIRVariable &variable) { uint32_t expr = variable.initializer; if (ids[expr].get_type() != TypeUndef) - res += join(" = ", to_expression(variable.initializer)); + res += join(" = ", to_initializer_expression(variable)); } return res; } @@ -8908,41 +8920,6 @@ void CompilerGLSL::emit_function(SPIRFunction &func, const Bitset &return_flags) current_function = &func; auto &entry_block = get(func.entry_block); - if (!func.analyzed_variable_scope) - { - analyze_variable_scope(func); - - // Check if we can actually use the loop variables we found in analyze_variable_scope. - // To use multiple initializers, we need the same type and qualifiers. - for (auto block : func.blocks) - { - auto &b = get(block); - if (b.loop_variables.size() < 2) - continue; - - auto &flags = get_decoration_bitset(b.loop_variables.front()); - uint32_t type = get(b.loop_variables.front()).basetype; - bool invalid_initializers = false; - for (auto loop_variable : b.loop_variables) - { - if (flags != get_decoration_bitset(loop_variable) || - type != get(b.loop_variables.front()).basetype) - { - invalid_initializers = true; - break; - } - } - - if (invalid_initializers) - { - for (auto loop_variable : b.loop_variables) - get(loop_variable).loop_variable = false; - b.loop_variables.clear(); - } - } - func.analyzed_variable_scope = true; - } - for (auto &v : func.local_variables) { auto &var = get(v); @@ -8969,6 +8946,11 @@ void CompilerGLSL::emit_function(SPIRFunction &func, const Bitset &return_flags) entry_block.dominated_variables.push_back(var.self); var.deferred_declaration = true; } + else if (var.storage == StorageClassFunction && var.remapped_variable && var.static_expression) + { + // No need to declare this variable, it has a static expression. + var.deferred_declaration = false; + } else if (expression_is_lvalue(v)) { add_local_variable_name(var.self); diff --git a/spirv_glsl.hpp b/spirv_glsl.hpp index 7205def759..f5cc6ceb86 100644 --- a/spirv_glsl.hpp +++ b/spirv_glsl.hpp @@ -374,7 +374,7 @@ protected: void emit_flattened_io_block(const SPIRVariable &var, const char *qual); void emit_block_chain(SPIRBlock &block); void emit_hoisted_temporaries(std::vector> &temporaries); - void emit_specialization_constant(const SPIRConstant &constant); + void emit_constant(const SPIRConstant &constant); void emit_specialization_constant_op(const SPIRConstantOp &constant); std::string emit_continue_block(uint32_t continue_block); bool attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method method); @@ -463,6 +463,7 @@ protected: virtual bool skip_argument(uint32_t id) const; virtual void emit_array_copy(const std::string &lhs, uint32_t rhs_id); virtual void emit_block_hints(const SPIRBlock &block); + virtual std::string to_initializer_expression(const SPIRVariable &var); bool buffer_is_packing_standard(const SPIRType &type, BufferPackingStandard packing, uint32_t start_offset = 0, uint32_t end_offset = std::numeric_limits::max()); diff --git a/spirv_hlsl.cpp b/spirv_hlsl.cpp index aaa05cc07e..34e1d19fb5 100644 --- a/spirv_hlsl.cpp +++ b/spirv_hlsl.cpp @@ -4572,6 +4572,7 @@ string CompilerHLSL::compile() backend.can_declare_arrays_inline = false; backend.can_return_array = false; + build_function_control_flow_graphs_and_analyze(); update_active_builtins(); analyze_image_and_sampler_usage(); diff --git a/spirv_msl.cpp b/spirv_msl.cpp index 4f69c57150..3f8de118b1 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -280,6 +280,7 @@ string CompilerMSL::compile() struct_member_padding.clear(); + build_function_control_flow_graphs_and_analyze(); update_active_builtins(); analyze_image_and_sampler_usage(); build_implicit_builtins(); @@ -2109,6 +2110,11 @@ bool CompilerMSL::maybe_emit_array_assignment(uint32_t id_lhs, uint32_t id_rhs) return false; auto *var = maybe_get(id_lhs); + + // Is this a remapped, static constant? Don't do anything. + if (var->remapped_variable && var->statically_assigned) + return true; + if (ids[id_rhs].get_type() == TypeConstant && var && var->deferred_declaration) { // Special case, if we end up declaring a variable when assigning the constant array, @@ -4072,6 +4078,7 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o { // Get the result type of the RHS. Since this is run as a pre-processing stage, // we must extract the result type directly from the Instruction, rather than the ID. + uint32_t id_lhs = args[0]; uint32_t id_rhs = args[1]; const SPIRType *type = nullptr; @@ -4088,7 +4095,13 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o type = &compiler.get(tid); } - if (type && compiler.is_array(*type)) + auto *var = compiler.maybe_get(id_lhs); + + // Are we simply assigning to a statically assigned variable which takes a constant? + // Don't bother emitting this function. + bool static_expression_lhs = + var && var->storage == StorageClassFunction && var->statically_assigned && var->remapped_variable; + if (type && compiler.is_array(*type) && !static_expression_lhs) return SPVFuncImplArrayCopy; break; @@ -4209,7 +4222,7 @@ CompilerMSL::MemberSorter::MemberSorter(SPIRType &t, Meta &m, SortAspect sa) meta.members.resize(max(type.member_types.size(), meta.members.size())); } -void CompilerMSL::remap_constexpr_sampler(uint32_t id, const spirv_cross::MSLConstexprSampler &sampler) +void CompilerMSL::remap_constexpr_sampler(uint32_t id, const MSLConstexprSampler &sampler) { auto &type = get(get(id).basetype); if (type.basetype != SPIRType::SampledImage && type.basetype != SPIRType::Sampler) @@ -4220,10 +4233,22 @@ void CompilerMSL::remap_constexpr_sampler(uint32_t id, const spirv_cross::MSLCon } // MSL always declares builtins with their SPIR-V type. -void CompilerMSL::bitcast_from_builtin_load(uint32_t, std::string &, const spirv_cross::SPIRType &) +void CompilerMSL::bitcast_from_builtin_load(uint32_t, std::string &, const SPIRType &) { } -void CompilerMSL::bitcast_to_builtin_store(uint32_t, std::string &, const spirv_cross::SPIRType &) +void CompilerMSL::bitcast_to_builtin_store(uint32_t, std::string &, const SPIRType &) { } + +std::string CompilerMSL::to_initializer_expression(const SPIRVariable &var) +{ + // We risk getting an array initializer here with MSL. If we have an array. + // FIXME: We cannot handle non-constant arrays being initialized. + // We will need to inject spvArrayCopy here somehow ... + auto &type = get(var.basetype); + if (ids[var.initializer].get_type() == TypeConstant && (!type.array.empty() || type.basetype == SPIRType::Struct)) + return constant_expression(get(var.initializer)); + else + return CompilerGLSL::to_initializer_expression(var); +} diff --git a/spirv_msl.hpp b/spirv_msl.hpp index fe3401d91b..7997cd30fb 100644 --- a/spirv_msl.hpp +++ b/spirv_msl.hpp @@ -293,6 +293,7 @@ protected: uint32_t coord, uint32_t coord_components, uint32_t dref, uint32_t grad_x, uint32_t grad_y, uint32_t lod, uint32_t coffset, uint32_t offset, uint32_t bias, uint32_t comp, uint32_t sample, bool *p_forward) override; + std::string to_initializer_expression(const SPIRVariable &var) override; std::string unpack_expression_type(std::string expr_str, const SPIRType &type) override; std::string bitcast_glsl_op(const SPIRType &result_type, const SPIRType &argument_type) override; bool skip_argument(uint32_t id) const override;