From 032829847454432a4b149452d6354c13c5888239 Mon Sep 17 00:00:00 2001
From: twinaphex <libretro@gmail.com>
Date: Wed, 20 Jun 2018 09:32:45 +0200
Subject: [PATCH] Squashed 'deps/SPIRV-Cross/' changes from
 c9516fa917..e59cc24495

e59cc24495 Merge pull request #607 from billhollings/master
ac57a30ad6 Regenerate MSL shaders to fix conflicts from merge.
ab2ea93e35 Merge branch 'master' of https://github.com/KhronosGroup/SPIRV-Cross
9b4defe202 CompilerMSL support matrices & arrays in stage-in & stage-out.
285f214071 Merge pull request #605 from KhronosGroup/unpack-regular-operations-msl
58fab58e5e Do not unpack transposed matrices.
192a882df3 Also unpack regular unary/binary operations on MSL.
dc565136fb Merge pull request #604 from KhronosGroup/fix-603
b86bd0a265 Unpack expressions when used in functions on MSL.
c5b6ba239c Merge pull request #599 from KhronosGroup/fix-295
f1e8555801 Emit matrix layouts in structs directly.
b1196f2ace Merge pull request #598 from KhronosGroup/fix-594
6bcc890e63 Sanitize underscores in general, not just for members.
6fea07f2fd Merge pull request #597 from KhronosGroup/fix-595-596-592
3a9b045dc3 Various maintenance fixes.
0a83bacf3e Merge pull request #593 from KhronosGroup/old-clang-compile
2a1ab4108b Fix compile on older clang.
6ef1c49ec0 Merge pull request #591 from KhronosGroup/fix-578
04b149feb0 Fix image load/store on cube arrays in MSL.
8bac5c09f3 Merge pull request #590 from KhronosGroup/fix-581
f65120c147 Deal with packed expressions in more scenarios.
db1ed375b0 Merge pull request #589 from KhronosGroup/fix-580
280fb93204 Add test for reading SSBO from fragment shader on MSL.
a1b3964cbd Run format_all.sh.
6b3da831be Declare read-only SSBOs as const device in MSL.
08336e7bbb Merge pull request #588 from KhronosGroup/pr-583
46bf17c5d3 Add SREM tests for HLSL/MSL.
7cba89b4a5 Add reference files for SREM.
6b144cc609 handle OpSRem
e792cd6160 no expression type for OpSRem
f1eacba244 Merge pull request #587 from zeux/master
91fd41816f Fix textureGrad compilation for legacy targets
8cc1fdbb30 Merge pull request #586 from KhronosGroup/fix-584
ba15daee33 Add support for inheriting bindings for combined image samplers.
f929c361c5 Merge pull request #577 from KhronosGroup/fix-575
bcaae84c76 Deal with scoping for Private variables.
26b887ec99 Fix atomic_compare_exchange_weak_explicit.
62c6d2d498 Merge pull request #576 from KhronosGroup/fix-574
fb7181bff1 Run format_all.sh.
c643addacd Only reflect spec constant if it actually has a constant ID.
991b655c72 Declare OpSpecConstantOp up-front on relevant targets.
0617b98613 Run format_all.sh.
3951b9456f Fix SpecConstantComposite if input is SpecConstantOp.
c74dc4578a Merge pull request #572 from KhronosGroup/vulkan-glsl-sampler-mediump
01080365fa Use mediump on images in --vulkan-semantics as well.
9d370aca58 Merge pull request #571 from KhronosGroup/fix-570
7eba247864 Handle inout properly with split access chains.
97e38bcd38 Merge pull request #569 from KhronosGroup/fix-566
b71f5dfc0c Fix split access chains for builtin arrays.
e4694a8403 Merge pull request #568 from KhronosGroup/fix-567
903b798da7 Fix GCC 8.1 build.
d3b966322d Merge pull request #565 from pmuetschard/master
aced6058b4 Don't limit GLSL identifiers with HLSL keywords.
2792f8f3f2 Merge pull request #564 from KhronosGroup/fix-563
85a8f066f4 Do not use RMW rewrite for matrices.
04f0a08cfc Merge pull request #562 from KhronosGroup/fix-561
d2df067dd4 Force recompile if we add row-major transpose functions in MSL.
7b95168c3d Do not clear spv_function_implementations on MSL.
9279750a1d Merge pull request #560 from KhronosGroup/fix-557
17be3c652f Deal with fake overloads when using combined image samplers.
e80d6e0142 Merge pull request #559 from KhronosGroup/fix-558
3187d89c31 Reduce test case ... MSL can't deal with certain unusual cases yet.
f3e810b8b3 Fix OpCompositeConstruct with arrays in MSL.
9d43e9c02a Merge pull request #556 from billhollings/master
ba1026b899 Merge branch 'master' of https://github.com/KhronosGroup/SPIRV-Cross
57213cb7ca Compiler MSL default gather offset when component specified.
1ebccb4466 Mention Wiki in README.
8f07df016b Merge pull request #555 from KhronosGroup/fix-553
7e23e69f2a Run format_all.sh.
47081f810a Fix GatherDref on GLSL.
6b478c928d Merge pull request #554 from KhronosGroup/query-fixes
40bbf6be7a Build combined dummy samplers for Query functions without sampler as well.
d93807a625 Deal with OpImageFetch without explicit LOD.
454691f434 Merge pull request #547 from KhronosGroup/op-image-usage-tracking
aaf397cd1f Fix usage tracking issue for OpImage.
bbf6b60350 Merge pull request #546 from KhronosGroup/fix-545
893ed379d5 Remove some stale test files.
cd442b9c2f Clean and fix some issues with test_shaders.py.
18958ecccb Add test for combined-sampler-reuse on GLSL.
0280800a8f Fix case where SampledImage would get flushed to temporary.
a090907607 Merge pull request #543 from KhronosGroup/fix-542
8b75e46433 Fix some formatting issues.
f56433b802 Add new tests for combined arrays of sampler + image.
a39eb4826b Combined array of images is starting to work ...
1478847122 Change AppVeyor badge link to the correct one.
d559a8df39 Add AppVeyor status badge.
dd39d36726 Merge pull request #540 from KhronosGroup/appveyor
cc82d14ee8 Add appveyor script.
d0c829b580 Add no-opt to CTest, make test_shaders.py faster when FXC is missing.
2804d9acd7 Merge pull request #539 from KhronosGroup/fix-533
e351e5c565 Use convert_to_string for lod clamp.
e30a94225f Complete MSL constexpr samplers.
64f9461d72 Check for array of samplers.
df58debf7a Add support for constexpr samplers in MSL.
3a8335eee0 Merge pull request #537 from KhronosGroup/fix-535
b9cd3dcd7f Run format_all.sh.
e930f79e2e Be a bit smarter about uint on legacy targets.
2684054bbd Merge pull request #538 from KhronosGroup/fix-io-block-location
b06c1af9b3 Distinguish between enhanced_layouts and SSO requirements.
28c9be8a23 Unsigned integers are disallowed on legacy targets.
7796a9f3ec Merge pull request #531 from KhronosGroup/subgroup
c266429be9 Partially implement subgroup ops for HLSL SM 6.0.
146ea76f52 Add test shader for subgroup.
e1ccfd5dbb Implement all of subgroup.
f6c0e53f58 Start adding Vulkan 1.1 subgroup support to GLSL.
489e04e09e Merge pull request #530 from KhronosGroup/fix-529
9c2761f69a Run format_all.sh.
8175e2e200 Fix depth compare textures when used in functions without argument.
edac731360 Merge pull request #528 from msiglreith/fix_root_constants_name
6bdd775936 Merge pull request #526 from taisei-project/strip-uniform-location
f260c452f7 Fix naming of root constants
4a43024dba Strip uniform locations for ESSL < 3.00 and GLSL < 430
9b8c2c4654 Merge pull request #524 from KhronosGroup/fix-512-523
694b314f87 Support empty structs.
31a3fdf4ee Decouple public require_extension and the internal use of the function.
d0133c5dd9 Merge pull request #509 from KhronosGroup/hlsl-semantic-counter-buffer
d8cdab2f57 Add test shader for semantic decoration.
05c66f9cac Add test shader which uses counter buffer extension.
215d3ca0a4 Add support for new HLSL semantic/counter buffer decorations.
7f84537350 Merge pull request #519 from KhronosGroup/fix-507
ac81a0ce68 Use declared binding in SPIR-V as a fallback for explicit MSL binds.
e8ca39b7b5 Add test for sampler image arrays.
382101bd05 Run format_all.sh.
1d082e9b79 Fix passing arrays of combined image samplers in HLSL.
5827dd54ea Support array of images and samplers in MSL.
81eb72a9a0 Ignore LOD when sampling 1D textures in MSL.
69ffd918a6 Merge pull request #522 from KhronosGroup/fix-515
938040be0b Only disable binding layout for UBOs.
fe697a80f5 Emit classic uniform for UBO in GLSL 1.30.
65be63fd04 Merge pull request #521 from KhronosGroup/fix-516
a6e211e00b Support dual-source blending on GLSL and MSL.
a1e30c8c2b Merge pull request #520 from KhronosGroup/fix-517
3229e6efb6 Add more illegal name replacement in MSL.
761b06bc73 Merge pull request #514 from KhronosGroup/fix-510
a901b4c814 Remove redundant temporary sort.
c1947aa447 Update glslang/SPIRV-Tools on Travis.
35f64d03bb Fix name aliasing for temporary variables.
5161d5ed3b Merge pull request #505 from KhronosGroup/msl-array-mrt
719cf9d42f Run format_all.sh.
8e90382675 Properly flatten MRT outputs in MSL.
6e6ca0b237 Attempt MRT-as-array in MSL.
fbfe13657a Merge pull request #502 from KhronosGroup/update-glslang-spirv-tools
4f348a9807 Update README and update_test_shaders.sh.
b46910e9f5 Access chain into packed vectors as arrays.
a6afda650f Add newly moved reference files.
cdbd376c42 Try building debug on Travis.
e7bf8d2f48 Refactor out noopt shaders to their own folders.
1a4a36b6cd Experiment a bit more with Travis.
5a746ba941 Split checkout and build on Travis CI.
4979d10b54 Implement packHalf2x16/unpackHalf2x16 on MSL.
ae2680c898 Make sure ballotARB inherits expression dependencies.
938c7debed Handle control-dependent temporaries.
012377f811 Refactor block IO emission to use bitset.
955eac522b Make use of the handy scripts for Travis as well.
9fbd8b789e Update tests for latest SPIRV-Tools and glslang.
e8e58844d4 Rewrite everything to use Bitset rather than uint64_t.
29315f3b3f Merge pull request #498 from JustSid/master
e66fd6c2a0 Added generation of the textureXYZGrad() legacy GLSL instruction
0f9cb0da0d Merge pull request #497 from KhronosGroup/fix-489
9a52713d77 Run format_all.sh.
a803e5ae38 Deprecate set_options()/get_options() interface, replace it.
236b2fa47c Merge pull request #496 from KhronosGroup/fix-494
e3b8e9455c Add test shader where a phi variable invalidates a temporary.
1de74fdabd Explicitly emit break/continues in selection branches.
e0efa737ca Expand the implementation of inherit_expression_dependencies.
01fbb3a342 Merge pull request #495 from KhronosGroup/fix-493
28cccc3dbb Emit complex continue blocks "properly".
54549a624f Add some test shaders for special merge methods.
8d557d4103 Handle cases where merge selects as also loop merge or continue blocks.
8c8a93f745 Merge pull request #492 from KhronosGroup/fix-488
922420e346 Disallow arrays and structs from becoming loop variables.
2e5d06d1ce Merge pull request #485 from KhronosGroup/fix-484
bdabd0c73a Disable double test in MSL FP16 tests.
723bec5d33 frexp/ldexp fails in debug builds, workaround temporarily.
0780820a7b Skip interpolateAt tests with FP16.
ac0e93f392 Run format_all.sh.
18ad1be3c3 Add FP16 test for MSL as well.
47d94ff8d9 Add FP16 to HLSL.
770ed25f29 Merge pull request #487 from KhronosGroup/fix-486
15a941cd3d Add builtin GLSL functions to blacklist.
d9da2db442 Some compat fixes for MSL and Half.
547278da12 Test denormal fp16 constants.
24dfe496c2 Run format_all.sh.
05348a66ca Add test shader for FP16 support.
bc0f698df3 Fix packing alignments for float16.
91f85d3412 Begin adding float16_t support to GLSL.
b0a2de5b63 Merge pull request #483 from KhronosGroup/fix-466
38d9d8af68 Add some more MSL test shaders.
5fe79eb59c Update tests.
294259e2f1 Fix type aliasing on MSL.

git-subtree-dir: deps/SPIRV-Cross
git-subtree-split: e59cc244958af6059f7bd1d16d833e17409dec37
---
 .travis.yml                                   |   23 +-
 CMakeLists.txt                                |    9 +
 README.md                                     |    6 +-
 appveyor.yml                                  |   31 +
 build_glslang_spirv_tools.sh                  |   26 +
 checkout_glslang_spirv_tools.sh               |   35 +-
 format_all.sh                                 |    3 +-
 main.cpp                                      |  118 +-
 ...orage-buffer-basic.invalid.nofxc.asm.comp} |    0
 .../asm/frag/combined-sampler-reuse.asm.frag  |   30 +
 .../asm/frag/empty-struct.asm.frag            |    8 +
 .../asm/frag/function-overload-alias.asm.frag |    2 +-
 .../asm/frag/image-extract-reuse.asm.frag     |   31 +
 .../asm/frag/implicit-read-dep-phi.asm.frag   |   57 +
 .../opt/shaders-hlsl/asm/frag/srem.asm.frag   |   29 +
 .../asm/frag/texel-fetch-no-lod.asm.frag      |   29 +
 .../asm/frag/unreachable.asm.frag             |   16 +-
 .../asm/vert/empty-struct-composite.asm.vert  |    8 -
 .../vert/spec-constant-op-composite.asm.vert  |   37 +
 reference/opt/shaders-hlsl/comp/builtins.comp |    2 +
 reference/opt/shaders-hlsl/comp/image.comp    |   38 +-
 .../opt/shaders-hlsl/comp/rmw-matrix.comp     |   20 +
 reference/opt/shaders-hlsl/comp/shared.comp   |    2 +-
 .../comp/subgroups.invalid.nofxc.sm60.comp    |   67 +
 .../frag/array-lut-no-loop-variable.frag      |   35 +
 .../control-dependent-in-branch.desktop.frag  |   54 +
 .../opt/shaders-hlsl/frag/fp16.desktop.frag   |   45 +
 .../frag/image-query-selective.frag           |  101 +
 .../opt/shaders-hlsl/frag/image-query.frag    |  107 +
 .../frag/partial-write-preserve.frag          |    6 -
 .../opt/shaders-hlsl/frag/resources.frag      |    4 +-
 .../frag/row-major-layout-in-struct.frag      |   37 +
 .../frag/sample-cmp-level-zero.frag           |    4 +-
 .../opt/shaders-hlsl/frag/sampler-array.frag  |    3 +-
 .../frag/sampler-image-arrays.frag            |   39 +
 .../opt/shaders-hlsl/frag/spec-constant.frag  |   33 -
 .../shaders-hlsl/frag/tex-sampling-ms.frag    |   33 +
 .../opt/shaders-hlsl/frag/tex-sampling.frag   |   58 +-
 .../opt/shaders-hlsl/frag/unary-enclose.frag  |    2 +-
 .../opt/shaders-hlsl/vert/locations.vert      |   12 +-
 .../vert/read-from-row-major-array.vert       |   35 +
 .../opt/shaders-hlsl/vert/return-array.vert   |    2 -
 .../shaders-msl/asm/comp/bitcast_sar.asm.comp |   18 +-
 .../asm/comp/bitcast_sdiv.asm.comp            |   18 +-
 .../shaders-msl/asm/comp/bitcast_slr.asm.comp |   18 +-
 .../storage-buffer-basic.invalid.asm.comp}    |    2 +-
 .../asm/frag/combined-sampler-reuse.asm.frag  |   23 +
 .../asm/frag/default-member-names.asm.frag    |   26 +-
 .../asm/frag/empty-struct.asm.frag}           |    2 +-
 .../opt/shaders-msl/asm/frag/frem.asm.frag    |   12 +-
 .../asm/frag/function-overload-alias.asm.frag |    2 +-
 .../asm/frag/image-extract-reuse.asm.frag     |   17 +
 .../asm/frag/implicit-read-dep-phi.asm.frag   |   50 +
 .../asm/frag/op-constant-null.asm.frag        |    6 -
 .../asm/frag/phi-loop-variable.asm.frag       |    3 -
 .../shaders-msl/asm/frag/srem.asm.frag}       |   21 +-
 .../asm/frag/texel-fetch-no-lod.asm.frag      |   17 +
 .../asm/frag/undef-variable-store.asm.frag    |   23 +-
 .../shaders-msl/asm/frag/unreachable.asm.frag |   26 +-
 .../asm/frag/vector-shuffle-oom.asm.frag      |  190 +-
 ...mposite.asm.vert => packing-test.asm.vert} |    0
 .../vert/spec-constant-op-composite.asm.vert  |   35 +
 .../access-private-workgroup-in-function.comp |   18 +
 reference/opt/shaders-msl/comp/atomic.comp    |   72 +-
 .../opt/shaders-msl/comp/bake_gradient.comp   |   22 -
 reference/opt/shaders-msl/comp/basic.comp     |    4 +-
 reference/opt/shaders-msl/comp/builtins.comp  |    2 +
 .../opt/shaders-msl/comp/coherent-block.comp  |    2 +-
 .../opt/shaders-msl/comp/coherent-image.comp  |    2 +-
 .../shaders-msl/comp/composite-construct.comp |   35 +
 reference/opt/shaders-msl/comp/culling.comp   |    4 +-
 .../opt/shaders-msl/comp/defer-parens.comp    |    7 +-
 reference/opt/shaders-msl/comp/dowhile.comp   |    3 +-
 .../comp/image-cube-array-load-store.comp     |   10 +
 reference/opt/shaders-msl/comp/image.comp     |    3 +-
 reference/opt/shaders-msl/comp/inverse.comp   |    2 +-
 reference/opt/shaders-msl/comp/mat3.comp      |    2 +-
 reference/opt/shaders-msl/comp/mod.comp       |    2 +-
 reference/opt/shaders-msl/comp/modf.comp      |    2 +-
 .../opt/shaders-msl/comp/packing-test-1.comp  |   28 +
 .../opt/shaders-msl/comp/packing-test-2.comp  |   28 +
 .../opt/shaders-msl/comp/read-write-only.comp |    2 +-
 reference/opt/shaders-msl/comp/return.comp    |   33 -
 .../opt/shaders-msl/comp/rmw-matrix.comp      |   22 +
 reference/opt/shaders-msl/comp/rmw-opt.comp   |    3 +-
 .../comp/shared-array-of-arrays.comp          |    2 +-
 reference/opt/shaders-msl/comp/shared.comp    |    4 +-
 .../opt/shaders-msl/comp/struct-layout.comp   |    2 +-
 .../opt/shaders-msl/comp/struct-nested.comp   |    8 +-
 .../opt/shaders-msl/comp/struct-packing.comp  |   75 +-
 .../opt/shaders-msl/comp/torture-loop.comp    |   73 +-
 .../opt/shaders-msl/comp/type-alias.comp      |   14 +-
 reference/opt/shaders-msl/comp/udiv.comp      |    9 +-
 .../desktop-only/frag/image-ms.desktop.frag   |    2 +-
 .../desktop-only/vert/basic.desktop.sso.vert  |   12 +-
 .../shaders-msl/flatten/basic.flatten.vert    |   12 +-
 .../flatten/multiindex.flatten.vert           |   10 +-
 .../flatten/push-constant.flatten.vert        |   12 +-
 .../shaders-msl/flatten/rowmajor.flatten.vert |   10 +-
 .../shaders-msl/flatten/struct.flatten.vert   |   14 +-
 .../shaders-msl/flatten/swizzle.flatten.vert  |    2 +-
 .../shaders-msl/flatten/types.flatten.frag    |    2 +-
 .../frag/array-lut-no-loop-variable.frag      |   42 +
 reference/opt/shaders-msl/frag/basic.frag     |   12 +-
 .../binary-func-unpack-pack-arguments.frag    |   28 +
 .../frag/binary-unpack-pack-arguments.frag    |   28 +
 .../opt/shaders-msl/frag/bitcasting.frag      |   10 +-
 reference/opt/shaders-msl/frag/builtins.frag  |   10 +-
 .../composite-extract-forced-temporary.frag   |   10 +-
 .../opt/shaders-msl/frag/constant-array.frag  |   12 +-
 .../shaders-msl/frag/constant-composites.frag |   10 +-
 .../control-dependent-in-branch.desktop.frag  |   45 +
 .../frag/dual-source-blending.frag            |   19 +
 .../opt/shaders-msl/frag/false-loop-init.frag |   26 +-
 .../opt/shaders-msl/frag/flush_params.frag    |    5 -
 .../opt/shaders-msl/frag/for-loop-init.frag   |   61 +-
 .../opt/shaders-msl/frag/fp16-packing.frag    |   25 +
 .../opt/shaders-msl/frag/fp16.desktop.frag    |   16 +
 .../opt/shaders-msl/frag/front-facing.frag    |   12 +-
 .../opt/shaders-msl/frag/gather-dref.frag     |   22 +
 .../opt/shaders-msl/frag/gather-offset.frag   |   17 +
 .../shaders-msl/frag/illegal-name-test-0.frag |   17 +
 reference/opt/shaders-msl/frag/in_block.frag  |   12 +-
 reference/opt/shaders-msl/frag/in_mat.frag    |   37 +
 reference/opt/shaders-msl/frag/mix.frag       |   25 +-
 reference/opt/shaders-msl/frag/mrt-array.frag |   43 +
 .../packed-expression-vector-shuffle.frag     |   23 +
 .../opt/shaders-msl/frag/packing-test-3.frag  |   28 +
 reference/opt/shaders-msl/frag/pls.frag       |   16 +-
 .../opt/shaders-msl/frag/readonly-ssbo.frag   |   22 +
 .../sample-depth-separate-image-sampler.frag  |    2 +-
 .../opt/shaders-msl/frag/sampler-1d-lod.frag  |   22 +
 .../frag/sampler-image-arrays.msl2.frag       |   28 +
 reference/opt/shaders-msl/frag/sampler.frag   |   12 +-
 .../frag/separate-image-sampler-argument.frag |    2 +-
 .../frag/shadow-compare-global-alias.frag     |   27 +
 .../frag/spec-constant-block-size.frag        |   10 +-
 reference/opt/shaders-msl/frag/swizzle.frag   |   12 +-
 .../shaders-msl/frag/texture-proj-shadow.frag |   16 +-
 .../opt/shaders-msl/frag/ubo_layout.frag      |    7 +-
 .../opt/shaders-msl/frag/unary-enclose.frag   |   12 +-
 .../legacy/vert/transpose.legacy.vert         |   10 +-
 reference/opt/shaders-msl/vert/basic.vert     |   12 +-
 .../opt/shaders-msl/vert/copy.flatten.vert    |   20 +-
 .../opt/shaders-msl/vert/dynamic.flatten.vert |   18 +-
 reference/opt/shaders-msl/vert/functions.vert |   16 +-
 .../shaders-msl/vert/in_out_array_mat.vert    |   67 +
 reference/opt/shaders-msl/vert/out_block.vert |   12 +-
 .../opt/shaders-msl/vert/packed_matrix.vert   |   22 +-
 reference/opt/shaders-msl/vert/pointsize.vert |   12 +-
 .../vert/read-from-row-major-array.vert       |   37 +
 .../opt/shaders-msl/vert/return-array.vert    |   10 +-
 .../opt/shaders-msl/vert/texture_buffer.vert  |    2 +-
 .../opt/shaders-msl/vert/ubo.alignment.vert   |   14 +-
 reference/opt/shaders-msl/vert/ubo.vert       |   12 +-
 .../vulkan/frag/push-constant.vk.frag         |   10 +-
 ...etch_subpassInput.vk.nocompat.invalid.frag |   11 -
 reference/opt/shaders/amd/fs.invalid.frag     |    4 +-
 reference/opt/shaders/amd/gcn_shader.comp     |    1 +
 reference/opt/shaders/amd/shader_ballot.comp  |    8 +-
 .../opt/shaders/amd/shader_group_vote.comp    |    7 +
 .../shaders/amd/shader_trinary_minmax.comp    |    1 +
 .../shaders/asm/comp/bitcast_iequal.asm.comp  |   10 +-
 .../opt/shaders/asm/comp/bitcast_sar.asm.comp |   18 +-
 .../shaders/asm/comp/bitcast_sdiv.asm.comp    |   18 +-
 .../opt/shaders/asm/comp/bitcast_slr.asm.comp |   18 +-
 .../asm/comp/hlsl-functionality.asm.comp      |   19 +
 .../asm/comp/name-alias.asm.invalid.comp      |   37 -
 .../storage-buffer-basic.invalid.asm.comp}    |    0
 .../frag/combined-sampler-reuse.vk.asm.frag   |   13 +
 .../combined-sampler-reuse.vk.asm.frag.vk     |   14 +
 .../frag/complex-name-workarounds.asm.frag    |   16 +
 ...osite-construct-struct-no-swizzle.asm.frag |    6 -
 .../asm/frag/default-member-names.asm.frag    |   26 +-
 .../empty-struct.asm.frag}                    |    0
 .../asm/frag/function-overload-alias.asm.frag |    2 +-
 .../asm/frag/image-extract-reuse.asm.frag     |   11 +
 .../frag/image-fetch-no-sampler.asm.vk.frag   |    4 +-
 .../image-fetch-no-sampler.asm.vk.frag.vk     |    6 +-
 .../frag/image-query-no-sampler.vk.asm.frag   |    9 +
 .../image-query-no-sampler.vk.asm.frag.vk     |   10 +
 .../asm/frag/implicit-read-dep-phi.asm.frag   |   40 +
 ...op-body-dominator-continue-access.asm.frag |  100 +-
 .../asm/frag/loop-header-to-continue.asm.frag |   12 +-
 .../asm/frag/loop-merge-to-continue.asm.frag  |   21 +
 .../asm/frag/multi-for-loop-init.asm.frag     |    5 +-
 .../asm/frag/op-constant-null.asm.frag        |    6 -
 .../asm/frag/phi-loop-variable.asm.frag       |    3 -
 ...pler-buffer-array-without-sampler.asm.frag |   18 +
 .../sampler-buffer-without-sampler.asm.frag   |    4 +-
 .../frag/selection-merge-to-continue.asm.frag |   24 +
 reference/opt/shaders/asm/frag/srem.asm.frag  |   13 +
 .../frag/switch-merge-to-continue.asm.frag    |   31 +
 .../asm/frag/temporary-name-alias.asm.frag}   |    0
 .../asm/frag/temporary-phi-hoisting.asm.frag  |    7 +-
 .../asm/frag/texel-fetch-no-lod.asm.frag      |   11 +
 .../asm/frag/undef-variable-store.asm.frag    |   23 +-
 .../opt/shaders/asm/frag/unreachable.asm.frag |   15 +-
 .../asm/frag/vector-shuffle-oom.asm.frag      |  162 +-
 .../inout-split-access-chain-handle.asm.geom  |    9 +
 .../geom/split-access-chain-input.asm.geom    |    9 +
 ...s-fixed-input-array-builtin-array.asm.tesc |   81 -
 ...nput-array-builtin-array.invalid.asm.tesc} |    0
 .../opt/shaders/asm/vert/empty-io.asm.vert    |    7 +-
 .../asm/vert/global-builtin.sso.asm.vert      |    8 +-
 .../spec-constant-op-composite.asm.vk.vert    |   16 +
 .../spec-constant-op-composite.asm.vk.vert.vk |   24 +
 reference/opt/shaders/comp/bake_gradient.comp |   11 +-
 reference/opt/shaders/comp/casts.comp         |    6 +-
 reference/opt/shaders/comp/cfg.comp           |   27 +-
 .../opt/shaders/comp/composite-construct.comp |   14 +-
 reference/opt/shaders/comp/defer-parens.comp  |    7 +-
 reference/opt/shaders/comp/dowhile.comp       |    1 -
 .../opt/shaders/comp/generate_height.comp     |   51 +-
 reference/opt/shaders/comp/image.comp         |    3 +-
 reference/opt/shaders/comp/return.comp        |   31 -
 reference/opt/shaders/comp/rmw-matrix.comp    |   20 +
 reference/opt/shaders/comp/rmw-opt.comp       |    3 +-
 reference/opt/shaders/comp/shared.comp        |    2 +-
 .../opt/shaders/comp/struct-packing.comp      |   29 +-
 reference/opt/shaders/comp/torture-loop.comp  |   71 +-
 reference/opt/shaders/comp/type-alias.comp    |   11 +-
 reference/opt/shaders/comp/udiv.comp          |    6 +-
 .../control-dependent-in-branch.desktop.frag  |   37 +
 .../frag/dual-source-blending.desktop.frag    |   11 +
 .../desktop-only/frag/fp16.desktop.frag       |   11 +
 .../frag/image-query.desktop.frag             |    9 +
 .../desktop-only/geom/basic.desktop.sso.geom  |    7 +-
 .../opt/shaders/flatten/copy.flatten.vert     |    8 +-
 .../opt/shaders/flatten/dynamic.flatten.vert  |    4 +-
 ...multi-dimensional.desktop.flatten_dim.frag |   24 +-
 .../frag/array-lut-no-loop-variable.frag      |   18 +
 .../opt/shaders/frag/constant-array.frag      |    2 +-
 .../opt/shaders/frag/constant-composites.frag |    6 +-
 .../frag/eliminate-dead-variables.frag        |   14 -
 .../opt/shaders/frag/false-loop-init.frag     |   16 +-
 reference/opt/shaders/frag/flush_params.frag  |    5 -
 reference/opt/shaders/frag/for-loop-init.frag |   58 +-
 reference/opt/shaders/frag/gather-dref.frag   |   14 +
 reference/opt/shaders/frag/ground.frag        |    2 +-
 ...temporary-use-continue-block-as-value.frag |   25 +-
 reference/opt/shaders/frag/mix.frag           |    8 +-
 .../shaders/frag/partial-write-preserve.frag  |    6 -
 .../opt/shaders/frag/sampler-ms-query.frag    |   14 -
 reference/opt/shaders/frag/swizzle.frag       |    2 +-
 reference/opt/shaders/frag/temporary.frag     |   14 -
 reference/opt/shaders/frag/ubo_layout.frag    |    7 +-
 reference/opt/shaders/frag/unary-enclose.frag |    2 +-
 reference/opt/shaders/geom/basic.geom         |    7 +-
 .../legacy/vert/struct-varying.legacy.vert    |    5 +-
 reference/opt/shaders/tesc/water_tess.tesc    |    8 +-
 reference/opt/shaders/tese/water_tess.tese    |    7 +-
 reference/opt/shaders/vert/ocean.vert         |   48 +-
 .../vert/read-from-row-major-array.vert       |   16 +
 .../subgroups.nocompat.invalid.vk.comp.vk     |  110 +
 .../separate-combined-fake-overload.vk.frag   |   12 +
 ...separate-combined-fake-overload.vk.frag.vk |   13 +
 .../separate-sampler-texture-array.vk.frag    |   19 +
 .../separate-sampler-texture-array.vk.frag.vk |   20 +
 .../frag/separate-sampler-texture.vk.frag     |    5 +-
 .../frag/separate-sampler-texture.vk.frag.vk  |    5 +-
 .../shaders/vulkan/frag/spec-constant.vk.frag |   20 -
 .../vulkan/frag/spec-constant.vk.frag.vk      |   25 -
 .../vulkan/vert/multiview.nocompat.vk.vert    |   15 -
 ...lization-constant-workgroup.nofxc.asm.comp |    0
 .../asm/vert/empty-struct-composite.asm.vert  |   15 +
 .../comp/bitfield.comp}                       |    0
 .../frag/spec-constant.frag                   |  118 ++
 ...lization-constant-workgroup.nofxc.asm.comp |   16 -
 ...orage-buffer-basic.invalid.nofxc.asm.comp} |    2 +
 .../asm/frag/combined-sampler-reuse.asm.frag  |   30 +
 .../asm/frag/empty-struct.asm.frag            |   27 +
 .../asm/frag/image-extract-reuse.asm.frag     |   31 +
 .../asm/frag/implicit-read-dep-phi.asm.frag   |   56 +
 reference/shaders-hlsl/asm/frag/srem.asm.frag |   29 +
 .../asm/frag/texel-fetch-no-lod.asm.frag      |   29 +
 .../asm/vert/empty-struct-composite.asm.vert  |    8 -
 .../vert/spec-constant-op-composite.asm.vert  |   38 +
 .../shaders-hlsl/comp/bitfield.noopt.comp     |  113 -
 reference/shaders-hlsl/comp/rmw-matrix.comp   |   20 +
 .../comp/subgroups.invalid.nofxc.sm60.comp    |   93 +
 .../frag/array-lut-no-loop-variable.frag      |   31 +
 .../control-dependent-in-branch.desktop.frag  |   55 +
 reference/shaders-hlsl/frag/fp16.desktop.frag |  179 ++
 .../frag/row-major-layout-in-struct.frag      |   46 +
 .../frag/sampler-image-arrays.frag            |   54 +
 .../shaders-hlsl/frag/spec-constant.frag      |   79 -
 .../shaders-hlsl/frag/tex-sampling-ms.frag    |   32 +
 reference/shaders-hlsl/frag/tex-sampling.frag |   48 +-
 .../vert/read-from-row-major-array.vert       |   64 +
 .../inliner-dominator-inside-loop.asm.frag    |   31 +-
 .../asm/vert/empty-struct-composite.asm.vert  |   16 +
 .../comp/bitfield.comp}                       |    0
 .../comp/loop.comp}                           |    2 +-
 .../comp/return.comp                          |    2 +-
 .../frag/in_block_assign.frag}                |   10 +-
 .../vert/functions_nested.vert                |   10 +-
 .../shaders-msl/asm/comp/bitcast_sar.asm.comp |   18 +-
 .../asm/comp/bitcast_sdiv.asm.comp            |   18 +-
 .../shaders-msl/asm/comp/bitcast_slr.asm.comp |   18 +-
 .../storage-buffer-basic.invalid.asm.comp}    |    1 +
 .../asm/frag/combined-sampler-reuse.asm.frag  |   23 +
 .../asm/frag/empty-struct.asm.frag            |   30 +
 reference/shaders-msl/asm/frag/frem.asm.frag  |   12 +-
 .../asm/frag/image-extract-reuse.asm.frag     |   17 +
 .../asm/frag/implicit-read-dep-phi.asm.frag   |   49 +
 .../inliner-dominator-inside-loop.asm.frag    |  235 ---
 reference/shaders-msl/asm/frag/srem.asm.frag  |   23 +
 .../asm/frag/texel-fetch-no-lod.asm.frag      |   17 +
 .../shaders-msl/asm/frag/unreachable.asm.frag |   10 +-
 .../asm/frag/vector-shuffle-oom.asm.frag      |   64 +-
 .../asm/vert/packing-test.asm.vert            |   19 +
 .../vert/spec-constant-op-composite.asm.vert  |   37 +
 .../access-private-workgroup-in-function.comp |   34 +
 reference/shaders-msl/comp/atomic.comp        |   72 +-
 reference/shaders-msl/comp/bake_gradient.comp |   40 -
 reference/shaders-msl/comp/basic.comp         |    4 +-
 .../shaders-msl/comp/bitfield.noopt.comp      |   47 -
 .../shaders-msl/comp/coherent-block.comp      |    2 +-
 .../shaders-msl/comp/coherent-image.comp      |    2 +-
 .../shaders-msl/comp/composite-construct.comp |   50 +
 reference/shaders-msl/comp/culling.comp       |    4 +-
 reference/shaders-msl/comp/dowhile.comp       |    2 +-
 .../comp/image-cube-array-load-store.comp     |   12 +
 reference/shaders-msl/comp/inverse.comp       |    2 +-
 reference/shaders-msl/comp/loop.noopt.comp    |  107 -
 reference/shaders-msl/comp/mat3.comp          |    2 +-
 reference/shaders-msl/comp/mod.comp           |    2 +-
 reference/shaders-msl/comp/modf.comp          |    2 +-
 .../shaders-msl/comp/packing-test-1.comp      |   38 +
 .../shaders-msl/comp/packing-test-2.comp      |   29 +
 .../shaders-msl/comp/read-write-only.comp     |    2 +-
 reference/shaders-msl/comp/rmw-matrix.comp    |   22 +
 reference/shaders-msl/comp/shared.comp        |    4 +-
 reference/shaders-msl/comp/struct-layout.comp |    2 +-
 reference/shaders-msl/comp/struct-nested.comp |   16 +-
 .../shaders-msl/comp/struct-packing.comp      |   98 +-
 reference/shaders-msl/comp/torture-loop.comp  |    2 +-
 reference/shaders-msl/comp/type-alias.comp    |   14 +-
 reference/shaders-msl/comp/udiv.comp          |    2 +-
 .../desktop-only/frag/image-ms.desktop.frag   |    2 +-
 .../desktop-only/vert/basic.desktop.sso.vert  |   12 +-
 .../shaders-msl/flatten/basic.flatten.vert    |   12 +-
 .../flatten/multiindex.flatten.vert           |   10 +-
 .../flatten/push-constant.flatten.vert        |   12 +-
 .../shaders-msl/flatten/rowmajor.flatten.vert |   10 +-
 .../shaders-msl/flatten/struct.flatten.vert   |   14 +-
 .../shaders-msl/flatten/swizzle.flatten.vert  |    2 +-
 .../shaders-msl/flatten/types.flatten.frag    |    2 +-
 .../frag/array-lut-no-loop-variable.frag      |   38 +
 reference/shaders-msl/frag/basic.frag         |   12 +-
 .../binary-func-unpack-pack-arguments.frag    |   28 +
 .../frag/binary-unpack-pack-arguments.frag    |   28 +
 reference/shaders-msl/frag/bitcasting.frag    |   10 +-
 reference/shaders-msl/frag/builtins.frag      |   10 +-
 .../composite-extract-forced-temporary.frag   |   10 +-
 .../shaders-msl/frag/constant-array.frag      |   10 +-
 .../shaders-msl/frag/constant-composites.frag |   10 +-
 .../control-dependent-in-branch.desktop.frag  |   45 +
 .../frag/dual-source-blending.frag            |   19 +
 .../shaders-msl/frag/false-loop-init.frag     |   10 +-
 reference/shaders-msl/frag/fp16-packing.frag  |   25 +
 reference/shaders-msl/frag/fp16.desktop.frag  |  180 ++
 reference/shaders-msl/frag/front-facing.frag  |   12 +-
 reference/shaders-msl/frag/gather-dref.frag   |   22 +
 reference/shaders-msl/frag/gather-offset.frag |   17 +
 .../shaders-msl/frag/illegal-name-test-0.frag |   21 +
 reference/shaders-msl/frag/in_block.frag      |   12 +-
 reference/shaders-msl/frag/in_mat.frag        |   37 +
 reference/shaders-msl/frag/mix.frag           |   16 +-
 reference/shaders-msl/frag/mrt-array.frag     |   53 +
 .../packed-expression-vector-shuffle.frag     |   25 +
 .../shaders-msl/frag/packing-test-3.frag      |   54 +
 reference/shaders-msl/frag/pls.frag           |   16 +-
 reference/shaders-msl/frag/readonly-ssbo.frag |   29 +
 .../sample-depth-separate-image-sampler.frag  |    2 +-
 .../shaders-msl/frag/sampler-1d-lod.frag      |   22 +
 .../frag/sampler-image-arrays.msl2.frag       |   45 +
 reference/shaders-msl/frag/sampler.frag       |   14 +-
 .../frag/separate-image-sampler-argument.frag |    2 +-
 .../frag/shadow-compare-global-alias.frag     |   53 +
 .../frag/spec-constant-block-size.frag        |   10 +-
 reference/shaders-msl/frag/swizzle.frag       |   12 +-
 .../shaders-msl/frag/texture-proj-shadow.frag |   16 +-
 reference/shaders-msl/frag/ubo_layout.frag    |    7 +-
 reference/shaders-msl/frag/unary-enclose.frag |   12 +-
 .../legacy/vert/transpose.legacy.vert         |   10 +-
 reference/shaders-msl/vert/basic.vert         |   12 +-
 reference/shaders-msl/vert/copy.flatten.vert  |   15 +-
 .../shaders-msl/vert/dynamic.flatten.vert     |   14 +-
 reference/shaders-msl/vert/functions.vert     |   16 +-
 .../shaders-msl/vert/in_out_array_mat.vert    |   78 +
 reference/shaders-msl/vert/out_block.vert     |   12 +-
 reference/shaders-msl/vert/packed_matrix.vert |   15 +-
 reference/shaders-msl/vert/pointsize.vert     |   12 +-
 .../vert/read-from-row-major-array.vert       |   66 +
 reference/shaders-msl/vert/return-array.vert  |   12 +-
 .../shaders-msl/vert/texture_buffer.vert      |    2 +-
 reference/shaders-msl/vert/ubo.alignment.vert |   14 +-
 reference/shaders-msl/vert/ubo.vert           |   12 +-
 .../vulkan/frag/push-constant.vk.frag         |   10 +-
 .../vulkan/frag/spec-constant.vk.frag         |  108 +-
 .../inliner-dominator-inside-loop.asm.frag    |    3 +-
 .../asm/vert/empty-struct-composite.asm.vert  |   13 +
 .../asm/vert/semantic-decoration.asm.vert     |   25 +
 .../comp/bitfield.comp}                       |    0
 .../comp/loop.comp}                           |    0
 .../comp/return.comp                          |    0
 .../vulkan/frag/spec-constant.vk.frag         |    0
 .../vulkan/frag/spec-constant.vk.frag.vk      |  107 +
 ...etch_subpassInput.vk.nocompat.invalid.frag |   11 -
 reference/shaders/amd/fs.invalid.frag         |    4 +-
 reference/shaders/amd/shader_ballot.comp      |    2 +-
 .../shaders/asm/comp/bitcast_iequal.asm.comp  |   10 +-
 .../shaders/asm/comp/bitcast_sar.asm.comp     |   18 +-
 .../shaders/asm/comp/bitcast_sdiv.asm.comp    |   18 +-
 .../shaders/asm/comp/bitcast_slr.asm.comp     |   18 +-
 .../asm/comp/hlsl-functionality.asm.comp      |   24 +
 .../asm/comp/name-alias.asm.invalid.comp      |   37 -
 .../storage-buffer-basic.invalid.asm.comp}    |    2 +
 .../frag/combined-sampler-reuse.vk.asm.frag   |   13 +
 .../combined-sampler-reuse.vk.asm.frag.vk     |   14 +
 .../frag/complex-name-workarounds.asm.frag    |   28 +
 .../shaders/asm/frag/empty-struct.asm.frag    |   25 +
 .../asm/frag/image-extract-reuse.asm.frag     |   11 +
 .../image-fetch-no-sampler.asm.vk.frag.vk     |    2 +-
 .../frag/image-query-no-sampler.vk.asm.frag   |   13 +
 .../image-query-no-sampler.vk.asm.frag.vk     |   14 +
 .../asm/frag/implicit-read-dep-phi.asm.frag   |   39 +
 .../inliner-dominator-inside-loop.asm.frag    |  227 --
 .../asm/frag/loop-header-to-continue.asm.frag |   12 +-
 .../asm/frag/loop-merge-to-continue.asm.frag  |   17 +
 ...pler-buffer-array-without-sampler.asm.frag |   28 +
 .../sampler-buffer-without-sampler.asm.frag   |    4 +-
 .../frag/selection-merge-to-continue.asm.frag |   23 +
 reference/shaders/asm/frag/srem.asm.frag      |   13 +
 .../frag/switch-merge-to-continue.asm.frag    |   30 +
 .../asm/frag/temporary-name-alias.asm.frag    |   10 +
 .../asm/frag/texel-fetch-no-lod.asm.frag      |   11 +
 .../asm/frag/vector-shuffle-oom.asm.frag      |   36 +-
 .../inout-split-access-chain-handle.asm.geom  |   23 +
 .../geom/split-access-chain-input.asm.geom    |    9 +
 ...input-array-builtin-array.invalid.asm.tesc |   79 +
 reference/shaders/asm/vert/empty-io.asm.vert  |    5 +
 .../spec-constant-op-composite.asm.vk.vert    |   16 +
 .../spec-constant-op-composite.asm.vk.vert.vk |   25 +
 reference/shaders/comp/bitfield.noopt.comp    |   19 -
 reference/shaders/comp/generate_height.comp   |   32 +-
 reference/shaders/comp/loop.noopt.comp        |  105 -
 reference/shaders/comp/rmw-matrix.comp        |   20 +
 reference/shaders/comp/struct-packing.comp    |   52 +-
 .../desktop-only/comp/enhanced-layouts.comp   |    9 +-
 .../desktop-only/comp/fp64.desktop.comp       |    1 -
 .../control-dependent-in-branch.desktop.frag  |   37 +
 .../frag/dual-source-blending.desktop.frag    |   11 +
 .../desktop-only/frag/fp16.desktop.frag       |  153 ++
 .../frag/array-lut-no-loop-variable.frag      |   14 +
 .../shaders/frag/constant-composites.frag     |    6 +-
 reference/shaders/frag/gather-dref.frag       |   14 +
 reference/shaders/frag/swizzle.frag           |    2 +-
 reference/shaders/frag/ubo_layout.frag        |    7 +-
 reference/shaders/tesc/water_tess.tesc        |    3 +-
 reference/shaders/vert/ground.vert            |   16 +-
 reference/shaders/vert/ocean.vert             |   32 +-
 .../vert/read-from-row-major-array.vert       |   45 +
 .../subgroups.nocompat.invalid.vk.comp.vk     |  110 +
 .../shaders/vulkan/frag/push-constant.frag.vk |   18 -
 .../separate-combined-fake-overload.vk.frag   |   22 +
 ...separate-combined-fake-overload.vk.frag.vk |   23 +
 .../separate-sampler-texture-array.vk.frag    |   44 +
 .../separate-sampler-texture-array.vk.frag.vk |   45 +
 .../vulkan/frag/spec-constant.vk.frag.vk      |   68 -
 .../vulkan/vert/multiview.nocompat.vk.vert    |   15 -
 .../shaders/vulkan/vert/vulkan-vertex.vert    |    9 -
 .../shaders/vulkan/vert/vulkan-vertex.vert.vk |    7 -
 ...lization-constant-workgroup.nofxc.asm.comp |    0
 .../asm/vert/empty-struct-composite.asm.vert  |    0
 .../comp/bitfield.comp                        |    0
 .../frag/spec-constant.frag                   |    0
 ...orage-buffer-basic.invalid.nofxc.asm.comp} |    0
 .../asm/frag/combined-sampler-reuse.asm.frag  |   57 +
 shaders-hlsl/asm/frag/empty-struct.asm.frag   |   56 +
 .../asm/frag/image-extract-reuse.asm.frag     |   41 +
 .../asm/frag/implicit-read-dep-phi.asm.frag   |   81 +
 shaders-hlsl/asm/frag/srem.asm.frag           |   43 +
 .../asm/frag/texel-fetch-no-lod.asm.frag      |   46 +
 .../vert/spec-constant-op-composite.asm.vert  |   98 +
 shaders-hlsl/comp/rmw-matrix.comp             |   20 +
 .../comp/subgroups.invalid.nofxc.sm60.comp    |  131 ++
 .../frag/array-lut-no-loop-variable.frag      |   13 +
 .../control-dependent-in-branch.desktop.frag  |   36 +
 shaders-hlsl/frag/fp16.desktop.frag           |  156 ++
 .../frag/row-major-layout-in-struct.frag      |   29 +
 shaders-hlsl/frag/sampler-image-arrays.frag   |   33 +
 shaders-hlsl/frag/tex-sampling-ms.frag        |   16 +
 shaders-hlsl/frag/tex-sampling.frag           |   28 +-
 .../vert/read-from-row-major-array.vert       |   20 +
 .../inliner-dominator-inside-loop.asm.frag    |    0
 .../asm/vert/empty-struct-composite.asm.vert  |    0
 .../comp/bitfield.comp                        |    0
 .../comp/loop.comp                            |    0
 .../comp/return.comp                          |    0
 .../frag/in_block_assign.frag                 |    0
 ... => storage-buffer-basic.invalid.asm.comp} |    0
 .../asm/frag/combined-sampler-reuse.asm.frag  |   57 +
 shaders-msl/asm/frag/empty-struct.asm.frag    |   56 +
 .../asm/frag/image-extract-reuse.asm.frag     |   41 +
 .../asm/frag/implicit-read-dep-phi.asm.frag   |   81 +
 shaders-msl/asm/frag/srem.asm.frag            |   43 +
 .../asm/frag/texel-fetch-no-lod.asm.frag      |   46 +
 .../asm/frag/vector-shuffle-oom.asm.frag      |   12 +-
 shaders-msl/asm/vert/packing-test.asm.vert    |   43 +
 .../vert/spec-constant-op-composite.asm.vert  |   98 +
 .../access-private-workgroup-in-function.comp |   31 +
 shaders-msl/comp/atomic.comp                  |   23 +
 shaders-msl/comp/composite-construct.comp     |   31 +
 .../comp/image-cube-array-load-store.comp     |   13 +
 shaders-msl/comp/packing-test-1.comp          |   18 +
 shaders-msl/comp/packing-test-2.comp          |   16 +
 shaders-msl/comp/rmw-matrix.comp              |   20 +
 shaders-msl/comp/udiv.comp                    |    2 +-
 shaders-msl/flatten/types.flatten.frag        |    4 +-
 .../frag/array-lut-no-loop-variable.frag      |   13 +
 .../binary-func-unpack-pack-arguments.frag    |   15 +
 .../frag/binary-unpack-pack-arguments.frag    |   15 +
 .../control-dependent-in-branch.desktop.frag  |   34 +
 shaders-msl/frag/dual-source-blending.frag    |   10 +
 shaders-msl/frag/fp16-packing.frag            |   12 +
 shaders-msl/frag/fp16.desktop.frag            |  151 ++
 shaders-msl/frag/gather-dref.frag             |   11 +
 shaders-msl/frag/gather-offset.frag           |    9 +
 shaders-msl/frag/illegal-name-test-0.frag     |   12 +
 shaders-msl/frag/in_mat.frag                  |   19 +
 shaders-msl/frag/mrt-array.frag               |   24 +
 .../packed-expression-vector-shuffle.frag     |   15 +
 shaders-msl/frag/packing-test-3.frag          |   36 +
 shaders-msl/frag/readonly-ssbo.frag           |   16 +
 shaders-msl/frag/sampler-1d-lod.frag          |   12 +
 .../frag/sampler-image-arrays.msl2.frag       |   33 +
 .../frag/shadow-compare-global-alias.frag     |   38 +
 shaders-msl/frag/swizzle.frag                 |    2 +-
 shaders-msl/vert/in_out_array_mat.vert        |   41 +
 .../vert/read-from-row-major-array.vert       |   20 +
 .../inliner-dominator-inside-loop.asm.frag    |    0
 .../asm/vert/empty-struct-composite.asm.vert  |    0
 .../asm/vert/semantic-decoration.asm.vert     |   68 +
 .../comp/bitfield.comp                        |    0
 .../comp/loop.comp                            |    0
 {shaders => shaders-no-opt}/comp/return.comp  |    0
 .../vulkan/frag/spec-constant.vk.frag         |    0
 shaders/amd/fs.invalid.frag                   |    4 +-
 shaders/asm/comp/hlsl-functionality.asm.comp  |   63 +
 shaders/asm/comp/name-alias.asm.invalid.comp  |  124 --
 ... => storage-buffer-basic.invalid.asm.comp} |    0
 .../frag/combined-sampler-reuse.vk.asm.frag   |   57 +
 .../frag/complex-name-workarounds.asm.frag    |   81 +
 shaders/asm/frag/empty-struct.asm.frag        |   56 +
 .../hlsl-sample-cmp-level-zero-cube.asm.frag  |    2 +
 .../frag/hlsl-sample-cmp-level-zero.asm.frag  |    2 +
 shaders/asm/frag/image-extract-reuse.asm.frag |   41 +
 .../frag/image-query-no-sampler.vk.asm.frag   |   57 +
 .../asm/frag/implicit-read-dep-phi.asm.frag   |   81 +
 .../asm/frag/loop-merge-to-continue.asm.frag  |   85 +
 ...pler-buffer-array-without-sampler.asm.frag |   86 +
 .../sampler-buffer-without-sampler.asm.frag   |    3 +
 .../frag/selection-merge-to-continue.asm.frag |   85 +
 shaders/asm/frag/srem.asm.frag                |   43 +
 .../frag/switch-merge-to-continue.asm.frag    |   85 +
 .../asm/frag/temporary-name-alias.asm.frag    |   48 +
 shaders/asm/frag/texel-fetch-no-lod.asm.frag  |   46 +
 .../inout-split-access-chain-handle.asm.geom  |   90 +
 .../geom/split-access-chain-input.asm.geom    |   52 +
 ...nput-array-builtin-array.invalid.asm.tesc} |    0
 .../spec-constant-op-composite.asm.vk.vert    |   98 +
 shaders/comp/rmw-matrix.comp                  |   20 +
 shaders/comp/struct-packing.comp              |    4 +-
 shaders/desktop-only/comp/fp64.desktop.comp   |    2 +-
 .../control-dependent-in-branch.desktop.frag  |   36 +
 .../frag/dual-source-blending.desktop.frag    |   10 +
 shaders/desktop-only/frag/fp16.desktop.frag   |  151 ++
 shaders/frag/array-lut-no-loop-variable.frag  |   13 +
 shaders/frag/gather-dref.frag                 |   11 +
 shaders/frag/swizzle.frag                     |    2 +-
 .../legacy/fragment/explicit-lod.legacy.frag  |    2 +-
 shaders/legacy/vert/implicit-lod.legacy.vert  |    2 +-
 shaders/vert/read-from-row-major-array.vert   |   20 +
 .../comp/subgroups.nocompat.invalid.vk.comp   |  125 ++
 .../separate-combined-fake-overload.vk.frag   |   21 +
 .../separate-sampler-texture-array.vk.frag    |   42 +
 spirv.hpp                                     |  123 +-
 spirv_cfg.cpp                                 |    2 +-
 spirv_cfg.hpp                                 |    2 +-
 spirv_common.hpp                              |  243 ++-
 spirv_cpp.cpp                                 |   16 +-
 spirv_cpp.hpp                                 |    4 +-
 spirv_cross.cpp                               |  487 ++++-
 spirv_cross.hpp                               |   53 +-
 spirv_cross_util.cpp                          |   19 +
 spirv_cross_util.hpp                          |    3 +-
 spirv_glsl.cpp                                | 1836 +++++++++++++----
 spirv_glsl.hpp                                |   59 +-
 spirv_hlsl.cpp                                |  617 +++++-
 spirv_hlsl.hpp                                |   27 +-
 spirv_msl.cpp                                 |  977 +++++----
 spirv_msl.hpp                                 |  114 +-
 test_shaders.py                               |  109 +-
 test_shaders.sh                               |    2 +
 tests-other/hlsl_wave_mask.cpp                |   73 +
 update_test_shaders.sh                        |   13 +-
 609 files changed, 15185 insertions(+), 4607 deletions(-)
 create mode 100644 appveyor.yml
 create mode 100755 build_glslang_spirv_tools.sh
 rename reference/{shaders-hlsl/asm/comp/storage-buffer-basic.nofxc.asm.comp => opt/shaders-hlsl/asm/comp/storage-buffer-basic.invalid.nofxc.asm.comp} (100%)
 create mode 100644 reference/opt/shaders-hlsl/asm/frag/combined-sampler-reuse.asm.frag
 create mode 100644 reference/opt/shaders-hlsl/asm/frag/empty-struct.asm.frag
 create mode 100644 reference/opt/shaders-hlsl/asm/frag/image-extract-reuse.asm.frag
 create mode 100644 reference/opt/shaders-hlsl/asm/frag/implicit-read-dep-phi.asm.frag
 create mode 100644 reference/opt/shaders-hlsl/asm/frag/srem.asm.frag
 create mode 100644 reference/opt/shaders-hlsl/asm/frag/texel-fetch-no-lod.asm.frag
 delete mode 100644 reference/opt/shaders-hlsl/asm/vert/empty-struct-composite.asm.vert
 create mode 100644 reference/opt/shaders-hlsl/asm/vert/spec-constant-op-composite.asm.vert
 create mode 100644 reference/opt/shaders-hlsl/comp/rmw-matrix.comp
 create mode 100644 reference/opt/shaders-hlsl/comp/subgroups.invalid.nofxc.sm60.comp
 create mode 100644 reference/opt/shaders-hlsl/frag/array-lut-no-loop-variable.frag
 create mode 100644 reference/opt/shaders-hlsl/frag/control-dependent-in-branch.desktop.frag
 create mode 100644 reference/opt/shaders-hlsl/frag/fp16.desktop.frag
 create mode 100644 reference/opt/shaders-hlsl/frag/row-major-layout-in-struct.frag
 create mode 100644 reference/opt/shaders-hlsl/frag/sampler-image-arrays.frag
 delete mode 100644 reference/opt/shaders-hlsl/frag/spec-constant.frag
 create mode 100644 reference/opt/shaders-hlsl/frag/tex-sampling-ms.frag
 create mode 100644 reference/opt/shaders-hlsl/vert/read-from-row-major-array.vert
 rename reference/{shaders-msl/asm/comp/storage-buffer-basic.asm.comp => opt/shaders-msl/asm/comp/storage-buffer-basic.invalid.asm.comp} (100%)
 create mode 100644 reference/opt/shaders-msl/asm/frag/combined-sampler-reuse.asm.frag
 rename reference/{shaders-msl/asm/vert/empty-struct-composite.asm.vert => opt/shaders-msl/asm/frag/empty-struct.asm.frag} (77%)
 create mode 100644 reference/opt/shaders-msl/asm/frag/image-extract-reuse.asm.frag
 create mode 100644 reference/opt/shaders-msl/asm/frag/implicit-read-dep-phi.asm.frag
 rename reference/{shaders-msl/frag/in_block_assign.noopt.frag => opt/shaders-msl/asm/frag/srem.asm.frag} (61%)
 create mode 100644 reference/opt/shaders-msl/asm/frag/texel-fetch-no-lod.asm.frag
 rename reference/opt/shaders-msl/asm/vert/{empty-struct-composite.asm.vert => packing-test.asm.vert} (100%)
 create mode 100644 reference/opt/shaders-msl/asm/vert/spec-constant-op-composite.asm.vert
 create mode 100644 reference/opt/shaders-msl/comp/access-private-workgroup-in-function.comp
 delete mode 100644 reference/opt/shaders-msl/comp/bake_gradient.comp
 create mode 100644 reference/opt/shaders-msl/comp/composite-construct.comp
 create mode 100644 reference/opt/shaders-msl/comp/image-cube-array-load-store.comp
 create mode 100644 reference/opt/shaders-msl/comp/packing-test-1.comp
 create mode 100644 reference/opt/shaders-msl/comp/packing-test-2.comp
 delete mode 100644 reference/opt/shaders-msl/comp/return.comp
 create mode 100644 reference/opt/shaders-msl/comp/rmw-matrix.comp
 create mode 100644 reference/opt/shaders-msl/frag/array-lut-no-loop-variable.frag
 create mode 100644 reference/opt/shaders-msl/frag/binary-func-unpack-pack-arguments.frag
 create mode 100644 reference/opt/shaders-msl/frag/binary-unpack-pack-arguments.frag
 create mode 100644 reference/opt/shaders-msl/frag/control-dependent-in-branch.desktop.frag
 create mode 100644 reference/opt/shaders-msl/frag/dual-source-blending.frag
 create mode 100644 reference/opt/shaders-msl/frag/fp16-packing.frag
 create mode 100644 reference/opt/shaders-msl/frag/fp16.desktop.frag
 create mode 100644 reference/opt/shaders-msl/frag/gather-dref.frag
 create mode 100644 reference/opt/shaders-msl/frag/gather-offset.frag
 create mode 100644 reference/opt/shaders-msl/frag/illegal-name-test-0.frag
 create mode 100644 reference/opt/shaders-msl/frag/in_mat.frag
 create mode 100644 reference/opt/shaders-msl/frag/mrt-array.frag
 create mode 100644 reference/opt/shaders-msl/frag/packed-expression-vector-shuffle.frag
 create mode 100644 reference/opt/shaders-msl/frag/packing-test-3.frag
 create mode 100644 reference/opt/shaders-msl/frag/readonly-ssbo.frag
 create mode 100644 reference/opt/shaders-msl/frag/sampler-1d-lod.frag
 create mode 100644 reference/opt/shaders-msl/frag/sampler-image-arrays.msl2.frag
 create mode 100644 reference/opt/shaders-msl/frag/shadow-compare-global-alias.frag
 create mode 100644 reference/opt/shaders-msl/vert/in_out_array_mat.vert
 create mode 100644 reference/opt/shaders-msl/vert/read-from-row-major-array.vert
 delete mode 100644 reference/opt/shaders/amd/fragmentMaskFetch_subpassInput.vk.nocompat.invalid.frag
 create mode 100644 reference/opt/shaders/asm/comp/hlsl-functionality.asm.comp
 delete mode 100644 reference/opt/shaders/asm/comp/name-alias.asm.invalid.comp
 rename reference/{shaders/asm/comp/storage-buffer-basic.asm.comp => opt/shaders/asm/comp/storage-buffer-basic.invalid.asm.comp} (100%)
 create mode 100644 reference/opt/shaders/asm/frag/combined-sampler-reuse.vk.asm.frag
 create mode 100644 reference/opt/shaders/asm/frag/combined-sampler-reuse.vk.asm.frag.vk
 create mode 100644 reference/opt/shaders/asm/frag/complex-name-workarounds.asm.frag
 rename reference/opt/shaders/asm/{vert/empty-struct-composite.asm.vert => frag/empty-struct.asm.frag} (100%)
 create mode 100644 reference/opt/shaders/asm/frag/image-extract-reuse.asm.frag
 create mode 100644 reference/opt/shaders/asm/frag/image-query-no-sampler.vk.asm.frag
 create mode 100644 reference/opt/shaders/asm/frag/image-query-no-sampler.vk.asm.frag.vk
 create mode 100644 reference/opt/shaders/asm/frag/implicit-read-dep-phi.asm.frag
 create mode 100644 reference/opt/shaders/asm/frag/loop-merge-to-continue.asm.frag
 create mode 100644 reference/opt/shaders/asm/frag/sampler-buffer-array-without-sampler.asm.frag
 create mode 100644 reference/opt/shaders/asm/frag/selection-merge-to-continue.asm.frag
 create mode 100644 reference/opt/shaders/asm/frag/srem.asm.frag
 create mode 100644 reference/opt/shaders/asm/frag/switch-merge-to-continue.asm.frag
 rename reference/{shaders/asm/vert/empty-struct-composite.asm.vert => opt/shaders/asm/frag/temporary-name-alias.asm.frag} (100%)
 create mode 100644 reference/opt/shaders/asm/frag/texel-fetch-no-lod.asm.frag
 create mode 100644 reference/opt/shaders/asm/geom/inout-split-access-chain-handle.asm.geom
 create mode 100644 reference/opt/shaders/asm/geom/split-access-chain-input.asm.geom
 delete mode 100644 reference/opt/shaders/asm/tesc/tess-fixed-input-array-builtin-array.asm.tesc
 rename reference/{shaders/asm/tesc/tess-fixed-input-array-builtin-array.asm.tesc => opt/shaders/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc} (100%)
 create mode 100644 reference/opt/shaders/asm/vert/spec-constant-op-composite.asm.vk.vert
 create mode 100644 reference/opt/shaders/asm/vert/spec-constant-op-composite.asm.vk.vert.vk
 delete mode 100644 reference/opt/shaders/comp/return.comp
 create mode 100644 reference/opt/shaders/comp/rmw-matrix.comp
 create mode 100644 reference/opt/shaders/desktop-only/frag/control-dependent-in-branch.desktop.frag
 create mode 100644 reference/opt/shaders/desktop-only/frag/dual-source-blending.desktop.frag
 create mode 100644 reference/opt/shaders/desktop-only/frag/fp16.desktop.frag
 create mode 100644 reference/opt/shaders/frag/array-lut-no-loop-variable.frag
 delete mode 100644 reference/opt/shaders/frag/eliminate-dead-variables.frag
 create mode 100644 reference/opt/shaders/frag/gather-dref.frag
 delete mode 100644 reference/opt/shaders/frag/sampler-ms-query.frag
 delete mode 100644 reference/opt/shaders/frag/temporary.frag
 create mode 100644 reference/opt/shaders/vert/read-from-row-major-array.vert
 create mode 100644 reference/opt/shaders/vulkan/comp/subgroups.nocompat.invalid.vk.comp.vk
 create mode 100644 reference/opt/shaders/vulkan/frag/separate-combined-fake-overload.vk.frag
 create mode 100644 reference/opt/shaders/vulkan/frag/separate-combined-fake-overload.vk.frag.vk
 create mode 100644 reference/opt/shaders/vulkan/frag/separate-sampler-texture-array.vk.frag
 create mode 100644 reference/opt/shaders/vulkan/frag/separate-sampler-texture-array.vk.frag.vk
 delete mode 100644 reference/opt/shaders/vulkan/frag/spec-constant.vk.frag
 delete mode 100644 reference/opt/shaders/vulkan/frag/spec-constant.vk.frag.vk
 delete mode 100644 reference/opt/shaders/vulkan/vert/multiview.nocompat.vk.vert
 rename reference/{opt/shaders-hlsl => shaders-hlsl-no-opt}/asm/comp/specialization-constant-workgroup.nofxc.asm.comp (100%)
 create mode 100644 reference/shaders-hlsl-no-opt/asm/vert/empty-struct-composite.asm.vert
 rename reference/{opt/shaders-hlsl/comp/bitfield.noopt.comp => shaders-hlsl-no-opt/comp/bitfield.comp} (100%)
 create mode 100644 reference/shaders-hlsl-no-opt/frag/spec-constant.frag
 delete mode 100644 reference/shaders-hlsl/asm/comp/specialization-constant-workgroup.nofxc.asm.comp
 rename reference/{opt/shaders-hlsl/asm/comp/storage-buffer-basic.nofxc.asm.comp => shaders-hlsl/asm/comp/storage-buffer-basic.invalid.nofxc.asm.comp} (93%)
 create mode 100644 reference/shaders-hlsl/asm/frag/combined-sampler-reuse.asm.frag
 create mode 100644 reference/shaders-hlsl/asm/frag/empty-struct.asm.frag
 create mode 100644 reference/shaders-hlsl/asm/frag/image-extract-reuse.asm.frag
 create mode 100644 reference/shaders-hlsl/asm/frag/implicit-read-dep-phi.asm.frag
 create mode 100644 reference/shaders-hlsl/asm/frag/srem.asm.frag
 create mode 100644 reference/shaders-hlsl/asm/frag/texel-fetch-no-lod.asm.frag
 delete mode 100644 reference/shaders-hlsl/asm/vert/empty-struct-composite.asm.vert
 create mode 100644 reference/shaders-hlsl/asm/vert/spec-constant-op-composite.asm.vert
 delete mode 100644 reference/shaders-hlsl/comp/bitfield.noopt.comp
 create mode 100644 reference/shaders-hlsl/comp/rmw-matrix.comp
 create mode 100644 reference/shaders-hlsl/comp/subgroups.invalid.nofxc.sm60.comp
 create mode 100644 reference/shaders-hlsl/frag/array-lut-no-loop-variable.frag
 create mode 100644 reference/shaders-hlsl/frag/control-dependent-in-branch.desktop.frag
 create mode 100644 reference/shaders-hlsl/frag/fp16.desktop.frag
 create mode 100644 reference/shaders-hlsl/frag/row-major-layout-in-struct.frag
 create mode 100644 reference/shaders-hlsl/frag/sampler-image-arrays.frag
 delete mode 100644 reference/shaders-hlsl/frag/spec-constant.frag
 create mode 100644 reference/shaders-hlsl/frag/tex-sampling-ms.frag
 create mode 100644 reference/shaders-hlsl/vert/read-from-row-major-array.vert
 rename reference/{opt/shaders-msl => shaders-msl-no-opt}/asm/frag/inliner-dominator-inside-loop.asm.frag (93%)
 create mode 100644 reference/shaders-msl-no-opt/asm/vert/empty-struct-composite.asm.vert
 rename reference/{opt/shaders-msl/comp/bitfield.noopt.comp => shaders-msl-no-opt/comp/bitfield.comp} (100%)
 rename reference/{opt/shaders-msl/comp/loop.noopt.comp => shaders-msl-no-opt/comp/loop.comp} (91%)
 rename reference/{shaders-msl => shaders-msl-no-opt}/comp/return.comp (90%)
 rename reference/{opt/shaders-msl/frag/in_block_assign.noopt.frag => shaders-msl-no-opt/frag/in_block_assign.frag} (100%)
 rename reference/{opt/shaders-msl/asm/comp/storage-buffer-basic.asm.comp => shaders-msl/asm/comp/storage-buffer-basic.invalid.asm.comp} (94%)
 create mode 100644 reference/shaders-msl/asm/frag/combined-sampler-reuse.asm.frag
 create mode 100644 reference/shaders-msl/asm/frag/empty-struct.asm.frag
 create mode 100644 reference/shaders-msl/asm/frag/image-extract-reuse.asm.frag
 create mode 100644 reference/shaders-msl/asm/frag/implicit-read-dep-phi.asm.frag
 delete mode 100644 reference/shaders-msl/asm/frag/inliner-dominator-inside-loop.asm.frag
 create mode 100644 reference/shaders-msl/asm/frag/srem.asm.frag
 create mode 100644 reference/shaders-msl/asm/frag/texel-fetch-no-lod.asm.frag
 create mode 100644 reference/shaders-msl/asm/vert/packing-test.asm.vert
 create mode 100644 reference/shaders-msl/asm/vert/spec-constant-op-composite.asm.vert
 create mode 100644 reference/shaders-msl/comp/access-private-workgroup-in-function.comp
 delete mode 100644 reference/shaders-msl/comp/bake_gradient.comp
 delete mode 100644 reference/shaders-msl/comp/bitfield.noopt.comp
 create mode 100644 reference/shaders-msl/comp/composite-construct.comp
 create mode 100644 reference/shaders-msl/comp/image-cube-array-load-store.comp
 delete mode 100644 reference/shaders-msl/comp/loop.noopt.comp
 create mode 100644 reference/shaders-msl/comp/packing-test-1.comp
 create mode 100644 reference/shaders-msl/comp/packing-test-2.comp
 create mode 100644 reference/shaders-msl/comp/rmw-matrix.comp
 create mode 100644 reference/shaders-msl/frag/array-lut-no-loop-variable.frag
 create mode 100644 reference/shaders-msl/frag/binary-func-unpack-pack-arguments.frag
 create mode 100644 reference/shaders-msl/frag/binary-unpack-pack-arguments.frag
 create mode 100644 reference/shaders-msl/frag/control-dependent-in-branch.desktop.frag
 create mode 100644 reference/shaders-msl/frag/dual-source-blending.frag
 create mode 100644 reference/shaders-msl/frag/fp16-packing.frag
 create mode 100644 reference/shaders-msl/frag/fp16.desktop.frag
 create mode 100644 reference/shaders-msl/frag/gather-dref.frag
 create mode 100644 reference/shaders-msl/frag/gather-offset.frag
 create mode 100644 reference/shaders-msl/frag/illegal-name-test-0.frag
 create mode 100644 reference/shaders-msl/frag/in_mat.frag
 create mode 100644 reference/shaders-msl/frag/mrt-array.frag
 create mode 100644 reference/shaders-msl/frag/packed-expression-vector-shuffle.frag
 create mode 100644 reference/shaders-msl/frag/packing-test-3.frag
 create mode 100644 reference/shaders-msl/frag/readonly-ssbo.frag
 create mode 100644 reference/shaders-msl/frag/sampler-1d-lod.frag
 create mode 100644 reference/shaders-msl/frag/sampler-image-arrays.msl2.frag
 create mode 100644 reference/shaders-msl/frag/shadow-compare-global-alias.frag
 create mode 100644 reference/shaders-msl/vert/in_out_array_mat.vert
 create mode 100644 reference/shaders-msl/vert/read-from-row-major-array.vert
 rename reference/{opt/shaders => shaders-no-opt}/asm/frag/inliner-dominator-inside-loop.asm.frag (97%)
 create mode 100644 reference/shaders-no-opt/asm/vert/empty-struct-composite.asm.vert
 create mode 100644 reference/shaders-no-opt/asm/vert/semantic-decoration.asm.vert
 rename reference/{opt/shaders/comp/bitfield.noopt.comp => shaders-no-opt/comp/bitfield.comp} (100%)
 rename reference/{opt/shaders/comp/loop.noopt.comp => shaders-no-opt/comp/loop.comp} (100%)
 rename reference/{shaders => shaders-no-opt}/comp/return.comp (100%)
 rename reference/{shaders => shaders-no-opt}/vulkan/frag/spec-constant.vk.frag (100%)
 create mode 100644 reference/shaders-no-opt/vulkan/frag/spec-constant.vk.frag.vk
 delete mode 100644 reference/shaders/amd/fragmentMaskFetch_subpassInput.vk.nocompat.invalid.frag
 create mode 100644 reference/shaders/asm/comp/hlsl-functionality.asm.comp
 delete mode 100644 reference/shaders/asm/comp/name-alias.asm.invalid.comp
 rename reference/{opt/shaders/asm/comp/storage-buffer-basic.asm.comp => shaders/asm/comp/storage-buffer-basic.invalid.asm.comp} (90%)
 create mode 100644 reference/shaders/asm/frag/combined-sampler-reuse.vk.asm.frag
 create mode 100644 reference/shaders/asm/frag/combined-sampler-reuse.vk.asm.frag.vk
 create mode 100644 reference/shaders/asm/frag/complex-name-workarounds.asm.frag
 create mode 100644 reference/shaders/asm/frag/empty-struct.asm.frag
 create mode 100644 reference/shaders/asm/frag/image-extract-reuse.asm.frag
 create mode 100644 reference/shaders/asm/frag/image-query-no-sampler.vk.asm.frag
 create mode 100644 reference/shaders/asm/frag/image-query-no-sampler.vk.asm.frag.vk
 create mode 100644 reference/shaders/asm/frag/implicit-read-dep-phi.asm.frag
 delete mode 100644 reference/shaders/asm/frag/inliner-dominator-inside-loop.asm.frag
 create mode 100644 reference/shaders/asm/frag/loop-merge-to-continue.asm.frag
 create mode 100644 reference/shaders/asm/frag/sampler-buffer-array-without-sampler.asm.frag
 create mode 100644 reference/shaders/asm/frag/selection-merge-to-continue.asm.frag
 create mode 100644 reference/shaders/asm/frag/srem.asm.frag
 create mode 100644 reference/shaders/asm/frag/switch-merge-to-continue.asm.frag
 create mode 100644 reference/shaders/asm/frag/temporary-name-alias.asm.frag
 create mode 100644 reference/shaders/asm/frag/texel-fetch-no-lod.asm.frag
 create mode 100644 reference/shaders/asm/geom/inout-split-access-chain-handle.asm.geom
 create mode 100644 reference/shaders/asm/geom/split-access-chain-input.asm.geom
 create mode 100644 reference/shaders/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc
 create mode 100644 reference/shaders/asm/vert/spec-constant-op-composite.asm.vk.vert
 create mode 100644 reference/shaders/asm/vert/spec-constant-op-composite.asm.vk.vert.vk
 delete mode 100644 reference/shaders/comp/bitfield.noopt.comp
 delete mode 100644 reference/shaders/comp/loop.noopt.comp
 create mode 100644 reference/shaders/comp/rmw-matrix.comp
 create mode 100644 reference/shaders/desktop-only/frag/control-dependent-in-branch.desktop.frag
 create mode 100644 reference/shaders/desktop-only/frag/dual-source-blending.desktop.frag
 create mode 100644 reference/shaders/desktop-only/frag/fp16.desktop.frag
 create mode 100644 reference/shaders/frag/array-lut-no-loop-variable.frag
 create mode 100644 reference/shaders/frag/gather-dref.frag
 create mode 100644 reference/shaders/vert/read-from-row-major-array.vert
 create mode 100644 reference/shaders/vulkan/comp/subgroups.nocompat.invalid.vk.comp.vk
 delete mode 100644 reference/shaders/vulkan/frag/push-constant.frag.vk
 create mode 100644 reference/shaders/vulkan/frag/separate-combined-fake-overload.vk.frag
 create mode 100644 reference/shaders/vulkan/frag/separate-combined-fake-overload.vk.frag.vk
 create mode 100644 reference/shaders/vulkan/frag/separate-sampler-texture-array.vk.frag
 create mode 100644 reference/shaders/vulkan/frag/separate-sampler-texture-array.vk.frag.vk
 delete mode 100644 reference/shaders/vulkan/frag/spec-constant.vk.frag.vk
 delete mode 100644 reference/shaders/vulkan/vert/multiview.nocompat.vk.vert
 delete mode 100644 reference/shaders/vulkan/vert/vulkan-vertex.vert
 delete mode 100644 reference/shaders/vulkan/vert/vulkan-vertex.vert.vk
 rename {shaders-hlsl => shaders-hlsl-no-opt}/asm/comp/specialization-constant-workgroup.nofxc.asm.comp (100%)
 rename {shaders-hlsl => shaders-hlsl-no-opt}/asm/vert/empty-struct-composite.asm.vert (100%)
 rename shaders-hlsl/comp/bitfield.noopt.comp => shaders-hlsl-no-opt/comp/bitfield.comp (100%)
 rename {shaders-hlsl => shaders-hlsl-no-opt}/frag/spec-constant.frag (100%)
 rename shaders-hlsl/asm/comp/{storage-buffer-basic.nofxc.asm.comp => storage-buffer-basic.invalid.nofxc.asm.comp} (100%)
 create mode 100644 shaders-hlsl/asm/frag/combined-sampler-reuse.asm.frag
 create mode 100644 shaders-hlsl/asm/frag/empty-struct.asm.frag
 create mode 100644 shaders-hlsl/asm/frag/image-extract-reuse.asm.frag
 create mode 100644 shaders-hlsl/asm/frag/implicit-read-dep-phi.asm.frag
 create mode 100644 shaders-hlsl/asm/frag/srem.asm.frag
 create mode 100644 shaders-hlsl/asm/frag/texel-fetch-no-lod.asm.frag
 create mode 100644 shaders-hlsl/asm/vert/spec-constant-op-composite.asm.vert
 create mode 100644 shaders-hlsl/comp/rmw-matrix.comp
 create mode 100644 shaders-hlsl/comp/subgroups.invalid.nofxc.sm60.comp
 create mode 100644 shaders-hlsl/frag/array-lut-no-loop-variable.frag
 create mode 100644 shaders-hlsl/frag/control-dependent-in-branch.desktop.frag
 create mode 100644 shaders-hlsl/frag/fp16.desktop.frag
 create mode 100644 shaders-hlsl/frag/row-major-layout-in-struct.frag
 create mode 100644 shaders-hlsl/frag/sampler-image-arrays.frag
 create mode 100644 shaders-hlsl/frag/tex-sampling-ms.frag
 create mode 100644 shaders-hlsl/vert/read-from-row-major-array.vert
 rename {shaders-msl => shaders-msl-no-opt}/asm/frag/inliner-dominator-inside-loop.asm.frag (100%)
 rename {shaders-msl => shaders-msl-no-opt}/asm/vert/empty-struct-composite.asm.vert (100%)
 rename shaders-msl/comp/bitfield.noopt.comp => shaders-msl-no-opt/comp/bitfield.comp (100%)
 rename shaders-msl/comp/loop.noopt.comp => shaders-msl-no-opt/comp/loop.comp (100%)
 rename {shaders-msl => shaders-msl-no-opt}/comp/return.comp (100%)
 rename shaders-msl/frag/in_block_assign.noopt.frag => shaders-msl-no-opt/frag/in_block_assign.frag (100%)
 rename shaders-msl/asm/comp/{storage-buffer-basic.asm.comp => storage-buffer-basic.invalid.asm.comp} (100%)
 create mode 100644 shaders-msl/asm/frag/combined-sampler-reuse.asm.frag
 create mode 100644 shaders-msl/asm/frag/empty-struct.asm.frag
 create mode 100644 shaders-msl/asm/frag/image-extract-reuse.asm.frag
 create mode 100644 shaders-msl/asm/frag/implicit-read-dep-phi.asm.frag
 create mode 100644 shaders-msl/asm/frag/srem.asm.frag
 create mode 100644 shaders-msl/asm/frag/texel-fetch-no-lod.asm.frag
 create mode 100644 shaders-msl/asm/vert/packing-test.asm.vert
 create mode 100644 shaders-msl/asm/vert/spec-constant-op-composite.asm.vert
 create mode 100644 shaders-msl/comp/access-private-workgroup-in-function.comp
 create mode 100644 shaders-msl/comp/composite-construct.comp
 create mode 100644 shaders-msl/comp/image-cube-array-load-store.comp
 create mode 100644 shaders-msl/comp/packing-test-1.comp
 create mode 100644 shaders-msl/comp/packing-test-2.comp
 create mode 100644 shaders-msl/comp/rmw-matrix.comp
 create mode 100644 shaders-msl/frag/array-lut-no-loop-variable.frag
 create mode 100644 shaders-msl/frag/binary-func-unpack-pack-arguments.frag
 create mode 100644 shaders-msl/frag/binary-unpack-pack-arguments.frag
 create mode 100644 shaders-msl/frag/control-dependent-in-branch.desktop.frag
 create mode 100644 shaders-msl/frag/dual-source-blending.frag
 create mode 100644 shaders-msl/frag/fp16-packing.frag
 create mode 100644 shaders-msl/frag/fp16.desktop.frag
 create mode 100644 shaders-msl/frag/gather-dref.frag
 create mode 100644 shaders-msl/frag/gather-offset.frag
 create mode 100644 shaders-msl/frag/illegal-name-test-0.frag
 create mode 100644 shaders-msl/frag/in_mat.frag
 create mode 100644 shaders-msl/frag/mrt-array.frag
 create mode 100644 shaders-msl/frag/packed-expression-vector-shuffle.frag
 create mode 100644 shaders-msl/frag/packing-test-3.frag
 create mode 100644 shaders-msl/frag/readonly-ssbo.frag
 create mode 100644 shaders-msl/frag/sampler-1d-lod.frag
 create mode 100644 shaders-msl/frag/sampler-image-arrays.msl2.frag
 create mode 100644 shaders-msl/frag/shadow-compare-global-alias.frag
 create mode 100644 shaders-msl/vert/in_out_array_mat.vert
 create mode 100644 shaders-msl/vert/read-from-row-major-array.vert
 rename {shaders => shaders-no-opt}/asm/frag/inliner-dominator-inside-loop.asm.frag (100%)
 rename {shaders => shaders-no-opt}/asm/vert/empty-struct-composite.asm.vert (100%)
 create mode 100644 shaders-no-opt/asm/vert/semantic-decoration.asm.vert
 rename shaders/comp/bitfield.noopt.comp => shaders-no-opt/comp/bitfield.comp (100%)
 rename shaders/comp/loop.noopt.comp => shaders-no-opt/comp/loop.comp (100%)
 rename {shaders => shaders-no-opt}/comp/return.comp (100%)
 rename {shaders => shaders-no-opt}/vulkan/frag/spec-constant.vk.frag (100%)
 create mode 100644 shaders/asm/comp/hlsl-functionality.asm.comp
 delete mode 100644 shaders/asm/comp/name-alias.asm.invalid.comp
 rename shaders/asm/comp/{storage-buffer-basic.asm.comp => storage-buffer-basic.invalid.asm.comp} (100%)
 create mode 100644 shaders/asm/frag/combined-sampler-reuse.vk.asm.frag
 create mode 100644 shaders/asm/frag/complex-name-workarounds.asm.frag
 create mode 100644 shaders/asm/frag/empty-struct.asm.frag
 create mode 100644 shaders/asm/frag/image-extract-reuse.asm.frag
 create mode 100644 shaders/asm/frag/image-query-no-sampler.vk.asm.frag
 create mode 100644 shaders/asm/frag/implicit-read-dep-phi.asm.frag
 create mode 100644 shaders/asm/frag/loop-merge-to-continue.asm.frag
 create mode 100644 shaders/asm/frag/sampler-buffer-array-without-sampler.asm.frag
 create mode 100644 shaders/asm/frag/selection-merge-to-continue.asm.frag
 create mode 100644 shaders/asm/frag/srem.asm.frag
 create mode 100644 shaders/asm/frag/switch-merge-to-continue.asm.frag
 create mode 100644 shaders/asm/frag/temporary-name-alias.asm.frag
 create mode 100644 shaders/asm/frag/texel-fetch-no-lod.asm.frag
 create mode 100644 shaders/asm/geom/inout-split-access-chain-handle.asm.geom
 create mode 100644 shaders/asm/geom/split-access-chain-input.asm.geom
 rename shaders/asm/tesc/{tess-fixed-input-array-builtin-array.asm.tesc => tess-fixed-input-array-builtin-array.invalid.asm.tesc} (100%)
 create mode 100644 shaders/asm/vert/spec-constant-op-composite.asm.vk.vert
 create mode 100644 shaders/comp/rmw-matrix.comp
 create mode 100644 shaders/desktop-only/frag/control-dependent-in-branch.desktop.frag
 create mode 100644 shaders/desktop-only/frag/dual-source-blending.desktop.frag
 create mode 100644 shaders/desktop-only/frag/fp16.desktop.frag
 create mode 100644 shaders/frag/array-lut-no-loop-variable.frag
 create mode 100644 shaders/frag/gather-dref.frag
 create mode 100644 shaders/vert/read-from-row-major-array.vert
 create mode 100644 shaders/vulkan/comp/subgroups.nocompat.invalid.vk.comp
 create mode 100644 shaders/vulkan/frag/separate-combined-fake-overload.vk.frag
 create mode 100644 shaders/vulkan/frag/separate-sampler-texture-array.vk.frag
 create mode 100644 tests-other/hlsl_wave_mask.cpp

diff --git a/.travis.yml b/.travis.yml
index 26941f43a3..8e09d61957 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -2,32 +2,17 @@ language: cpp
 os:
   - linux
   - osx
-osx_image: xcode8.3
+osx_image: xcode9.3beta
 
 # Use Ubuntu 14.04 LTS (Trusty) as the Linux testing environment.
 sudo: required
 dist: trusty
 
-# We check out glslang and SPIRV-Tools at specific revisions to avoid test output mismatches
-env:
-  - GLSLANG_REV=9c6f8cc29ba303b43ccf36deea6bb38a304f9b92 SPIRV_TOOLS_REV=e28edd458b729da7bbfd51e375feb33103709e6f
-
 before_script:
   - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew upgrade python3; fi
-  - git clone https://github.com/KhronosGroup/glslang.git glslang
-  - git clone https://github.com/KhronosGroup/SPIRV-Tools SPIRV-Tools
-  - git clone https://github.com/KhronosGroup/SPIRV-Headers.git SPIRV-Tools/external/spirv-headers
+  - ./checkout_glslang_spirv_tools.sh
 
 script:
-  - git -C glslang checkout $GLSLANG_REV
-  - git -C SPIRV-Tools checkout $SPIRV_TOOLS_REV
-  - cd glslang && cmake . && make -j2 && cd ..
-  - cd SPIRV-Tools && cmake . && make -j2 && cd ..
+  - ./build_glslang_spirv_tools.sh Debug 2
   - make -j2
-  - PATH=./glslang/StandAlone:./SPIRV-Tools/tools:$PATH
-  - ./test_shaders.py shaders
-  - ./test_shaders.py --msl shaders-msl
-  - ./test_shaders.py --hlsl shaders-hlsl
-  - ./test_shaders.py shaders --opt
-  - ./test_shaders.py --msl shaders-msl --opt
-  - ./test_shaders.py --hlsl shaders-hlsl --opt
+  - ./test_shaders.sh
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 659315c785..9226afd46f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -128,12 +128,21 @@ if (${PYTHONINTERP_FOUND})
 	add_test(NAME spirv-cross-test
 		COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/test_shaders.py
 			${CMAKE_CURRENT_SOURCE_DIR}/shaders)
+	add_test(NAME spirv-cross-test-no-opt
+		COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/test_shaders.py
+			${CMAKE_CURRENT_SOURCE_DIR}/shaders-no-opt)
 	add_test(NAME spirv-cross-test-metal
 		COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/test_shaders.py --metal
 			${CMAKE_CURRENT_SOURCE_DIR}/shaders-msl)
+	add_test(NAME spirv-cross-test-metal-no-opt
+		COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/test_shaders.py --metal
+			${CMAKE_CURRENT_SOURCE_DIR}/shaders-msl-no-opt)
 	add_test(NAME spirv-cross-test-hlsl
 		COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/test_shaders.py --hlsl
 			${CMAKE_CURRENT_SOURCE_DIR}/shaders-hlsl)
+	add_test(NAME spirv-cross-test-hlsl-no-opt
+		COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/test_shaders.py --hlsl
+			${CMAKE_CURRENT_SOURCE_DIR}/shaders-hlsl-no-opt)
 	add_test(NAME spirv-cross-test-opt
 		COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/test_shaders.py --opt
 			${CMAKE_CURRENT_SOURCE_DIR}/shaders)
diff --git a/README.md b/README.md
index a6f5231e7a..5f92698602 100644
--- a/README.md
+++ b/README.md
@@ -3,6 +3,7 @@
 SPIRV-Cross is a tool designed for parsing and converting SPIR-V to other shader languages.
 
 [![Build Status](https://travis-ci.org/KhronosGroup/SPIRV-Cross.svg?branch=master)](https://travis-ci.org/KhronosGroup/SPIRV-Cross)
+[![Build Status](https://ci.appveyor.com/api/projects/status/github/KhronosGroup/SPIRV-Cross?svg=true&branch=master)](https://ci.appveyor.com/project/HansKristian-ARM/SPIRV-Cross)
 
 ## Features
 
@@ -24,7 +25,7 @@ However, most missing features are expected to be "trivial" improvements at this
 
 SPIRV-Cross has been tested on Linux, OSX and Windows.
 
-The make and CMake build flavors offer the option to treat exceptions as assertions. To disable exceptions for make just append SPIRV_CROSS_EXCEPTIONS_TO_ASSERTIONS=1 to the command line. For CMake append -DSPIRV_CROSS_EXCEPTIONS_TO_ASSERTIONS=ON. By default exceptions are enabled.
+The make and CMake build flavors offer the option to treat exceptions as assertions. To disable exceptions for make just append `SPIRV_CROSS_EXCEPTIONS_TO_ASSERTIONS=1` to the command line. For CMake append `-DSPIRV_CROSS_EXCEPTIONS_TO_ASSERTIONS=ON`. By default exceptions are enabled.
 
 ### Linux and macOS
 
@@ -38,6 +39,8 @@ MinGW-w64 based compilation works with `make`, and an MSVC 2013 solution is also
 
 ### Using the C++ API
 
+For more in-depth documentation than what's provided in this README, please have a look at the [Wiki](https://github.com/KhronosGroup/SPIRV-Cross/wiki).
+
 To perform reflection and convert to other shader languages you can use the SPIRV-Cross API.
 For example:
 
@@ -279,6 +282,7 @@ although there are a couple of convenience script for doing this:
 
 ```
 ./checkout_glslang_spirv_tools.sh # Checks out glslang and SPIRV-Tools at a fixed revision which matches the reference output.
+./build_glslang_spirv_tools.sh    # Builds glslang and SPIRV-Tools.
 ./test_shaders.sh                 # Runs over all changes and makes sure that there are no deltas compared to reference files.
 ```
 
diff --git a/appveyor.yml b/appveyor.yml
new file mode 100644
index 0000000000..2f427f1804
--- /dev/null
+++ b/appveyor.yml
@@ -0,0 +1,31 @@
+
+environment:
+  matrix:
+    - GENERATOR: "Visual Studio 12 2013 Win64"
+      CONFIG: Debug
+
+    - GENERATOR: "Visual Studio 12 2013 Win64"
+      CONFIG: Release
+
+    - GENERATOR: "Visual Studio 14 2015 Win64"
+      CONFIG: Debug
+
+    - GENERATOR: "Visual Studio 14 2015 Win64"
+      CONFIG: Release
+
+    - GENERATOR: "Visual Studio 12 2013"
+      CONFIG: Debug
+
+    - GENERATOR: "Visual Studio 12 2013"
+      CONFIG: Release
+
+    - GENERATOR: "Visual Studio 14 2015"
+      CONFIG: Debug
+
+    - GENERATOR: "Visual Studio 14 2015"
+      CONFIG: Release
+
+build_script:
+  - git submodule update --init
+  - cmake "-G%GENERATOR%" -H. -B_builds
+  - cmake --build _builds --config "%CONFIG%"
diff --git a/build_glslang_spirv_tools.sh b/build_glslang_spirv_tools.sh
new file mode 100755
index 0000000000..a966427935
--- /dev/null
+++ b/build_glslang_spirv_tools.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+# Usage: build_glslang_spirv_tools.sh [CMAKE_BUILD_TYPE, default Release] [make job count, default CPU count]
+PROFILE=Release
+if [ -n "$1" ]; then
+	PROFILE=$1
+fi
+
+NPROC=$(nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo 1) # Never leave this empty: a bare "make -j" runs unlimited jobs.
+if [ -n "$2" ]; then
+	NPROC=$2
+fi
+
+echo "Building glslang."
+mkdir -p external/glslang-build
+cd external/glslang-build || exit 1 # Do not run cmake/make in the wrong directory.
+cmake ../glslang -DCMAKE_BUILD_TYPE="$PROFILE" -G"Unix Makefiles"
+make -j"$NPROC"
+cd ../..
+
+echo "Building SPIRV-Tools."
+mkdir -p external/spirv-tools-build
+cd external/spirv-tools-build || exit 1 # Do not run cmake/make in the wrong directory.
+cmake ../spirv-tools -DCMAKE_BUILD_TYPE="$PROFILE" -G"Unix Makefiles" -DSPIRV_WERROR=OFF
+make -j"$NPROC"
+cd ../..
+
diff --git a/checkout_glslang_spirv_tools.sh b/checkout_glslang_spirv_tools.sh
index a4674c14e1..b751fb8e7f 100755
--- a/checkout_glslang_spirv_tools.sh
+++ b/checkout_glslang_spirv_tools.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 
-GLSLANG_REV=9c6f8cc29ba303b43ccf36deea6bb38a304f9b92
-SPIRV_TOOLS_REV=e28edd458b729da7bbfd51e375feb33103709e6f
+GLSLANG_REV=461ea09943e0e88ea854ab9e3b42d17d728af2ad
+SPIRV_TOOLS_REV=53bc1623ecd3cc304d0d6feed8385e70c7ab30d3
 
 if [ -d external/glslang ]; then
 	echo "Updating glslang to revision $GLSLANG_REV."
@@ -18,13 +18,6 @@ else
 fi
 cd ../..
 
-echo "Building glslang."
-mkdir -p external/glslang-build
-cd external/glslang-build
-cmake ../glslang -DCMAKE_BUILD_TYPE=Release -G"Unix Makefiles"
-make -j$(nproc)
-cd ../..
-
 if [ -d external/spirv-tools ]; then
 	echo "Updating SPIRV-Tools to revision $SPIRV_TOOLS_REV."
 	cd external/spirv-tools
@@ -37,21 +30,15 @@ else
 	git clone git://github.com/KhronosGroup/SPIRV-Tools.git spirv-tools
 	cd spirv-tools
 	git checkout $SPIRV_TOOLS_REV
-
-	if [ -d external/spirv-headers ]; then
-		cd external/spirv-headers
-		git pull origin master
-		cd ../..
-	else
-		git clone git://github.com/KhronosGroup/SPIRV-Headers.git external/spirv-headers
-	fi
 fi
-cd ../..
-
-echo "Building SPIRV-Tools."
-mkdir -p external/spirv-tools-build
-cd external/spirv-tools-build
-cmake ../spirv-tools -DCMAKE_BUILD_TYPE=Release -G"Unix Makefiles"
-make -j$(nproc)
+
+if [ -d external/spirv-headers ]; then
+	cd external/spirv-headers
+	git pull origin master
+	cd ../..
+else
+	git clone git://github.com/KhronosGroup/SPIRV-Headers.git external/spirv-headers
+fi
+
 cd ../..
 
diff --git a/format_all.sh b/format_all.sh
index 05efeb3eae..fcfffc57f8 100755
--- a/format_all.sh
+++ b/format_all.sh
@@ -1,6 +1,7 @@
 #!/bin/bash
 
-for file in spirv_*.{cpp,hpp} include/spirv_cross/*.{hpp,h} samples/cpp/*.cpp main.cpp
+#for file in spirv_*.{cpp,hpp} include/spirv_cross/*.{hpp,h} samples/cpp/*.cpp main.cpp
+for file in spirv_*.{cpp,hpp} main.cpp
 do
     echo "Formatting file: $file ..."
     clang-format -style=file -i $file
diff --git a/main.cpp b/main.cpp
index 4f0265d1af..b309a82ac3 100644
--- a/main.cpp
+++ b/main.cpp
@@ -122,7 +122,7 @@ struct CLIParser
 			THROW("Tried to parse uint, but nothing left in arguments");
 		}
 
-		uint32_t val = stoul(*argv);
+		uint64_t val = stoul(*argv);
 		if (val > numeric_limits<uint32_t>::max())
 		{
 			THROW("next_uint() out of range");
@@ -131,7 +131,7 @@ struct CLIParser
 		argc--;
 		argv++;
 
-		return val;
+		return uint32_t(val);
 	}
 
 	double next_double()
@@ -212,7 +212,6 @@ static void print_resources(const Compiler &compiler, const char *tag, const vec
 	for (auto &res : resources)
 	{
 		auto &type = compiler.get_type(res.type_id);
-		auto mask = compiler.get_decoration_mask(res.id);
 
 		if (print_ssbo && compiler.buffer_is_hlsl_counter_buffer(res.id))
 			continue;
@@ -221,8 +220,8 @@ static void print_resources(const Compiler &compiler, const char *tag, const vec
 		// for SSBOs and UBOs since those are the only meaningful names to use externally.
 		// Push constant blocks are still accessed by name and not block name, even though they are technically Blocks.
 		bool is_push_constant = compiler.get_storage_class(res.id) == StorageClassPushConstant;
-		bool is_block = (compiler.get_decoration_mask(type.self) &
-		                 ((1ull << DecorationBlock) | (1ull << DecorationBufferBlock))) != 0;
+		bool is_block = compiler.get_decoration_bitset(type.self).get(DecorationBlock) ||
+		                compiler.get_decoration_bitset(type.self).get(DecorationBufferBlock);
 		bool is_sized_block = is_block && (compiler.get_storage_class(res.id) == StorageClassUniform ||
 		                                   compiler.get_storage_class(res.id) == StorageClassUniformConstant);
 		uint32_t fallback_id = !is_push_constant && is_block ? res.base_type_id : res.id;
@@ -231,6 +230,12 @@ static void print_resources(const Compiler &compiler, const char *tag, const vec
 		if (is_sized_block)
 			block_size = uint32_t(compiler.get_declared_struct_size(compiler.get_type(res.base_type_id)));
 
+		Bitset mask;
+		if (print_ssbo)
+			mask = compiler.get_buffer_block_flags(res.id);
+		else
+			mask = compiler.get_decoration_bitset(res.id);
+
 		string array;
 		for (auto arr : type.array)
 			array = join("[", arr ? convert_to_string(arr) : "", "]") + array;
@@ -238,17 +243,17 @@ static void print_resources(const Compiler &compiler, const char *tag, const vec
 		fprintf(stderr, " ID %03u : %s%s", res.id,
 		        !res.name.empty() ? res.name.c_str() : compiler.get_fallback_name(fallback_id).c_str(), array.c_str());
 
-		if (mask & (1ull << DecorationLocation))
+		if (mask.get(DecorationLocation))
 			fprintf(stderr, " (Location : %u)", compiler.get_decoration(res.id, DecorationLocation));
-		if (mask & (1ull << DecorationDescriptorSet))
+		if (mask.get(DecorationDescriptorSet))
 			fprintf(stderr, " (Set : %u)", compiler.get_decoration(res.id, DecorationDescriptorSet));
-		if (mask & (1ull << DecorationBinding))
+		if (mask.get(DecorationBinding))
 			fprintf(stderr, " (Binding : %u)", compiler.get_decoration(res.id, DecorationBinding));
-		if (mask & (1ull << DecorationInputAttachmentIndex))
+		if (mask.get(DecorationInputAttachmentIndex))
 			fprintf(stderr, " (Attachment : %u)", compiler.get_decoration(res.id, DecorationInputAttachmentIndex));
-		if (mask & (1ull << DecorationNonReadable))
+		if (mask.get(DecorationNonReadable))
 			fprintf(stderr, " writeonly");
-		if (mask & (1ull << DecorationNonWritable))
+		if (mask.get(DecorationNonWritable))
 			fprintf(stderr, " readonly");
 		if (is_sized_block)
 			fprintf(stderr, " (BlockSize : %u bytes)", block_size);
@@ -284,7 +289,7 @@ static const char *execution_model_to_str(spv::ExecutionModel model)
 
 static void print_resources(const Compiler &compiler, const ShaderResources &res)
 {
-	uint64_t modes = compiler.get_execution_mode_mask();
+	auto &modes = compiler.get_execution_mode_bitset();
 
 	fprintf(stderr, "Entry points:\n");
 	auto entry_points = compiler.get_entry_points_and_stages();
@@ -293,11 +298,7 @@ static void print_resources(const Compiler &compiler, const ShaderResources &res
 	fprintf(stderr, "\n");
 
 	fprintf(stderr, "Execution modes:\n");
-	for (unsigned i = 0; i < 64; i++)
-	{
-		if (!(modes & (1ull << i)))
-			continue;
-
+	modes.for_each_bit([&](uint32_t i) {
 		auto mode = static_cast<ExecutionMode>(i);
 		uint32_t arg0 = compiler.get_execution_mode_argument(mode, 0);
 		uint32_t arg1 = compiler.get_execution_mode_argument(mode, 1);
@@ -353,7 +354,7 @@ static void print_resources(const Compiler &compiler, const ShaderResources &res
 		default:
 			break;
 		}
-	}
+	});
 	fprintf(stderr, "\n");
 
 	print_resources(compiler, "subpass inputs", res.subpass_inputs);
@@ -487,25 +488,47 @@ struct CLIArguments
 	bool flatten_multidimensional_arrays = false;
 	bool use_420pack_extension = true;
 	bool remove_unused = false;
+	bool combined_samplers_inherit_bindings = false;
 };
 
 static void print_help()
 {
-	fprintf(stderr, "Usage: spirv-cross [--output <output path>] [SPIR-V file] [--es] [--no-es] "
-	                "[--version <GLSL version>] [--dump-resources] [--help] [--force-temporary] "
-	                "[--vulkan-semantics] [--flatten-ubo] [--fixup-clipspace] [--flip-vert-y] [--iterations iter] "
-	                "[--cpp] [--cpp-interface-name <name>] "
-	                "[--msl] [--msl-version <MMmmpp>]"
-	                "[--hlsl] [--shader-model] [--hlsl-enable-compat] "
-	                "[--separate-shader-objects]"
-	                "[--pls-in format input-name] [--pls-out format output-name] [--remap source_name target_name "
-	                "components] [--extension ext] [--entry name] [--stage <stage (vert, frag, geom, tesc, tese, "
-	                "comp)>] [--remove-unused-variables] "
-	                "[--flatten-multidimensional-arrays] [--no-420pack-extension] "
-	                "[--remap-variable-type <variable_name> <new_variable_type>] "
-	                "[--rename-interface-variable <in|out> <location> <new_variable_name>] "
-	                "[--set-hlsl-vertex-input-semantic <location> <semantic>] "
-	                "[--rename-entry-point <old> <new> <stage>] "
+	fprintf(stderr, "Usage: spirv-cross\n"
+	                "\t[--output <output path>]\n"
+	                "\t[SPIR-V file]\n"
+	                "\t[--es]\n"
+	                "\t[--no-es]\n"
+	                "\t[--version <GLSL version>]\n"
+	                "\t[--dump-resources]\n"
+	                "\t[--help]\n"
+	                "\t[--force-temporary]\n"
+	                "\t[--vulkan-semantics]\n"
+	                "\t[--flatten-ubo]\n"
+	                "\t[--fixup-clipspace]\n"
+	                "\t[--flip-vert-y]\n"
+	                "\t[--iterations iter]\n"
+	                "\t[--cpp]\n"
+	                "\t[--cpp-interface-name <name>]\n"
+	                "\t[--msl]\n"
+	                "\t[--msl-version <MMmmpp>]\n"
+	                "\t[--hlsl]\n"
+	                "\t[--shader-model]\n"
+	                "\t[--hlsl-enable-compat]\n"
+	                "\t[--separate-shader-objects]\n"
+	                "\t[--pls-in format input-name]\n"
+	                "\t[--pls-out format output-name]\n"
+	                "\t[--remap source_name target_name components]\n"
+	                "\t[--extension ext]\n"
+	                "\t[--entry name]\n"
+	                "\t[--stage <stage (vert, frag, geom, tesc, tese, comp)>]\n"
+	                "\t[--remove-unused-variables]\n"
+	                "\t[--flatten-multidimensional-arrays]\n"
+	                "\t[--no-420pack-extension]\n"
+	                "\t[--remap-variable-type <variable_name> <new_variable_type>]\n"
+	                "\t[--rename-interface-variable <in|out> <location> <new_variable_name>]\n"
+	                "\t[--set-hlsl-vertex-input-semantic <location> <semantic>]\n"
+	                "\t[--rename-entry-point <old> <new> <stage>]\n"
+	                "\t[--combined-samplers-inherit-bindings]"
 	                "\n");
 }
 
@@ -711,6 +734,8 @@ static int main_inner(int argc, char *argv[])
 	});
 
 	cbs.add("--remove-unused-variables", [&args](CLIParser &) { args.remove_unused = true; });
+	cbs.add("--combined-samplers-inherit-bindings",
+	        [&args](CLIParser &) { args.combined_samplers_inherit_bindings = true; });
 
 	cbs.default_handler = [&args](const char *value) { args.input = value; };
 	cbs.error_handler = [] { print_help(); };
@@ -748,10 +773,10 @@ static int main_inner(int argc, char *argv[])
 		compiler = unique_ptr<CompilerMSL>(new CompilerMSL(read_spirv_file(args.input)));
 
 		auto *msl_comp = static_cast<CompilerMSL *>(compiler.get());
-		auto msl_opts = msl_comp->get_options();
+		auto msl_opts = msl_comp->get_msl_options();
 		if (args.set_msl_version)
 			msl_opts.msl_version = args.msl_version;
-		msl_comp->set_options(msl_opts);
+		msl_comp->set_msl_options(msl_opts);
 	}
 	else if (args.hlsl)
 		compiler = unique_ptr<CompilerHLSL>(new CompilerHLSL(read_spirv_file(args.input)));
@@ -851,14 +876,14 @@ static int main_inner(int argc, char *argv[])
 	if (!entry_point.empty())
 		compiler->set_entry_point(entry_point, model);
 
-	if (!args.set_version && !compiler->get_options().version)
+	if (!args.set_version && !compiler->get_common_options().version)
 	{
 		fprintf(stderr, "Didn't specify GLSL version and SPIR-V did not specify language.\n");
 		print_help();
 		return EXIT_FAILURE;
 	}
 
-	CompilerGLSL::Options opts = compiler->get_options();
+	CompilerGLSL::Options opts = compiler->get_common_options();
 	if (args.set_version)
 		opts.version = args.version;
 	if (args.set_es)
@@ -870,13 +895,13 @@ static int main_inner(int argc, char *argv[])
 	opts.vulkan_semantics = args.vulkan_semantics;
 	opts.vertex.fixup_clipspace = args.fixup;
 	opts.vertex.flip_vert_y = args.yflip;
-	compiler->set_options(opts);
+	compiler->set_common_options(opts);
 
 	// Set HLSL specific options.
 	if (args.hlsl)
 	{
 		auto *hlsl = static_cast<CompilerHLSL *>(compiler.get());
-		auto hlsl_opts = hlsl->get_options();
+		auto hlsl_opts = hlsl->get_hlsl_options();
 		if (args.set_shader_model)
 		{
 			if (args.shader_model < 30)
@@ -894,11 +919,19 @@ static int main_inner(int argc, char *argv[])
 			hlsl_opts.point_size_compat = true;
 			hlsl_opts.point_coord_compat = true;
 		}
-		hlsl->set_options(hlsl_opts);
+		hlsl->set_hlsl_options(hlsl_opts);
 	}
 
 	if (build_dummy_sampler)
-		compiler->build_dummy_sampler_for_combined_images();
+	{
+		uint32_t sampler = compiler->build_dummy_sampler_for_combined_images();
+		if (sampler != 0)
+		{
+			// Set some defaults to make validation happy.
+			compiler->set_decoration(sampler, DecorationDescriptorSet, 0);
+			compiler->set_decoration(sampler, DecorationBinding, 0);
+		}
+	}
 
 	ShaderResources res;
 	if (args.remove_unused)
@@ -961,6 +994,9 @@ static int main_inner(int argc, char *argv[])
 	if (combined_image_samplers)
 	{
 		compiler->build_combined_image_samplers();
+		if (args.combined_samplers_inherit_bindings)
+			spirv_cross_util::inherit_combined_sampler_bindings(*compiler);
+
 		// Give the remapped combined samplers new names.
 		for (auto &remap : compiler->get_combined_image_samplers())
 		{
diff --git a/reference/shaders-hlsl/asm/comp/storage-buffer-basic.nofxc.asm.comp b/reference/opt/shaders-hlsl/asm/comp/storage-buffer-basic.invalid.nofxc.asm.comp
similarity index 100%
rename from reference/shaders-hlsl/asm/comp/storage-buffer-basic.nofxc.asm.comp
rename to reference/opt/shaders-hlsl/asm/comp/storage-buffer-basic.invalid.nofxc.asm.comp
diff --git a/reference/opt/shaders-hlsl/asm/frag/combined-sampler-reuse.asm.frag b/reference/opt/shaders-hlsl/asm/frag/combined-sampler-reuse.asm.frag
new file mode 100644
index 0000000000..3951fd511a
--- /dev/null
+++ b/reference/opt/shaders-hlsl/asm/frag/combined-sampler-reuse.asm.frag
@@ -0,0 +1,30 @@
+Texture2D<float4> uTex : register(t1);
+SamplerState uSampler : register(s0);
+
+static float4 FragColor;
+static float2 vUV;
+
+struct SPIRV_Cross_Input
+{
+    float2 vUV : TEXCOORD0;
+};
+
+struct SPIRV_Cross_Output
+{
+    float4 FragColor : SV_Target0;
+};
+
+void frag_main()
+{
+    FragColor = uTex.Sample(uSampler, vUV);
+    FragColor += uTex.Sample(uSampler, vUV, int2(1, 1));
+}
+
+SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
+{
+    vUV = stage_input.vUV;
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.FragColor = FragColor;
+    return stage_output;
+}
diff --git a/reference/opt/shaders-hlsl/asm/frag/empty-struct.asm.frag b/reference/opt/shaders-hlsl/asm/frag/empty-struct.asm.frag
new file mode 100644
index 0000000000..3b50282fe0
--- /dev/null
+++ b/reference/opt/shaders-hlsl/asm/frag/empty-struct.asm.frag
@@ -0,0 +1,8 @@
+void frag_main()
+{
+}
+
+void main()
+{
+    frag_main();
+}
diff --git a/reference/opt/shaders-hlsl/asm/frag/function-overload-alias.asm.frag b/reference/opt/shaders-hlsl/asm/frag/function-overload-alias.asm.frag
index 432915da36..93f8414e93 100644
--- a/reference/opt/shaders-hlsl/asm/frag/function-overload-alias.asm.frag
+++ b/reference/opt/shaders-hlsl/asm/frag/function-overload-alias.asm.frag
@@ -7,7 +7,7 @@ struct SPIRV_Cross_Output
 
 void frag_main()
 {
-    FragColor = (((1.0f.xxxx + 1.0f.xxxx) + (1.0f.xxx.xyzz + 1.0f.xxxx)) + (1.0f.xxxx + 2.0f.xxxx)) + (1.0f.xx.xyxy + 2.0f.xxxx);
+    FragColor = 10.0f.xxxx;
 }
 
 SPIRV_Cross_Output main()
diff --git a/reference/opt/shaders-hlsl/asm/frag/image-extract-reuse.asm.frag b/reference/opt/shaders-hlsl/asm/frag/image-extract-reuse.asm.frag
new file mode 100644
index 0000000000..ed53720d94
--- /dev/null
+++ b/reference/opt/shaders-hlsl/asm/frag/image-extract-reuse.asm.frag
@@ -0,0 +1,31 @@
+Texture2D<float4> uTexture : register(t0);
+SamplerState _uTexture_sampler : register(s0);
+
+static int2 Size;
+
+struct SPIRV_Cross_Output
+{
+    int2 Size : SV_Target0;
+};
+
+uint2 SPIRV_Cross_textureSize(Texture2D<float4> Tex, uint Level, out uint Param)
+{
+    uint2 ret;
+    Tex.GetDimensions(Level, ret.x, ret.y, Param);
+    return ret;
+}
+
+void frag_main()
+{
+    uint _19_dummy_parameter;
+    uint _20_dummy_parameter;
+    Size = int2(SPIRV_Cross_textureSize(uTexture, uint(0), _19_dummy_parameter)) + int2(SPIRV_Cross_textureSize(uTexture, uint(1), _20_dummy_parameter));
+}
+
+SPIRV_Cross_Output main()
+{
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.Size = Size;
+    return stage_output;
+}
diff --git a/reference/opt/shaders-hlsl/asm/frag/implicit-read-dep-phi.asm.frag b/reference/opt/shaders-hlsl/asm/frag/implicit-read-dep-phi.asm.frag
new file mode 100644
index 0000000000..67f14fccae
--- /dev/null
+++ b/reference/opt/shaders-hlsl/asm/frag/implicit-read-dep-phi.asm.frag
@@ -0,0 +1,57 @@
+Texture2D<float4> uImage : register(t0);
+SamplerState _uImage_sampler : register(s0);
+
+static float4 v0;
+static float4 FragColor;
+
+struct SPIRV_Cross_Input
+{
+    float4 v0 : TEXCOORD0;
+};
+
+struct SPIRV_Cross_Output
+{
+    float4 FragColor : SV_Target0;
+};
+
+void frag_main()
+{
+    float phi;
+    float4 _36;
+    int _51;
+    _51 = 0;
+    phi = 1.0f;
+    _36 = float4(1.0f, 2.0f, 1.0f, 2.0f);
+    for (;;)
+    {
+        FragColor = _36;
+        if (_51 < 4)
+        {
+            if (v0[_51] > 0.0f)
+            {
+                float2 _48 = phi.xx;
+                _51++;
+                phi += 2.0f;
+                _36 = uImage.SampleLevel(_uImage_sampler, _48, 0.0f);
+                continue;
+            }
+            else
+            {
+                break;
+            }
+        }
+        else
+        {
+            break;
+        }
+    }
+}
+
+SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
+{
+    v0 = stage_input.v0;
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.FragColor = FragColor;
+    return stage_output;
+}
diff --git a/reference/opt/shaders-hlsl/asm/frag/srem.asm.frag b/reference/opt/shaders-hlsl/asm/frag/srem.asm.frag
new file mode 100644
index 0000000000..db5e717457
--- /dev/null
+++ b/reference/opt/shaders-hlsl/asm/frag/srem.asm.frag
@@ -0,0 +1,29 @@
+static float4 FragColor;
+static int4 vA;
+static int4 vB;
+
+struct SPIRV_Cross_Input
+{
+    nointerpolation int4 vA : TEXCOORD0;
+    nointerpolation int4 vB : TEXCOORD1;
+};
+
+struct SPIRV_Cross_Output
+{
+    float4 FragColor : SV_Target0;
+};
+
+void frag_main()
+{
+    FragColor = float4(vA - vB * (vA / vB));
+}
+
+SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
+{
+    vA = stage_input.vA;
+    vB = stage_input.vB;
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.FragColor = FragColor;
+    return stage_output;
+}
diff --git a/reference/opt/shaders-hlsl/asm/frag/texel-fetch-no-lod.asm.frag b/reference/opt/shaders-hlsl/asm/frag/texel-fetch-no-lod.asm.frag
new file mode 100644
index 0000000000..695d5fe9df
--- /dev/null
+++ b/reference/opt/shaders-hlsl/asm/frag/texel-fetch-no-lod.asm.frag
@@ -0,0 +1,29 @@
+Texture2D<float4> uTexture : register(t0);
+SamplerState _uTexture_sampler : register(s0);
+
+static float4 gl_FragCoord;
+static float4 FragColor;
+
+struct SPIRV_Cross_Input
+{
+    float4 gl_FragCoord : SV_Position;
+};
+
+struct SPIRV_Cross_Output
+{
+    float4 FragColor : SV_Target0;
+};
+
+void frag_main()
+{
+    FragColor = uTexture.Load(int3(int2(gl_FragCoord.xy), 0));
+}
+
+SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
+{
+    gl_FragCoord = stage_input.gl_FragCoord;
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.FragColor = FragColor;
+    return stage_output;
+}
diff --git a/reference/opt/shaders-hlsl/asm/frag/unreachable.asm.frag b/reference/opt/shaders-hlsl/asm/frag/unreachable.asm.frag
index e7cb790205..ee3e467248 100644
--- a/reference/opt/shaders-hlsl/asm/frag/unreachable.asm.frag
+++ b/reference/opt/shaders-hlsl/asm/frag/unreachable.asm.frag
@@ -11,25 +11,23 @@ struct SPIRV_Cross_Output
     float4 FragColor : SV_Target0;
 };
 
-float4 _21;
-
 void frag_main()
 {
-    float4 _33;
-    do
+    bool _29;
+    for (;;)
     {
-        if (counter == 10)
+        _29 = counter == 10;
+        if (_29)
         {
-            _33 = 10.0f.xxxx;
             break;
         }
         else
         {
-            _33 = 30.0f.xxxx;
             break;
         }
-    } while (false);
-    FragColor = _33;
+    }
+    bool4 _35 = _29.xxxx;
+    FragColor = float4(_35.x ? 10.0f.xxxx.x : 30.0f.xxxx.x, _35.y ? 10.0f.xxxx.y : 30.0f.xxxx.y, _35.z ? 10.0f.xxxx.z : 30.0f.xxxx.z, _35.w ? 10.0f.xxxx.w : 30.0f.xxxx.w);
 }
 
 SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
diff --git a/reference/opt/shaders-hlsl/asm/vert/empty-struct-composite.asm.vert b/reference/opt/shaders-hlsl/asm/vert/empty-struct-composite.asm.vert
deleted file mode 100644
index 103ff46a3f..0000000000
--- a/reference/opt/shaders-hlsl/asm/vert/empty-struct-composite.asm.vert
+++ /dev/null
@@ -1,8 +0,0 @@
-void vert_main()
-{
-}
-
-void main()
-{
-    vert_main();
-}
diff --git a/reference/opt/shaders-hlsl/asm/vert/spec-constant-op-composite.asm.vert b/reference/opt/shaders-hlsl/asm/vert/spec-constant-op-composite.asm.vert
new file mode 100644
index 0000000000..c02f70c9ee
--- /dev/null
+++ b/reference/opt/shaders-hlsl/asm/vert/spec-constant-op-composite.asm.vert
@@ -0,0 +1,37 @@
+static const int _7 = -10;
+static const uint _8 = 100u;
+static const int _20 = (_7 + 2);
+static const uint _25 = (_8 % 5u);
+static const int4 _30 = int4(20, 30, _20, _20);
+static const int2 _32 = int2(_30.y, _30.x);
+static const int _33 = _30.y;
+
+static float4 gl_Position;
+static int _4;
+
+struct SPIRV_Cross_Output
+{
+    nointerpolation int _4 : TEXCOORD0;
+    float4 gl_Position : SV_Position;
+};
+
+void vert_main()
+{
+    float4 _64 = 0.0f.xxxx;
+    _64.y = float(_20);
+    float4 _68 = _64;
+    _68.z = float(_25);
+    float4 _52 = _68 + float4(_30);
+    float2 _56 = _52.xy + float2(_32);
+    gl_Position = float4(_56.x, _56.y, _52.z, _52.w);
+    _4 = _33;
+}
+
+SPIRV_Cross_Output main()
+{
+    vert_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.gl_Position = gl_Position;
+    stage_output._4 = _4;
+    return stage_output;
+}
diff --git a/reference/opt/shaders-hlsl/comp/builtins.comp b/reference/opt/shaders-hlsl/comp/builtins.comp
index 990fc85337..7f88aa798f 100644
--- a/reference/opt/shaders-hlsl/comp/builtins.comp
+++ b/reference/opt/shaders-hlsl/comp/builtins.comp
@@ -1,3 +1,5 @@
+static const uint3 gl_WorkGroupSize = uint3(8u, 4u, 2u);
+
 void comp_main()
 {
 }
diff --git a/reference/opt/shaders-hlsl/comp/image.comp b/reference/opt/shaders-hlsl/comp/image.comp
index a8fc137581..6c2b58cd29 100644
--- a/reference/opt/shaders-hlsl/comp/image.comp
+++ b/reference/opt/shaders-hlsl/comp/image.comp
@@ -34,24 +34,26 @@ struct SPIRV_Cross_Input
 
 void comp_main()
 {
-    uImageOutF[int2(gl_GlobalInvocationID.xy)] = uImageInF[int2(gl_GlobalInvocationID.xy)].x;
-    uImageOutI[int2(gl_GlobalInvocationID.xy)] = uImageInI[int2(gl_GlobalInvocationID.xy)].x;
-    uImageOutU[int2(gl_GlobalInvocationID.xy)] = uImageInU[int2(gl_GlobalInvocationID.xy)].x;
-    uImageOutBuffer[int(gl_GlobalInvocationID.x)] = uImageInBuffer[int(gl_GlobalInvocationID.x)].x;
-    uImageOutF2[int2(gl_GlobalInvocationID.xy)] = uImageInF2[int2(gl_GlobalInvocationID.xy)].xy;
-    uImageOutI2[int2(gl_GlobalInvocationID.xy)] = uImageInI2[int2(gl_GlobalInvocationID.xy)].xy;
-    uImageOutU2[int2(gl_GlobalInvocationID.xy)] = uImageInU2[int2(gl_GlobalInvocationID.xy)].xy;
-    float4 _135 = uImageInBuffer2[int(gl_GlobalInvocationID.x)].xyyy;
-    uImageOutBuffer2[int(gl_GlobalInvocationID.x)] = _135.xy;
-    uImageOutF4[int2(gl_GlobalInvocationID.xy)] = uImageInF4[int2(gl_GlobalInvocationID.xy)];
-    int4 _165 = uImageInI4[int2(gl_GlobalInvocationID.xy)];
-    uImageOutI4[int2(gl_GlobalInvocationID.xy)] = _165;
-    uint4 _180 = uImageInU4[int2(gl_GlobalInvocationID.xy)];
-    uImageOutU4[int2(gl_GlobalInvocationID.xy)] = _180;
-    uImageOutBuffer4[int(gl_GlobalInvocationID.x)] = uImageInBuffer4[int(gl_GlobalInvocationID.x)];
-    uImageNoFmtF[int2(gl_GlobalInvocationID.xy)] = _135;
-    uImageNoFmtU[int2(gl_GlobalInvocationID.xy)] = _180;
-    uImageNoFmtI[int2(gl_GlobalInvocationID.xy)] = _165;
+    int2 _23 = int2(gl_GlobalInvocationID.xy);
+    uImageOutF[_23] = uImageInF[_23].x;
+    uImageOutI[_23] = uImageInI[_23].x;
+    uImageOutU[_23] = uImageInU[_23].x;
+    int _74 = int(gl_GlobalInvocationID.x);
+    uImageOutBuffer[_74] = uImageInBuffer[_74].x;
+    uImageOutF2[_23] = uImageInF2[_23].xy;
+    uImageOutI2[_23] = uImageInI2[_23].xy;
+    uImageOutU2[_23] = uImageInU2[_23].xy;
+    float4 _135 = uImageInBuffer2[_74].xyyy;
+    uImageOutBuffer2[_74] = _135.xy;
+    uImageOutF4[_23] = uImageInF4[_23];
+    int4 _165 = uImageInI4[_23];
+    uImageOutI4[_23] = _165;
+    uint4 _180 = uImageInU4[_23];
+    uImageOutU4[_23] = _180;
+    uImageOutBuffer4[_74] = uImageInBuffer4[_74];
+    uImageNoFmtF[_23] = _135;
+    uImageNoFmtU[_23] = _180;
+    uImageNoFmtI[_23] = _165;
 }
 
 [numthreads(1, 1, 1)]
diff --git a/reference/opt/shaders-hlsl/comp/rmw-matrix.comp b/reference/opt/shaders-hlsl/comp/rmw-matrix.comp
new file mode 100644
index 0000000000..ed66669358
--- /dev/null
+++ b/reference/opt/shaders-hlsl/comp/rmw-matrix.comp
@@ -0,0 +1,20 @@
+RWByteAddressBuffer _11 : register(u0);
+
+void comp_main()
+{
+    _11.Store(0, asuint(asfloat(_11.Load(0)) * asfloat(_11.Load(96))));
+    _11.Store4(16, asuint(asfloat(_11.Load4(16)) * asfloat(_11.Load4(112))));
+    float4x4 _35 = asfloat(uint4x4(_11.Load4(128), _11.Load4(144), _11.Load4(160), _11.Load4(176)));
+    float4x4 _37 = asfloat(uint4x4(_11.Load4(32), _11.Load4(48), _11.Load4(64), _11.Load4(80)));
+    float4x4 _38 = mul(_35, _37);
+    _11.Store4(32, asuint(_38[0]));
+    _11.Store4(48, asuint(_38[1]));
+    _11.Store4(64, asuint(_38[2]));
+    _11.Store4(80, asuint(_38[3]));
+}
+
+[numthreads(1, 1, 1)]
+void main()
+{
+    comp_main();
+}
diff --git a/reference/opt/shaders-hlsl/comp/shared.comp b/reference/opt/shaders-hlsl/comp/shared.comp
index 498241eaca..9831302afd 100644
--- a/reference/opt/shaders-hlsl/comp/shared.comp
+++ b/reference/opt/shaders-hlsl/comp/shared.comp
@@ -17,7 +17,7 @@ void comp_main()
 {
     sShared[gl_LocalInvocationIndex] = asfloat(_22.Load(gl_GlobalInvocationID.x * 4 + 0));
     GroupMemoryBarrierWithGroupSync();
-    _44.Store(gl_GlobalInvocationID.x * 4 + 0, asuint(sShared[(4u - gl_LocalInvocationIndex) - 1u]));
+    _44.Store(gl_GlobalInvocationID.x * 4 + 0, asuint(sShared[3u - gl_LocalInvocationIndex]));
 }
 
 [numthreads(4, 1, 1)]
diff --git a/reference/opt/shaders-hlsl/comp/subgroups.invalid.nofxc.sm60.comp b/reference/opt/shaders-hlsl/comp/subgroups.invalid.nofxc.sm60.comp
new file mode 100644
index 0000000000..dabc7df9e2
--- /dev/null
+++ b/reference/opt/shaders-hlsl/comp/subgroups.invalid.nofxc.sm60.comp
@@ -0,0 +1,67 @@
+RWByteAddressBuffer _9 : register(u0, space0);
+
+static uint4 gl_SubgroupEqMask;
+static uint4 gl_SubgroupGeMask;
+static uint4 gl_SubgroupGtMask;
+static uint4 gl_SubgroupLeMask;
+static uint4 gl_SubgroupLtMask;
+void comp_main()
+{
+    _9.Store(0, asuint(float(WaveGetLaneCount())));
+    _9.Store(0, asuint(float(WaveGetLaneIndex())));
+    _9.Store(0, asuint(float4(gl_SubgroupEqMask).x));
+    _9.Store(0, asuint(float4(gl_SubgroupGeMask).x));
+    _9.Store(0, asuint(float4(gl_SubgroupGtMask).x));
+    _9.Store(0, asuint(float4(gl_SubgroupLeMask).x));
+    _9.Store(0, asuint(float4(gl_SubgroupLtMask).x));
+    uint4 _75 = WaveActiveBallot(true);
+    float4 _88 = WaveActiveSum(20.0f.xxxx);
+    int4 _94 = WaveActiveSum(int4(20, 20, 20, 20));
+    float4 _96 = WaveActiveProduct(20.0f.xxxx);
+    int4 _98 = WaveActiveProduct(int4(20, 20, 20, 20));
+    float4 _127 = WavePrefixProduct(_96) * _96;
+    int4 _129 = WavePrefixProduct(_98) * _98;
+}
+
+[numthreads(1, 1, 1)]
+void main()
+{
+    gl_SubgroupEqMask = 1u << (WaveGetLaneIndex() - uint4(0, 32, 64, 96));
+    if (WaveGetLaneIndex() >= 32) gl_SubgroupEqMask.x = 0;
+    if (WaveGetLaneIndex() >= 64 || WaveGetLaneIndex() < 32) gl_SubgroupEqMask.y = 0;
+    if (WaveGetLaneIndex() >= 96 || WaveGetLaneIndex() < 64) gl_SubgroupEqMask.z = 0;
+    if (WaveGetLaneIndex() < 96) gl_SubgroupEqMask.w = 0;
+    gl_SubgroupGeMask = ~((1u << (WaveGetLaneIndex() - uint4(0, 32, 64, 96))) - 1u);
+    if (WaveGetLaneIndex() >= 32) gl_SubgroupGeMask.x = 0u;
+    if (WaveGetLaneIndex() >= 64) gl_SubgroupGeMask.y = 0u;
+    if (WaveGetLaneIndex() >= 96) gl_SubgroupGeMask.z = 0u;
+    if (WaveGetLaneIndex() < 32) gl_SubgroupGeMask.y = ~0u;
+    if (WaveGetLaneIndex() < 64) gl_SubgroupGeMask.z = ~0u;
+    if (WaveGetLaneIndex() < 96) gl_SubgroupGeMask.w = ~0u;
+    uint gt_lane_index = WaveGetLaneIndex() + 1;
+    gl_SubgroupGtMask = ~((1u << (gt_lane_index - uint4(0, 32, 64, 96))) - 1u);
+    if (gt_lane_index >= 32) gl_SubgroupGtMask.x = 0u;
+    if (gt_lane_index >= 64) gl_SubgroupGtMask.y = 0u;
+    if (gt_lane_index >= 96) gl_SubgroupGtMask.z = 0u;
+    if (gt_lane_index >= 128) gl_SubgroupGtMask.w = 0u;
+    if (gt_lane_index < 32) gl_SubgroupGtMask.y = ~0u;
+    if (gt_lane_index < 64) gl_SubgroupGtMask.z = ~0u;
+    if (gt_lane_index < 96) gl_SubgroupGtMask.w = ~0u;
+    uint le_lane_index = WaveGetLaneIndex() + 1;
+    gl_SubgroupLeMask = (1u << (le_lane_index - uint4(0, 32, 64, 96))) - 1u;
+    if (le_lane_index >= 32) gl_SubgroupLeMask.x = ~0u;
+    if (le_lane_index >= 64) gl_SubgroupLeMask.y = ~0u;
+    if (le_lane_index >= 96) gl_SubgroupLeMask.z = ~0u;
+    if (le_lane_index >= 128) gl_SubgroupLeMask.w = ~0u;
+    if (le_lane_index < 32) gl_SubgroupLeMask.y = 0u;
+    if (le_lane_index < 64) gl_SubgroupLeMask.z = 0u;
+    if (le_lane_index < 96) gl_SubgroupLeMask.w = 0u;
+    gl_SubgroupLtMask = (1u << (WaveGetLaneIndex() - uint4(0, 32, 64, 96))) - 1u;
+    if (WaveGetLaneIndex() >= 32) gl_SubgroupLtMask.x = ~0u;
+    if (WaveGetLaneIndex() >= 64) gl_SubgroupLtMask.y = ~0u;
+    if (WaveGetLaneIndex() >= 96) gl_SubgroupLtMask.z = ~0u;
+    if (WaveGetLaneIndex() < 32) gl_SubgroupLtMask.y = 0u;
+    if (WaveGetLaneIndex() < 64) gl_SubgroupLtMask.z = 0u;
+    if (WaveGetLaneIndex() < 96) gl_SubgroupLtMask.w = 0u;
+    comp_main();
+}
diff --git a/reference/opt/shaders-hlsl/frag/array-lut-no-loop-variable.frag b/reference/opt/shaders-hlsl/frag/array-lut-no-loop-variable.frag
new file mode 100644
index 0000000000..8cb52f0a4d
--- /dev/null
+++ b/reference/opt/shaders-hlsl/frag/array-lut-no-loop-variable.frag
@@ -0,0 +1,35 @@
+static const float _17[5] = { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f };
+
+static float4 FragColor;
+static float4 v0;
+
+struct SPIRV_Cross_Input
+{
+    float4 v0 : TEXCOORD0;
+};
+
+struct SPIRV_Cross_Output
+{
+    float4 FragColor : SV_Target0;
+};
+
+void frag_main()
+{
+    float lut[5] = _17;
+    for (int _46 = 0; _46 < 4; )
+    {
+        int _33 = _46 + 1;
+        FragColor += lut[_33].xxxx;
+        _46 = _33;
+        continue;
+    }
+}
+
+SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
+{
+    v0 = stage_input.v0;
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.FragColor = FragColor;
+    return stage_output;
+}
diff --git a/reference/opt/shaders-hlsl/frag/control-dependent-in-branch.desktop.frag b/reference/opt/shaders-hlsl/frag/control-dependent-in-branch.desktop.frag
new file mode 100644
index 0000000000..b2899ea02c
--- /dev/null
+++ b/reference/opt/shaders-hlsl/frag/control-dependent-in-branch.desktop.frag
@@ -0,0 +1,54 @@
+Texture2D<float4> uSampler : register(t0);
+SamplerState _uSampler_sampler : register(s0);
+
+static float4 FragColor;
+static float4 vInput;
+
+struct SPIRV_Cross_Input
+{
+    float4 vInput : TEXCOORD0;
+};
+
+struct SPIRV_Cross_Output
+{
+    float4 FragColor : SV_Target0;
+};
+
+void frag_main()
+{
+    FragColor = vInput;
+    float4 _23 = uSampler.Sample(_uSampler_sampler, vInput.xy);
+    float4 _26 = ddx(vInput);
+    float4 _29 = ddy(vInput);
+    float4 _32 = fwidth(vInput);
+    float4 _35 = ddx_coarse(vInput);
+    float4 _38 = ddy_coarse(vInput);
+    float4 _41 = fwidth(vInput);
+    float4 _44 = ddx_fine(vInput);
+    float4 _47 = ddy_fine(vInput);
+    float4 _50 = fwidth(vInput);
+    float _56_tmp = uSampler.CalculateLevelOfDetail(_uSampler_sampler, vInput.zw);
+    if (vInput.y > 10.0f)
+    {
+        FragColor += _23;
+        FragColor += _26;
+        FragColor += _29;
+        FragColor += _32;
+        FragColor += _35;
+        FragColor += _38;
+        FragColor += _41;
+        FragColor += _44;
+        FragColor += _47;
+        FragColor += _50;
+        FragColor += float2(_56_tmp, _56_tmp).xyxy;
+    }
+}
+
+SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
+{
+    vInput = stage_input.vInput;
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.FragColor = FragColor;
+    return stage_output;
+}
diff --git a/reference/opt/shaders-hlsl/frag/fp16.desktop.frag b/reference/opt/shaders-hlsl/frag/fp16.desktop.frag
new file mode 100644
index 0000000000..8ec30af16f
--- /dev/null
+++ b/reference/opt/shaders-hlsl/frag/fp16.desktop.frag
@@ -0,0 +1,45 @@
+static min16float4 v4;
+static min16float3 v3;
+static min16float v1;
+static min16float2 v2;
+static float o1;
+static float2 o2;
+static float3 o3;
+static float4 o4;
+
+struct SPIRV_Cross_Input
+{
+    min16float v1 : TEXCOORD0;
+    min16float2 v2 : TEXCOORD1;
+    min16float3 v3 : TEXCOORD2;
+    min16float4 v4 : TEXCOORD3;
+};
+
+struct SPIRV_Cross_Output
+{
+    float o1 : SV_Target0;
+    float2 o2 : SV_Target1;
+    float3 o3 : SV_Target2;
+    float4 o4 : SV_Target3;
+};
+
+void frag_main()
+{
+    min16float4 _324;
+    min16float4 _387 = modf(v4, _324);
+}
+
+SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
+{
+    v4 = stage_input.v4;
+    v3 = stage_input.v3;
+    v1 = stage_input.v1;
+    v2 = stage_input.v2;
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.o1 = o1;
+    stage_output.o2 = o2;
+    stage_output.o3 = o3;
+    stage_output.o4 = o4;
+    return stage_output;
+}
diff --git a/reference/opt/shaders-hlsl/frag/image-query-selective.frag b/reference/opt/shaders-hlsl/frag/image-query-selective.frag
index c73b742b5a..9194d0de1b 100644
--- a/reference/opt/shaders-hlsl/frag/image-query-selective.frag
+++ b/reference/opt/shaders-hlsl/frag/image-query-selective.frag
@@ -2,6 +2,30 @@ Texture1D<uint4> uSampler1DUint : register(t0);
 SamplerState _uSampler1DUint_sampler : register(s0);
 Texture1D<int4> uSampler1DInt : register(t0);
 SamplerState _uSampler1DInt_sampler : register(s0);
+Texture1D<float4> uSampler1DFloat : register(t0);
+SamplerState _uSampler1DFloat_sampler : register(s0);
+Texture2DArray<int4> uSampler2DArray : register(t2);
+SamplerState _uSampler2DArray_sampler : register(s2);
+Texture3D<float4> uSampler3D : register(t3);
+SamplerState _uSampler3D_sampler : register(s3);
+TextureCube<float4> uSamplerCube : register(t4);
+SamplerState _uSamplerCube_sampler : register(s4);
+TextureCubeArray<uint4> uSamplerCubeArray : register(t5);
+SamplerState _uSamplerCubeArray_sampler : register(s5);
+Buffer<float4> uSamplerBuffer : register(t6);
+Texture2DMS<int4> uSamplerMS : register(t7);
+SamplerState _uSamplerMS_sampler : register(s7);
+Texture2DMSArray<float4> uSamplerMSArray : register(t8);
+SamplerState _uSamplerMSArray_sampler : register(s8);
+Texture2D<float4> uSampler2D : register(t1);
+SamplerState _uSampler2D_sampler : register(s1);
+
+uint SPIRV_Cross_textureSize(Texture1D<float4> Tex, uint Level, out uint Param)
+{
+    uint ret;
+    Tex.GetDimensions(Level, ret.x, Param);
+    return ret;
+}
 
 uint SPIRV_Cross_textureSize(Texture1D<int4> Tex, uint Level, out uint Param)
 {
@@ -17,10 +41,87 @@ uint SPIRV_Cross_textureSize(Texture1D<uint4> Tex, uint Level, out uint Param)
     return ret;
 }
 
+uint2 SPIRV_Cross_textureSize(Texture2D<float4> Tex, uint Level, out uint Param)
+{
+    uint2 ret;
+    Tex.GetDimensions(Level, ret.x, ret.y, Param);
+    return ret;
+}
+
+uint3 SPIRV_Cross_textureSize(Texture2DArray<int4> Tex, uint Level, out uint Param)
+{
+    uint3 ret;
+    Tex.GetDimensions(Level, ret.x, ret.y, ret.z, Param);
+    return ret;
+}
+
+uint3 SPIRV_Cross_textureSize(Texture3D<float4> Tex, uint Level, out uint Param)
+{
+    uint3 ret;
+    Tex.GetDimensions(Level, ret.x, ret.y, ret.z, Param);
+    return ret;
+}
+
+uint SPIRV_Cross_textureSize(Buffer<float4> Tex, uint Level, out uint Param)
+{
+    uint ret;
+    Tex.GetDimensions(ret.x);
+    Param = 0u;
+    return ret;
+}
+
+uint2 SPIRV_Cross_textureSize(TextureCube<float4> Tex, uint Level, out uint Param)
+{
+    uint2 ret;
+    Tex.GetDimensions(Level, ret.x, ret.y, Param);
+    return ret;
+}
+
+uint3 SPIRV_Cross_textureSize(TextureCubeArray<uint4> Tex, uint Level, out uint Param)
+{
+    uint3 ret;
+    Tex.GetDimensions(Level, ret.x, ret.y, ret.z, Param);
+    return ret;
+}
+
+uint2 SPIRV_Cross_textureSize(Texture2DMS<int4> Tex, uint Level, out uint Param)
+{
+    uint2 ret;
+    Tex.GetDimensions(ret.x, ret.y, Param);
+    return ret;
+}
+
+uint3 SPIRV_Cross_textureSize(Texture2DMSArray<float4> Tex, uint Level, out uint Param)
+{
+    uint3 ret;
+    Tex.GetDimensions(ret.x, ret.y, ret.z, Param);
+    return ret;
+}
+
 void frag_main()
 {
     uint _17_dummy_parameter;
     uint _24_dummy_parameter;
+    uint _32_dummy_parameter;
+    uint _42_dummy_parameter;
+    uint _50_dummy_parameter;
+    uint _60_dummy_parameter;
+    uint _68_dummy_parameter;
+    uint _76_dummy_parameter;
+    uint _84_dummy_parameter;
+    uint _92_dummy_parameter;
+    int _100;
+    SPIRV_Cross_textureSize(uSampler2D, 0u, _100);
+    int _104;
+    SPIRV_Cross_textureSize(uSampler2DArray, 0u, _104);
+    int _108;
+    SPIRV_Cross_textureSize(uSampler3D, 0u, _108);
+    int _112;
+    SPIRV_Cross_textureSize(uSamplerCube, 0u, _112);
+    int _116;
+    SPIRV_Cross_textureSize(uSamplerMS, 0u, _116);
+    int _120;
+    SPIRV_Cross_textureSize(uSamplerMSArray, 0u, _120);
 }
 
 void main()
diff --git a/reference/opt/shaders-hlsl/frag/image-query.frag b/reference/opt/shaders-hlsl/frag/image-query.frag
index 3b50282fe0..20d8c1597c 100644
--- a/reference/opt/shaders-hlsl/frag/image-query.frag
+++ b/reference/opt/shaders-hlsl/frag/image-query.frag
@@ -1,5 +1,112 @@
+Texture1D<float4> uSampler1D : register(t0);
+SamplerState _uSampler1D_sampler : register(s0);
+Texture2D<float4> uSampler2D : register(t1);
+SamplerState _uSampler2D_sampler : register(s1);
+Texture2DArray<float4> uSampler2DArray : register(t2);
+SamplerState _uSampler2DArray_sampler : register(s2);
+Texture3D<float4> uSampler3D : register(t3);
+SamplerState _uSampler3D_sampler : register(s3);
+TextureCube<float4> uSamplerCube : register(t4);
+SamplerState _uSamplerCube_sampler : register(s4);
+TextureCubeArray<float4> uSamplerCubeArray : register(t5);
+SamplerState _uSamplerCubeArray_sampler : register(s5);
+Buffer<float4> uSamplerBuffer : register(t6);
+Texture2DMS<float4> uSamplerMS : register(t7);
+SamplerState _uSamplerMS_sampler : register(s7);
+Texture2DMSArray<float4> uSamplerMSArray : register(t8);
+SamplerState _uSamplerMSArray_sampler : register(s8);
+
+uint SPIRV_Cross_textureSize(Texture1D<float4> Tex, uint Level, out uint Param)
+{
+    uint ret;
+    Tex.GetDimensions(Level, ret.x, Param);
+    return ret;
+}
+
+uint2 SPIRV_Cross_textureSize(Texture2D<float4> Tex, uint Level, out uint Param)
+{
+    uint2 ret;
+    Tex.GetDimensions(Level, ret.x, ret.y, Param);
+    return ret;
+}
+
+uint3 SPIRV_Cross_textureSize(Texture2DArray<float4> Tex, uint Level, out uint Param)
+{
+    uint3 ret;
+    Tex.GetDimensions(Level, ret.x, ret.y, ret.z, Param);
+    return ret;
+}
+
+uint3 SPIRV_Cross_textureSize(Texture3D<float4> Tex, uint Level, out uint Param)
+{
+    uint3 ret;
+    Tex.GetDimensions(Level, ret.x, ret.y, ret.z, Param);
+    return ret;
+}
+
+uint SPIRV_Cross_textureSize(Buffer<float4> Tex, uint Level, out uint Param)
+{
+    uint ret;
+    Tex.GetDimensions(ret.x);
+    Param = 0u;
+    return ret;
+}
+
+uint2 SPIRV_Cross_textureSize(TextureCube<float4> Tex, uint Level, out uint Param)
+{
+    uint2 ret;
+    Tex.GetDimensions(Level, ret.x, ret.y, Param);
+    return ret;
+}
+
+uint3 SPIRV_Cross_textureSize(TextureCubeArray<float4> Tex, uint Level, out uint Param)
+{
+    uint3 ret;
+    Tex.GetDimensions(Level, ret.x, ret.y, ret.z, Param);
+    return ret;
+}
+
+uint2 SPIRV_Cross_textureSize(Texture2DMS<float4> Tex, uint Level, out uint Param)
+{
+    uint2 ret;
+    Tex.GetDimensions(ret.x, ret.y, Param);
+    return ret;
+}
+
+uint3 SPIRV_Cross_textureSize(Texture2DMSArray<float4> Tex, uint Level, out uint Param)
+{
+    uint3 ret;
+    Tex.GetDimensions(ret.x, ret.y, ret.z, Param);
+    return ret;
+}
+
 void frag_main()
 {
+    uint _17_dummy_parameter;
+    uint _27_dummy_parameter;
+    uint _37_dummy_parameter;
+    uint _45_dummy_parameter;
+    uint _53_dummy_parameter;
+    uint _61_dummy_parameter;
+    uint _69_dummy_parameter;
+    uint _77_dummy_parameter;
+    uint _85_dummy_parameter;
+    int _89;
+    SPIRV_Cross_textureSize(uSampler1D, 0u, _89);
+    int _93;
+    SPIRV_Cross_textureSize(uSampler2D, 0u, _93);
+    int _97;
+    SPIRV_Cross_textureSize(uSampler2DArray, 0u, _97);
+    int _101;
+    SPIRV_Cross_textureSize(uSampler3D, 0u, _101);
+    int _105;
+    SPIRV_Cross_textureSize(uSamplerCube, 0u, _105);
+    int _109;
+    SPIRV_Cross_textureSize(uSamplerCubeArray, 0u, _109);
+    int _113;
+    SPIRV_Cross_textureSize(uSamplerMS, 0u, _113);
+    int _117;
+    SPIRV_Cross_textureSize(uSamplerMSArray, 0u, _117);
 }
 
 void main()
diff --git a/reference/opt/shaders-hlsl/frag/partial-write-preserve.frag b/reference/opt/shaders-hlsl/frag/partial-write-preserve.frag
index 20da99c336..3b50282fe0 100644
--- a/reference/opt/shaders-hlsl/frag/partial-write-preserve.frag
+++ b/reference/opt/shaders-hlsl/frag/partial-write-preserve.frag
@@ -1,9 +1,3 @@
-struct B
-{
-    float a;
-    float b;
-};
-
 void frag_main()
 {
 }
diff --git a/reference/opt/shaders-hlsl/frag/resources.frag b/reference/opt/shaders-hlsl/frag/resources.frag
index 24b93c239c..c8558e2778 100644
--- a/reference/opt/shaders-hlsl/frag/resources.frag
+++ b/reference/opt/shaders-hlsl/frag/resources.frag
@@ -4,7 +4,7 @@ cbuffer cbuf : register(b3)
 };
 cbuffer registers
 {
-    float4 registers_d : packoffset(c0);
+    float4 registers_a : packoffset(c0);
 };
 Texture2D<float4> uSampledImage : register(t4);
 SamplerState _uSampledImage_sampler : register(s4);
@@ -26,7 +26,7 @@ struct SPIRV_Cross_Output
 
 void frag_main()
 {
-    FragColor = (uSampledImage.Sample(_uSampledImage_sampler, vTex) + uTexture.Sample(uSampler, vTex)) + (cbuf_a + registers_d);
+    FragColor = (uSampledImage.Sample(_uSampledImage_sampler, vTex) + uTexture.Sample(uSampler, vTex)) + (cbuf_a + registers_a);
 }
 
 SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
diff --git a/reference/opt/shaders-hlsl/frag/row-major-layout-in-struct.frag b/reference/opt/shaders-hlsl/frag/row-major-layout-in-struct.frag
new file mode 100644
index 0000000000..8576b0f9d3
--- /dev/null
+++ b/reference/opt/shaders-hlsl/frag/row-major-layout-in-struct.frag
@@ -0,0 +1,37 @@
+struct Foo
+{
+    row_major float4x4 v;
+    row_major float4x4 w;
+};
+
+cbuffer _17 : register(b0)
+{
+    Foo _17_foo : packoffset(c0);
+};
+
+static float4 FragColor;
+static float4 vUV;
+
+struct SPIRV_Cross_Input
+{
+    float4 vUV : TEXCOORD0;
+};
+
+struct SPIRV_Cross_Output
+{
+    float4 FragColor : SV_Target0;
+};
+
+void frag_main()
+{
+    FragColor = mul(mul(vUV, _17_foo.w), _17_foo.v);
+}
+
+SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
+{
+    vUV = stage_input.vUV;
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.FragColor = FragColor;
+    return stage_output;
+}
diff --git a/reference/opt/shaders-hlsl/frag/sample-cmp-level-zero.frag b/reference/opt/shaders-hlsl/frag/sample-cmp-level-zero.frag
index 6f5ae7e38c..b6e91ce7b6 100644
--- a/reference/opt/shaders-hlsl/frag/sample-cmp-level-zero.frag
+++ b/reference/opt/shaders-hlsl/frag/sample-cmp-level-zero.frag
@@ -41,9 +41,7 @@ void frag_main()
 {
     float4 _80 = vDirRef;
     _80.z = vDirRef.w;
-    float4 _87 = vDirRef;
-    _87.z = vDirRef.w;
-    FragColor = (((((((uSampler2D.SampleCmp(_uSampler2D_sampler, vUVRef.xy, vUVRef.z, int2(-1, -1)) + uSampler2DArray.SampleCmp(_uSampler2DArray_sampler, vDirRef.xyz, vDirRef.w, int2(-1, -1))) + uSamplerCube.SampleCmp(_uSamplerCube_sampler, vDirRef.xyz, vDirRef.w)) + uSamplerCubeArray.SampleCmp(_uSamplerCubeArray_sampler, vDirRef, 0.5f)) + uSampler2D.SampleCmpLevelZero(_uSampler2D_sampler, vUVRef.xy, vUVRef.z, int2(-1, -1))) + uSampler2DArray.SampleCmpLevelZero(_uSampler2DArray_sampler, vDirRef.xyz, vDirRef.w, int2(-1, -1))) + uSamplerCube.SampleCmpLevelZero(_uSamplerCube_sampler, vDirRef.xyz, vDirRef.w)) + uSampler2D.SampleCmp(_uSampler2D_sampler, SPIRV_Cross_projectTextureCoordinate(_80.xyz), vDirRef.z, int2(1, 1))) + uSampler2D.SampleCmpLevelZero(_uSampler2D_sampler, SPIRV_Cross_projectTextureCoordinate(_87.xyz), vDirRef.z, int2(1, 1));
+    FragColor = (((((((uSampler2D.SampleCmp(_uSampler2D_sampler, vUVRef.xy, vUVRef.z, int2(-1, -1)) + uSampler2DArray.SampleCmp(_uSampler2DArray_sampler, vDirRef.xyz, vDirRef.w, int2(-1, -1))) + uSamplerCube.SampleCmp(_uSamplerCube_sampler, vDirRef.xyz, vDirRef.w)) + uSamplerCubeArray.SampleCmp(_uSamplerCubeArray_sampler, vDirRef, 0.5f)) + uSampler2D.SampleCmpLevelZero(_uSampler2D_sampler, vUVRef.xy, vUVRef.z, int2(-1, -1))) + uSampler2DArray.SampleCmpLevelZero(_uSampler2DArray_sampler, vDirRef.xyz, vDirRef.w, int2(-1, -1))) + uSamplerCube.SampleCmpLevelZero(_uSamplerCube_sampler, vDirRef.xyz, vDirRef.w)) + uSampler2D.SampleCmp(_uSampler2D_sampler, SPIRV_Cross_projectTextureCoordinate(_80.xyz), vDirRef.z, int2(1, 1))) + uSampler2D.SampleCmpLevelZero(_uSampler2D_sampler, SPIRV_Cross_projectTextureCoordinate(_80.xyz), vDirRef.z, int2(1, 1));
 }
 
 SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
diff --git a/reference/opt/shaders-hlsl/frag/sampler-array.frag b/reference/opt/shaders-hlsl/frag/sampler-array.frag
index 5b8e492de6..1eced29be0 100644
--- a/reference/opt/shaders-hlsl/frag/sampler-array.frag
+++ b/reference/opt/shaders-hlsl/frag/sampler-array.frag
@@ -17,7 +17,8 @@ struct SPIRV_Cross_Input
 
 void frag_main()
 {
-    uImage[vIndex][int2(gl_FragCoord.xy)] = ((uCombined[vIndex].Sample(_uCombined_sampler[vIndex], vTex) + uTex[vIndex].Sample(uSampler[vIndex], vTex)) + (uCombined[vIndex + 1].Sample(_uCombined_sampler[vIndex + 1], vTex))) + (uTex[vIndex + 1].Sample(uSampler[vIndex + 1], vTex));
+    int _72 = vIndex + 1;
+    uImage[vIndex][int2(gl_FragCoord.xy)] = ((uCombined[vIndex].Sample(_uCombined_sampler[vIndex], vTex) + uTex[vIndex].Sample(uSampler[vIndex], vTex)) + uCombined[_72].Sample(_uCombined_sampler[_72], vTex)) + uTex[_72].Sample(uSampler[_72], vTex);
 }
 
 void main(SPIRV_Cross_Input stage_input)
diff --git a/reference/opt/shaders-hlsl/frag/sampler-image-arrays.frag b/reference/opt/shaders-hlsl/frag/sampler-image-arrays.frag
new file mode 100644
index 0000000000..b6d0e9421c
--- /dev/null
+++ b/reference/opt/shaders-hlsl/frag/sampler-image-arrays.frag
@@ -0,0 +1,39 @@
+Texture2D<float4> uSampler[4] : register(t0);
+SamplerState _uSampler_sampler[4] : register(s0);
+Texture2D<float4> uTextures[4] : register(t8);
+SamplerState uSamplers[4] : register(s4);
+
+static int vIndex;
+static float2 vTex;
+static float4 FragColor;
+
+struct SPIRV_Cross_Input
+{
+    nointerpolation float2 vTex : TEXCOORD0;
+    nointerpolation int vIndex : TEXCOORD1;
+};
+
+struct SPIRV_Cross_Output
+{
+    float4 FragColor : SV_Target0;
+};
+
+void frag_main()
+{
+    FragColor = 0.0f.xxxx;
+    FragColor += uTextures[2].Sample(uSamplers[1], vTex);
+    FragColor += uSampler[vIndex].Sample(_uSampler_sampler[vIndex], vTex);
+    FragColor += uSampler[vIndex].Sample(_uSampler_sampler[vIndex], vTex + 0.100000001490116119384765625f.xx);
+    FragColor += uSampler[vIndex].Sample(_uSampler_sampler[vIndex], vTex + 0.20000000298023223876953125f.xx);
+    FragColor += uSampler[3].Sample(_uSampler_sampler[3], vTex + 0.300000011920928955078125f.xx);
+}
+
+SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
+{
+    vIndex = stage_input.vIndex;
+    vTex = stage_input.vTex;
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.FragColor = FragColor;
+    return stage_output;
+}
diff --git a/reference/opt/shaders-hlsl/frag/spec-constant.frag b/reference/opt/shaders-hlsl/frag/spec-constant.frag
deleted file mode 100644
index 781e3f20b8..0000000000
--- a/reference/opt/shaders-hlsl/frag/spec-constant.frag
+++ /dev/null
@@ -1,33 +0,0 @@
-static const float a = 1.0f;
-static const float b = 2.0f;
-static const int c = 3;
-static const int d = 4;
-
-struct Foo
-{
-    float elems[(d + 2)];
-};
-
-static float4 FragColor;
-
-struct SPIRV_Cross_Output
-{
-    float4 FragColor : SV_Target0;
-};
-
-void frag_main()
-{
-    float vec0[(c + 3)][8];
-    vec0[0][0] = 10.0f;
-    Foo foo;
-    foo.elems[c] = 10.0f;
-    FragColor = (((a + b).xxxx + vec0[0][0].xxxx) + 20.0f.xxxx) + foo.elems[c].xxxx;
-}
-
-SPIRV_Cross_Output main()
-{
-    frag_main();
-    SPIRV_Cross_Output stage_output;
-    stage_output.FragColor = FragColor;
-    return stage_output;
-}
diff --git a/reference/opt/shaders-hlsl/frag/tex-sampling-ms.frag b/reference/opt/shaders-hlsl/frag/tex-sampling-ms.frag
new file mode 100644
index 0000000000..ca88cfaeb3
--- /dev/null
+++ b/reference/opt/shaders-hlsl/frag/tex-sampling-ms.frag
@@ -0,0 +1,33 @@
+Texture2DMS<float4> uTex : register(t0);
+SamplerState _uTex_sampler : register(s0);
+
+static float4 gl_FragCoord;
+static float4 FragColor;
+
+struct SPIRV_Cross_Input
+{
+    float4 gl_FragCoord : SV_Position;
+};
+
+struct SPIRV_Cross_Output
+{
+    float4 FragColor : SV_Target0;
+};
+
+void frag_main()
+{
+    int2 _22 = int2(gl_FragCoord.xy);
+    FragColor = uTex.Load(_22, 0);
+    FragColor += uTex.Load(_22, 1);
+    FragColor += uTex.Load(_22, 2);
+    FragColor += uTex.Load(_22, 3);
+}
+
+SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
+{
+    gl_FragCoord = stage_input.gl_FragCoord;
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.FragColor = FragColor;
+    return stage_output;
+}
diff --git a/reference/opt/shaders-hlsl/frag/tex-sampling.frag b/reference/opt/shaders-hlsl/frag/tex-sampling.frag
index 6ebca5d8d1..74e9c3ba4a 100644
--- a/reference/opt/shaders-hlsl/frag/tex-sampling.frag
+++ b/reference/opt/shaders-hlsl/frag/tex-sampling.frag
@@ -1,27 +1,27 @@
-Texture1D<float4> tex1d;
-SamplerState _tex1d_sampler;
-Texture2D<float4> tex2d;
-SamplerState _tex2d_sampler;
-Texture3D<float4> tex3d;
-SamplerState _tex3d_sampler;
-TextureCube<float4> texCube;
-SamplerState _texCube_sampler;
-Texture1D<float4> tex1dShadow;
-SamplerComparisonState _tex1dShadow_sampler;
-Texture2D<float4> tex2dShadow;
-SamplerComparisonState _tex2dShadow_sampler;
-TextureCube<float4> texCubeShadow;
-SamplerComparisonState _texCubeShadow_sampler;
-Texture1DArray<float4> tex1dArray;
-SamplerState _tex1dArray_sampler;
-Texture2DArray<float4> tex2dArray;
-SamplerState _tex2dArray_sampler;
-TextureCubeArray<float4> texCubeArray;
-SamplerState _texCubeArray_sampler;
-Texture2D<float4> separateTex2d;
-SamplerState samplerNonDepth;
-Texture2D<float4> separateTex2dDepth;
-SamplerComparisonState samplerDepth;
+Texture1D<float4> tex1d : register(t0);
+SamplerState _tex1d_sampler : register(s0);
+Texture2D<float4> tex2d : register(t1);
+SamplerState _tex2d_sampler : register(s1);
+Texture3D<float4> tex3d : register(t2);
+SamplerState _tex3d_sampler : register(s2);
+TextureCube<float4> texCube : register(t3);
+SamplerState _texCube_sampler : register(s3);
+Texture1D<float4> tex1dShadow : register(t4);
+SamplerComparisonState _tex1dShadow_sampler : register(s4);
+Texture2D<float4> tex2dShadow : register(t5);
+SamplerComparisonState _tex2dShadow_sampler : register(s5);
+TextureCube<float4> texCubeShadow : register(t6);
+SamplerComparisonState _texCubeShadow_sampler : register(s6);
+Texture1DArray<float4> tex1dArray : register(t7);
+SamplerState _tex1dArray_sampler : register(s7);
+Texture2DArray<float4> tex2dArray : register(t8);
+SamplerState _tex2dArray_sampler : register(s8);
+TextureCubeArray<float4> texCubeArray : register(t9);
+SamplerState _texCubeArray_sampler : register(s9);
+Texture2D<float4> separateTex2d : register(t12);
+SamplerState samplerNonDepth : register(s11);
+Texture2D<float4> separateTex2dDepth : register(t13);
+SamplerComparisonState samplerDepth : register(s10);
 
 static float texCoord1d;
 static float2 texCoord2d;
@@ -60,14 +60,8 @@ float3 SPIRV_Cross_projectTextureCoordinate(float4 coord)
 void frag_main()
 {
     float4 _162 = (((((((((((((((((((tex1d.Sample(_tex1d_sampler, texCoord1d) + tex1d.Sample(_tex1d_sampler, texCoord1d, 1)) + tex1d.SampleLevel(_tex1d_sampler, texCoord1d, 2.0f)) + tex1d.SampleGrad(_tex1d_sampler, texCoord1d, 1.0f, 2.0f)) + tex1d.Sample(_tex1d_sampler, SPIRV_Cross_projectTextureCoordinate(float2(texCoord1d, 2.0f)))) + tex1d.SampleBias(_tex1d_sampler, texCoord1d, 1.0f)) + tex2d.Sample(_tex2d_sampler, texCoord2d)) + tex2d.Sample(_tex2d_sampler, texCoord2d, int2(1, 2))) + tex2d.SampleLevel(_tex2d_sampler, texCoord2d, 2.0f)) + tex2d.SampleGrad(_tex2d_sampler, texCoord2d, float2(1.0f, 2.0f), float2(3.0f, 4.0f))) + tex2d.Sample(_tex2d_sampler, SPIRV_Cross_projectTextureCoordinate(float3(texCoord2d, 2.0f)))) + tex2d.SampleBias(_tex2d_sampler, texCoord2d, 1.0f)) + tex3d.Sample(_tex3d_sampler, texCoord3d)) + tex3d.Sample(_tex3d_sampler, texCoord3d, int3(1, 2, 3))) + tex3d.SampleLevel(_tex3d_sampler, texCoord3d, 2.0f)) + tex3d.SampleGrad(_tex3d_sampler, texCoord3d, float3(1.0f, 2.0f, 3.0f), float3(4.0f, 5.0f, 6.0f))) + tex3d.Sample(_tex3d_sampler, SPIRV_Cross_projectTextureCoordinate(float4(texCoord3d, 2.0f)))) + tex3d.SampleBias(_tex3d_sampler, texCoord3d, 1.0f)) + texCube.Sample(_texCube_sampler, texCoord3d)) + texCube.SampleLevel(_texCube_sampler, texCoord3d, 2.0f)) + texCube.SampleBias(_texCube_sampler, texCoord3d, 1.0f);
-    float _178 = _162.w + tex1dShadow.SampleCmp(_tex1dShadow_sampler, float3(texCoord1d, 0.0f, 0.0f).x, 0.0f);
-    float4 _327 = _162;
-    _327.w = _178;
-    float _193 = _178 + tex2dShadow.SampleCmp(_tex2dShadow_sampler, float3(texCoord2d, 0.0f).xy, 0.0f);
-    float4 _331 = _327;
-    _331.w = _193;
-    float4 _335 = _331;
-    _335.w = _193 + texCubeShadow.SampleCmp(_texCubeShadow_sampler, float4(texCoord3d, 0.0f).xyz, 0.0f);
+    float4 _335 = _162;
+    _335.w = ((_162.w + tex1dShadow.SampleCmp(_tex1dShadow_sampler, float3(texCoord1d, 0.0f, 0.0f).x, 0.0f)) + tex2dShadow.SampleCmp(_tex2dShadow_sampler, float3(texCoord2d, 0.0f).xy, 0.0f)) + texCubeShadow.SampleCmp(_texCubeShadow_sampler, float4(texCoord3d, 0.0f).xyz, 0.0f);
     float4 _308 = ((((((((((((((_335 + tex1dArray.Sample(_tex1dArray_sampler, texCoord2d)) + tex2dArray.Sample(_tex2dArray_sampler, texCoord3d)) + texCubeArray.Sample(_texCubeArray_sampler, texCoord4d)) + tex2d.GatherRed(_tex2d_sampler, texCoord2d)) + tex2d.GatherRed(_tex2d_sampler, texCoord2d)) + tex2d.GatherGreen(_tex2d_sampler, texCoord2d)) + tex2d.GatherBlue(_tex2d_sampler, texCoord2d)) + tex2d.GatherAlpha(_tex2d_sampler, texCoord2d)) + tex2d.GatherRed(_tex2d_sampler, texCoord2d, int2(1, 1))) + tex2d.GatherRed(_tex2d_sampler, texCoord2d, int2(1, 1))) + tex2d.GatherGreen(_tex2d_sampler, texCoord2d, int2(1, 1))) + tex2d.GatherBlue(_tex2d_sampler, texCoord2d, int2(1, 1))) + tex2d.GatherAlpha(_tex2d_sampler, texCoord2d, int2(1, 1))) + tex2d.Load(int3(int2(1, 2), 0))) + separateTex2d.Sample(samplerNonDepth, texCoord2d);
     float4 _339 = _308;
     _339.w = _308.w + separateTex2dDepth.SampleCmp(samplerDepth, texCoord3d.xy, texCoord3d.z);
diff --git a/reference/opt/shaders-hlsl/frag/unary-enclose.frag b/reference/opt/shaders-hlsl/frag/unary-enclose.frag
index 76e98a66d0..348b91c172 100644
--- a/reference/opt/shaders-hlsl/frag/unary-enclose.frag
+++ b/reference/opt/shaders-hlsl/frag/unary-enclose.frag
@@ -15,7 +15,7 @@ struct SPIRV_Cross_Output
 
 void frag_main()
 {
-    FragColor = -(-vIn);
+    FragColor = vIn;
 }
 
 SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
diff --git a/reference/opt/shaders-hlsl/vert/locations.vert b/reference/opt/shaders-hlsl/vert/locations.vert
index ba36c4ae39..b007582c2a 100644
--- a/reference/opt/shaders-hlsl/vert/locations.vert
+++ b/reference/opt/shaders-hlsl/vert/locations.vert
@@ -5,6 +5,8 @@ struct Foo
     float3 c;
 };
 
+static const Foo _71 = { 1.0f.xxx, 1.0f.xxx, 1.0f.xxx };
+
 static float4 gl_Position;
 static float4 Input2;
 static float4 Input4;
@@ -40,8 +42,6 @@ struct SPIRV_Cross_Output
     float4 gl_Position : SV_Position;
 };
 
-Foo _70;
-
 void vert_main()
 {
     gl_Position = ((1.0f.xxxx + Input2) + Input4) + Input0;
@@ -49,13 +49,7 @@ void vert_main()
     vLocation1 = 1.0f;
     vLocation2[0] = 2.0f;
     vLocation2[1] = 2.0f;
-    Foo _65 = _70;
-    _65.a = 1.0f.xxx;
-    Foo _67 = _65;
-    _67.b = 1.0f.xxx;
-    Foo _69 = _67;
-    _69.c = 1.0f.xxx;
-    vLocation4 = _69;
+    vLocation4 = _71;
     vLocation9 = 9.0f;
     vout.color = 2.0f.xxx;
     vout.foo = 4.0f.xxx;
diff --git a/reference/opt/shaders-hlsl/vert/read-from-row-major-array.vert b/reference/opt/shaders-hlsl/vert/read-from-row-major-array.vert
new file mode 100644
index 0000000000..dde648e6d5
--- /dev/null
+++ b/reference/opt/shaders-hlsl/vert/read-from-row-major-array.vert
@@ -0,0 +1,35 @@
+cbuffer _104 : register(b0)
+{
+    column_major float2x3 _104_var[3][4] : packoffset(c0);
+};
+
+static float4 gl_Position;
+static float4 a_position;
+static float v_vtxResult;
+
+struct SPIRV_Cross_Input
+{
+    float4 a_position : TEXCOORD0;
+};
+
+struct SPIRV_Cross_Output
+{
+    float v_vtxResult : TEXCOORD0;
+    float4 gl_Position : SV_Position;
+};
+
+void vert_main()
+{
+    gl_Position = a_position;
+    v_vtxResult = ((float(abs(_104_var[0][0][0].x - 2.0f) < 0.0500000007450580596923828125f) * float(abs(_104_var[0][0][0].y - 6.0f) < 0.0500000007450580596923828125f)) * float(abs(_104_var[0][0][0].z - (-6.0f)) < 0.0500000007450580596923828125f)) * ((float(abs(_104_var[0][0][1].x) < 0.0500000007450580596923828125f) * float(abs(_104_var[0][0][1].y - 5.0f) < 0.0500000007450580596923828125f)) * float(abs(_104_var[0][0][1].z - 5.0f) < 0.0500000007450580596923828125f));
+}
+
+SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
+{
+    a_position = stage_input.a_position;
+    vert_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.gl_Position = gl_Position;
+    stage_output.v_vtxResult = v_vtxResult;
+    return stage_output;
+}
diff --git a/reference/opt/shaders-hlsl/vert/return-array.vert b/reference/opt/shaders-hlsl/vert/return-array.vert
index 902033b017..bd15755633 100644
--- a/reference/opt/shaders-hlsl/vert/return-array.vert
+++ b/reference/opt/shaders-hlsl/vert/return-array.vert
@@ -1,5 +1,3 @@
-static const float4 _20[2] = { 10.0f.xxxx, 20.0f.xxxx };
-
 static float4 gl_Position;
 static float4 vInput0;
 static float4 vInput1;
diff --git a/reference/opt/shaders-msl/asm/comp/bitcast_sar.asm.comp b/reference/opt/shaders-msl/asm/comp/bitcast_sar.asm.comp
index 20d6fe9e9d..4176830588 100644
--- a/reference/opt/shaders-msl/asm/comp/bitcast_sar.asm.comp
+++ b/reference/opt/shaders-msl/asm/comp/bitcast_sar.asm.comp
@@ -17,13 +17,15 @@ struct _4
 
 kernel void main0(device _3& _5 [[buffer(0)]], device _4& _6 [[buffer(1)]])
 {
-    _6._m0 = uint4(int4(_5._m1) >> _5._m0);
-    _6._m0 = uint4(_5._m0 >> int4(_5._m1));
-    _6._m0 = uint4(int4(_5._m1) >> int4(_5._m1));
-    _6._m0 = uint4(_5._m0 >> _5._m0);
-    _6._m1 = int4(_5._m1) >> int4(_5._m1);
-    _6._m1 = _5._m0 >> _5._m0;
-    _6._m1 = int4(_5._m1) >> _5._m0;
-    _6._m1 = _5._m0 >> int4(_5._m1);
+    int4 _22 = _5._m0;
+    uint4 _23 = _5._m1;
+    _6._m0 = uint4(int4(_23) >> _22);
+    _6._m0 = uint4(_22 >> int4(_23));
+    _6._m0 = uint4(int4(_23) >> int4(_23));
+    _6._m0 = uint4(_22 >> _22);
+    _6._m1 = int4(_23) >> int4(_23);
+    _6._m1 = _22 >> _22;
+    _6._m1 = int4(_23) >> _22;
+    _6._m1 = _22 >> int4(_23);
 }
 
diff --git a/reference/opt/shaders-msl/asm/comp/bitcast_sdiv.asm.comp b/reference/opt/shaders-msl/asm/comp/bitcast_sdiv.asm.comp
index f18b318bbb..6b80dff310 100644
--- a/reference/opt/shaders-msl/asm/comp/bitcast_sdiv.asm.comp
+++ b/reference/opt/shaders-msl/asm/comp/bitcast_sdiv.asm.comp
@@ -17,13 +17,15 @@ struct _4
 
 kernel void main0(device _3& _5 [[buffer(0)]], device _4& _6 [[buffer(1)]])
 {
-    _6._m0 = uint4(int4(_5._m1) / _5._m0);
-    _6._m0 = uint4(_5._m0 / int4(_5._m1));
-    _6._m0 = uint4(int4(_5._m1) / int4(_5._m1));
-    _6._m0 = uint4(_5._m0 / _5._m0);
-    _6._m1 = int4(_5._m1) / int4(_5._m1);
-    _6._m1 = _5._m0 / _5._m0;
-    _6._m1 = int4(_5._m1) / _5._m0;
-    _6._m1 = _5._m0 / int4(_5._m1);
+    int4 _22 = _5._m0;
+    uint4 _23 = _5._m1;
+    _6._m0 = uint4(int4(_23) / _22);
+    _6._m0 = uint4(_22 / int4(_23));
+    _6._m0 = uint4(int4(_23) / int4(_23));
+    _6._m0 = uint4(_22 / _22);
+    _6._m1 = int4(_23) / int4(_23);
+    _6._m1 = _22 / _22;
+    _6._m1 = int4(_23) / _22;
+    _6._m1 = _22 / int4(_23);
 }
 
diff --git a/reference/opt/shaders-msl/asm/comp/bitcast_slr.asm.comp b/reference/opt/shaders-msl/asm/comp/bitcast_slr.asm.comp
index 9fd60bef26..1dfca39181 100644
--- a/reference/opt/shaders-msl/asm/comp/bitcast_slr.asm.comp
+++ b/reference/opt/shaders-msl/asm/comp/bitcast_slr.asm.comp
@@ -17,13 +17,15 @@ struct _4
 
 kernel void main0(device _3& _5 [[buffer(0)]], device _4& _6 [[buffer(1)]])
 {
-    _6._m0 = _5._m1 >> uint4(_5._m0);
-    _6._m0 = uint4(_5._m0) >> _5._m1;
-    _6._m0 = _5._m1 >> _5._m1;
-    _6._m0 = uint4(_5._m0) >> uint4(_5._m0);
-    _6._m1 = int4(_5._m1 >> _5._m1);
-    _6._m1 = int4(uint4(_5._m0) >> uint4(_5._m0));
-    _6._m1 = int4(_5._m1 >> uint4(_5._m0));
-    _6._m1 = int4(uint4(_5._m0) >> _5._m1);
+    int4 _22 = _5._m0;
+    uint4 _23 = _5._m1;
+    _6._m0 = _23 >> uint4(_22);
+    _6._m0 = uint4(_22) >> _23;
+    _6._m0 = _23 >> _23;
+    _6._m0 = uint4(_22) >> uint4(_22);
+    _6._m1 = int4(_23 >> _23);
+    _6._m1 = int4(uint4(_22) >> uint4(_22));
+    _6._m1 = int4(_23 >> uint4(_22));
+    _6._m1 = int4(uint4(_22) >> _23);
 }
 
diff --git a/reference/shaders-msl/asm/comp/storage-buffer-basic.asm.comp b/reference/opt/shaders-msl/asm/comp/storage-buffer-basic.invalid.asm.comp
similarity index 100%
rename from reference/shaders-msl/asm/comp/storage-buffer-basic.asm.comp
rename to reference/opt/shaders-msl/asm/comp/storage-buffer-basic.invalid.asm.comp
index 9e37362dbd..2c9b038b20 100644
--- a/reference/shaders-msl/asm/comp/storage-buffer-basic.asm.comp
+++ b/reference/opt/shaders-msl/asm/comp/storage-buffer-basic.invalid.asm.comp
@@ -16,7 +16,7 @@ struct _6
 
 kernel void main0(device _6& _8 [[buffer(0)]], device _6& _9 [[buffer(1)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
 {
-    uint3 _23 = gl_WorkGroupSize;
     _8._m0[gl_WorkGroupID.x] = _9._m0[gl_WorkGroupID.x] + _8._m0[gl_WorkGroupID.x];
+    uint3 _23 = gl_WorkGroupSize;
 }
 
diff --git a/reference/opt/shaders-msl/asm/frag/combined-sampler-reuse.asm.frag b/reference/opt/shaders-msl/asm/frag/combined-sampler-reuse.asm.frag
new file mode 100644
index 0000000000..e420153bf1
--- /dev/null
+++ b/reference/opt/shaders-msl/asm/frag/combined-sampler-reuse.asm.frag
@@ -0,0 +1,23 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float4 FragColor [[color(0)]];
+};
+
+struct main0_in
+{
+    float2 vUV [[user(locn0)]];
+};
+
+fragment main0_out main0(main0_in in [[stage_in]], texture2d<float> uTex [[texture(1)]], sampler uSampler [[sampler(0)]])
+{
+    main0_out out = {};
+    out.FragColor = uTex.sample(uSampler, in.vUV);
+    out.FragColor += uTex.sample(uSampler, in.vUV, int2(1));
+    return out;
+}
+
diff --git a/reference/opt/shaders-msl/asm/frag/default-member-names.asm.frag b/reference/opt/shaders-msl/asm/frag/default-member-names.asm.frag
index 1c730c7bbc..e9573a019a 100644
--- a/reference/opt/shaders-msl/asm/frag/default-member-names.asm.frag
+++ b/reference/opt/shaders-msl/asm/frag/default-member-names.asm.frag
@@ -3,29 +3,7 @@
 
 using namespace metal;
 
-struct _9
-{
-    float _m0;
-};
-
-struct _10
-{
-    float _m0;
-    float _m1;
-    float _m2;
-    float _m3;
-    float _m4;
-    float _m5;
-    float _m6;
-    float _m7;
-    float _m8;
-    float _m9;
-    float _m10;
-    float _m11;
-    _9 _m12;
-};
-
-constant _10 _51 = {};
+constant float _57 = {};
 
 struct main0_out
 {
@@ -35,7 +13,7 @@ struct main0_out
 fragment main0_out main0()
 {
     main0_out out = {};
-    out.m_3 = float4(_51._m0, _51._m1, _51._m2, _51._m3);
+    out.m_3 = float4(_57);
     return out;
 }
 
diff --git a/reference/shaders-msl/asm/vert/empty-struct-composite.asm.vert b/reference/opt/shaders-msl/asm/frag/empty-struct.asm.frag
similarity index 77%
rename from reference/shaders-msl/asm/vert/empty-struct-composite.asm.vert
rename to reference/opt/shaders-msl/asm/frag/empty-struct.asm.frag
index 9e024c2095..92ac1d9f83 100644
--- a/reference/shaders-msl/asm/vert/empty-struct-composite.asm.vert
+++ b/reference/opt/shaders-msl/asm/frag/empty-struct.asm.frag
@@ -3,7 +3,7 @@
 
 using namespace metal;
 
-vertex void main0()
+fragment void main0()
 {
 }
 
diff --git a/reference/opt/shaders-msl/asm/frag/frem.asm.frag b/reference/opt/shaders-msl/asm/frag/frem.asm.frag
index f7c1f2ce88..ebc73d52df 100644
--- a/reference/opt/shaders-msl/asm/frag/frem.asm.frag
+++ b/reference/opt/shaders-msl/asm/frag/frem.asm.frag
@@ -3,17 +3,17 @@
 
 using namespace metal;
 
-struct main0_in
-{
-    float4 vB [[user(locn1)]];
-    float4 vA [[user(locn0)]];
-};
-
 struct main0_out
 {
     float4 FragColor [[color(0)]];
 };
 
+struct main0_in
+{
+    float4 vA [[user(locn0)]];
+    float4 vB [[user(locn1)]];
+};
+
 fragment main0_out main0(main0_in in [[stage_in]])
 {
     main0_out out = {};
diff --git a/reference/opt/shaders-msl/asm/frag/function-overload-alias.asm.frag b/reference/opt/shaders-msl/asm/frag/function-overload-alias.asm.frag
index 624a3e4807..64edee8722 100644
--- a/reference/opt/shaders-msl/asm/frag/function-overload-alias.asm.frag
+++ b/reference/opt/shaders-msl/asm/frag/function-overload-alias.asm.frag
@@ -11,7 +11,7 @@ struct main0_out
 fragment main0_out main0()
 {
     main0_out out = {};
-    out.FragColor = (((float4(1.0) + float4(1.0)) + (float3(1.0).xyzz + float4(1.0))) + (float4(1.0) + float4(2.0))) + (float2(1.0).xyxy + float4(2.0));
+    out.FragColor = float4(10.0);
     return out;
 }
 
diff --git a/reference/opt/shaders-msl/asm/frag/image-extract-reuse.asm.frag b/reference/opt/shaders-msl/asm/frag/image-extract-reuse.asm.frag
new file mode 100644
index 0000000000..0d691b306d
--- /dev/null
+++ b/reference/opt/shaders-msl/asm/frag/image-extract-reuse.asm.frag
@@ -0,0 +1,17 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    int2 Size [[color(0)]];
+};
+
+fragment main0_out main0(texture2d<float> uTexture [[texture(0)]], sampler uTextureSmplr [[sampler(0)]])
+{
+    main0_out out = {};
+    out.Size = int2(uTexture.get_width(), uTexture.get_height()) + int2(uTexture.get_width(1), uTexture.get_height(1));
+    return out;
+}
+
diff --git a/reference/opt/shaders-msl/asm/frag/implicit-read-dep-phi.asm.frag b/reference/opt/shaders-msl/asm/frag/implicit-read-dep-phi.asm.frag
new file mode 100644
index 0000000000..dd977a99da
--- /dev/null
+++ b/reference/opt/shaders-msl/asm/frag/implicit-read-dep-phi.asm.frag
@@ -0,0 +1,50 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float4 FragColor [[color(0)]];
+};
+
+struct main0_in
+{
+    float4 v0 [[user(locn0)]];
+};
+
+fragment main0_out main0(main0_in in [[stage_in]], texture2d<float> uImage [[texture(0)]], sampler uImageSmplr [[sampler(0)]])
+{
+    main0_out out = {};
+    float phi;
+    float4 _36;
+    int _51;
+    _51 = 0;
+    phi = 1.0;
+    _36 = float4(1.0, 2.0, 1.0, 2.0);
+    for (;;)
+    {
+        out.FragColor = _36;
+        if (_51 < 4)
+        {
+            if (in.v0[_51] > 0.0)
+            {
+                float2 _48 = float2(phi);
+                _51++;
+                phi += 2.0;
+                _36 = uImage.sample(uImageSmplr, _48, level(0.0));
+                continue;
+            }
+            else
+            {
+                break;
+            }
+        }
+        else
+        {
+            break;
+        }
+    }
+    return out;
+}
+
diff --git a/reference/opt/shaders-msl/asm/frag/op-constant-null.asm.frag b/reference/opt/shaders-msl/asm/frag/op-constant-null.asm.frag
index 9472add395..e1badb517a 100644
--- a/reference/opt/shaders-msl/asm/frag/op-constant-null.asm.frag
+++ b/reference/opt/shaders-msl/asm/frag/op-constant-null.asm.frag
@@ -3,12 +3,6 @@
 
 using namespace metal;
 
-struct D
-{
-    float4 a;
-    float b;
-};
-
 struct main0_out
 {
     float FragColor [[color(0)]];
diff --git a/reference/opt/shaders-msl/asm/frag/phi-loop-variable.asm.frag b/reference/opt/shaders-msl/asm/frag/phi-loop-variable.asm.frag
index 036774d661..92ac1d9f83 100644
--- a/reference/opt/shaders-msl/asm/frag/phi-loop-variable.asm.frag
+++ b/reference/opt/shaders-msl/asm/frag/phi-loop-variable.asm.frag
@@ -5,8 +5,5 @@ using namespace metal;
 
 fragment void main0()
 {
-    for (int _22 = 35; _22 >= 0; _22--)
-    {
-    }
 }
 
diff --git a/reference/shaders-msl/frag/in_block_assign.noopt.frag b/reference/opt/shaders-msl/asm/frag/srem.asm.frag
similarity index 61%
rename from reference/shaders-msl/frag/in_block_assign.noopt.frag
rename to reference/opt/shaders-msl/asm/frag/srem.asm.frag
index d06863d99c..f0cdd574de 100644
--- a/reference/shaders-msl/frag/in_block_assign.noopt.frag
+++ b/reference/opt/shaders-msl/asm/frag/srem.asm.frag
@@ -3,28 +3,21 @@
 
 using namespace metal;
 
-struct VOUT
-{
-    float4 a;
-};
-
-struct main0_in
-{
-    float4 VOUT_a [[user(locn0)]];
-};
-
 struct main0_out
 {
     float4 FragColor [[color(0)]];
 };
 
+struct main0_in
+{
+    int4 vA [[user(locn0)]];
+    int4 vB [[user(locn1)]];
+};
+
 fragment main0_out main0(main0_in in [[stage_in]])
 {
     main0_out out = {};
-    VOUT tmp;
-    tmp.a = in.VOUT_a;
-    tmp.a += float4(1.0);
-    out.FragColor = tmp.a;
+    out.FragColor = float4(in.vA - in.vB * (in.vA / in.vB));
     return out;
 }
 
diff --git a/reference/opt/shaders-msl/asm/frag/texel-fetch-no-lod.asm.frag b/reference/opt/shaders-msl/asm/frag/texel-fetch-no-lod.asm.frag
new file mode 100644
index 0000000000..dd308c32ad
--- /dev/null
+++ b/reference/opt/shaders-msl/asm/frag/texel-fetch-no-lod.asm.frag
@@ -0,0 +1,17 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float4 FragColor [[color(0)]];
+};
+
+fragment main0_out main0(texture2d<float> uTexture [[texture(0)]], sampler uTextureSmplr [[sampler(0)]], float4 gl_FragCoord [[position]])
+{
+    main0_out out = {};
+    out.FragColor = uTexture.read(uint2(int2(gl_FragCoord.xy)), 0);
+    return out;
+}
+
diff --git a/reference/opt/shaders-msl/asm/frag/undef-variable-store.asm.frag b/reference/opt/shaders-msl/asm/frag/undef-variable-store.asm.frag
index fb39c46fbb..a5380c51dc 100644
--- a/reference/opt/shaders-msl/asm/frag/undef-variable-store.asm.frag
+++ b/reference/opt/shaders-msl/asm/frag/undef-variable-store.asm.frag
@@ -3,9 +3,6 @@
 
 using namespace metal;
 
-constant float4 _38 = {};
-constant float4 _50 = {};
-
 struct main0_out
 {
     float4 _entryPointOutput [[color(0)]];
@@ -14,25 +11,7 @@ struct main0_out
 fragment main0_out main0()
 {
     main0_out out = {};
-    float4 _51;
-    _51 = _50;
-    float4 _52;
-    for (;;)
-    {
-        if (0.0 != 0.0)
-        {
-            _52 = float4(1.0, 0.0, 0.0, 1.0);
-            break;
-        }
-        else
-        {
-            _52 = float4(1.0, 1.0, 0.0, 1.0);
-            break;
-        }
-        _52 = _38;
-        break;
-    }
-    out._entryPointOutput = _52;
+    out._entryPointOutput = float4(1.0, 1.0, 0.0, 1.0);
     return out;
 }
 
diff --git a/reference/opt/shaders-msl/asm/frag/unreachable.asm.frag b/reference/opt/shaders-msl/asm/frag/unreachable.asm.frag
index 3e80051e6b..9a9baef57a 100644
--- a/reference/opt/shaders-msl/asm/frag/unreachable.asm.frag
+++ b/reference/opt/shaders-msl/asm/frag/unreachable.asm.frag
@@ -3,36 +3,34 @@
 
 using namespace metal;
 
-constant float4 _21 = {};
-
-struct main0_in
-{
-    int counter [[user(locn0)]];
-};
-
 struct main0_out
 {
     float4 FragColor [[color(0)]];
 };
 
+struct main0_in
+{
+    int counter [[user(locn0)]];
+};
+
 fragment main0_out main0(main0_in in [[stage_in]])
 {
     main0_out out = {};
-    float4 _33;
-    do
+    bool _29;
+    for (;;)
     {
-        if (in.counter == 10)
+        _29 = in.counter == 10;
+        if (_29)
         {
-            _33 = float4(10.0);
             break;
         }
         else
         {
-            _33 = float4(30.0);
             break;
         }
-    } while (false);
-    out.FragColor = _33;
+    }
+    bool4 _35 = bool4(_29);
+    out.FragColor = float4(_35.x ? float4(10.0).x : float4(30.0).x, _35.y ? float4(10.0).y : float4(30.0).y, _35.z ? float4(10.0).z : float4(30.0).z, _35.w ? float4(10.0).w : float4(30.0).w);
     return out;
 }
 
diff --git a/reference/opt/shaders-msl/asm/frag/vector-shuffle-oom.asm.frag b/reference/opt/shaders-msl/asm/frag/vector-shuffle-oom.asm.frag
index 97daea5d90..676fd82365 100644
--- a/reference/opt/shaders-msl/asm/frag/vector-shuffle-oom.asm.frag
+++ b/reference/opt/shaders-msl/asm/frag/vector-shuffle-oom.asm.frag
@@ -95,212 +95,212 @@ struct main0_out
     float4 m_5 [[color(0)]];
 };
 
-fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buffer(1)]], constant _18& _19 [[buffer(2)]], texture2d<float> _8 [[texture(0)]], texture2d<float> _12 [[texture(1)]], texture2d<float> _14 [[texture(2)]], sampler _9 [[sampler(0)]], sampler _13 [[sampler(1)]], sampler _15 [[sampler(2)]], float4 gl_FragCoord [[position]])
+fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buffer(1)]], constant _10& _11 [[buffer(2)]], texture2d<float> _14 [[texture(4)]], texture2d<float> _12 [[texture(13)]], texture2d<float> _8 [[texture(14)]], sampler _15 [[sampler(3)]], sampler _13 [[sampler(5)]], sampler _9 [[sampler(6)]], float4 gl_FragCoord [[position]])
 {
     main0_out out = {};
-    _28 _77 = _74;
-    _77._m0 = float4(0.0);
     float2 _82 = gl_FragCoord.xy * _19._m23.xy;
     float4 _88 = _7._m2 * _7._m0.xyxy;
-    float2 _97 = clamp(_82 + (float3(0.0, -2.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
-    float3 _109 = _11._m5 * clamp(_8.sample(_9, _97, level(0.0)).w * _7._m1, 0.0, 1.0);
+    float2 _95 = _88.xy;
+    float2 _96 = _88.zw;
+    float2 _97 = clamp(_82 + (float2(0.0, -2.0) * _7._m0.xy), _95, _96);
+    float3 _109 = float3(_11._m5) * clamp(_8.sample(_9, _97, level(0.0)).w * _7._m1, 0.0, 1.0);
     float4 _113 = _12.sample(_13, _97, level(0.0));
+    float _114 = _113.y;
     float3 _129;
-    if (_113.y > 0.0)
+    if (_114 > 0.0)
     {
-        _129 = _109 + (_14.sample(_15, _97, level(0.0)).xyz * clamp(_113.y * _113.z, 0.0, 1.0));
+        _129 = _109 + (_14.sample(_15, _97, level(0.0)).xyz * clamp(_114 * _113.z, 0.0, 1.0));
     }
     else
     {
         _129 = _109;
     }
-    float3 _133 = float4(0.0).xyz + (_129 * 0.5);
-    float4 _134 = float4(_133.x, _133.y, _133.z, float4(0.0).w);
-    _28 _135 = _77;
-    _135._m0 = _134;
-    float2 _144 = clamp(_82 + (float3(-1.0, -1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
-    float3 _156 = _11._m5 * clamp(_8.sample(_9, _144, level(0.0)).w * _7._m1, 0.0, 1.0);
+    float3 _130 = _129 * 0.5;
+    float4 _134 = float4(_130.x, _130.y, _130.z, float4(0.0).w);
+    float2 _144 = clamp(_82 + (float2(-1.0) * _7._m0.xy), _95, _96);
+    float3 _156 = float3(_11._m5) * clamp(_8.sample(_9, _144, level(0.0)).w * _7._m1, 0.0, 1.0);
     float4 _160 = _12.sample(_13, _144, level(0.0));
+    float _161 = _160.y;
     float3 _176;
-    if (_160.y > 0.0)
+    if (_161 > 0.0)
     {
-        _176 = _156 + (_14.sample(_15, _144, level(0.0)).xyz * clamp(_160.y * _160.z, 0.0, 1.0));
+        _176 = _156 + (_14.sample(_15, _144, level(0.0)).xyz * clamp(_161 * _160.z, 0.0, 1.0));
     }
     else
     {
         _176 = _156;
     }
-    float3 _180 = _134.xyz + (_176 * 0.5);
+    float3 _177 = _176 * 0.5;
+    float3 _180 = _134.xyz + _177;
     float4 _181 = float4(_180.x, _180.y, _180.z, _134.w);
-    _28 _182 = _135;
-    _182._m0 = _181;
-    float2 _191 = clamp(_82 + (float3(0.0, -1.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw);
-    float3 _203 = _11._m5 * clamp(_8.sample(_9, _191, level(0.0)).w * _7._m1, 0.0, 1.0);
+    float2 _191 = clamp(_82 + (float2(0.0, -1.0) * _7._m0.xy), _95, _96);
+    float3 _203 = float3(_11._m5) * clamp(_8.sample(_9, _191, level(0.0)).w * _7._m1, 0.0, 1.0);
     float4 _207 = _12.sample(_13, _191, level(0.0));
+    float _208 = _207.y;
     float3 _223;
-    if (_207.y > 0.0)
+    if (_208 > 0.0)
     {
-        _223 = _203 + (_14.sample(_15, _191, level(0.0)).xyz * clamp(_207.y * _207.z, 0.0, 1.0));
+        _223 = _203 + (_14.sample(_15, _191, level(0.0)).xyz * clamp(_208 * _207.z, 0.0, 1.0));
     }
     else
     {
         _223 = _203;
     }
-    float3 _227 = _181.xyz + (_223 * 0.75);
+    float3 _224 = _223 * 0.75;
+    float3 _227 = _181.xyz + _224;
     float4 _228 = float4(_227.x, _227.y, _227.z, _181.w);
-    _28 _229 = _182;
-    _229._m0 = _228;
-    float2 _238 = clamp(_82 + (float3(1.0, -1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
-    float3 _250 = _11._m5 * clamp(_8.sample(_9, _238, level(0.0)).w * _7._m1, 0.0, 1.0);
+    float2 _238 = clamp(_82 + (float2(1.0, -1.0) * _7._m0.xy), _95, _96);
+    float3 _250 = float3(_11._m5) * clamp(_8.sample(_9, _238, level(0.0)).w * _7._m1, 0.0, 1.0);
     float4 _254 = _12.sample(_13, _238, level(0.0));
+    float _255 = _254.y;
     float3 _270;
-    if (_254.y > 0.0)
+    if (_255 > 0.0)
     {
-        _270 = _250 + (_14.sample(_15, _238, level(0.0)).xyz * clamp(_254.y * _254.z, 0.0, 1.0));
+        _270 = _250 + (_14.sample(_15, _238, level(0.0)).xyz * clamp(_255 * _254.z, 0.0, 1.0));
     }
     else
     {
         _270 = _250;
     }
-    float3 _274 = _228.xyz + (_270 * 0.5);
+    float3 _271 = _270 * 0.5;
+    float3 _274 = _228.xyz + _271;
     float4 _275 = float4(_274.x, _274.y, _274.z, _228.w);
-    _28 _276 = _229;
-    _276._m0 = _275;
-    float2 _285 = clamp(_82 + (float3(-2.0, 0.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
-    float3 _297 = _11._m5 * clamp(_8.sample(_9, _285, level(0.0)).w * _7._m1, 0.0, 1.0);
+    float2 _285 = clamp(_82 + (float2(-2.0, 0.0) * _7._m0.xy), _95, _96);
+    float3 _297 = float3(_11._m5) * clamp(_8.sample(_9, _285, level(0.0)).w * _7._m1, 0.0, 1.0);
     float4 _301 = _12.sample(_13, _285, level(0.0));
+    float _302 = _301.y;
     float3 _317;
-    if (_301.y > 0.0)
+    if (_302 > 0.0)
     {
-        _317 = _297 + (_14.sample(_15, _285, level(0.0)).xyz * clamp(_301.y * _301.z, 0.0, 1.0));
+        _317 = _297 + (_14.sample(_15, _285, level(0.0)).xyz * clamp(_302 * _301.z, 0.0, 1.0));
     }
     else
     {
         _317 = _297;
     }
-    float3 _321 = _275.xyz + (_317 * 0.5);
+    float3 _318 = _317 * 0.5;
+    float3 _321 = _275.xyz + _318;
     float4 _322 = float4(_321.x, _321.y, _321.z, _275.w);
-    _28 _323 = _276;
-    _323._m0 = _322;
-    float2 _332 = clamp(_82 + (float3(-1.0, 0.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw);
-    float3 _344 = _11._m5 * clamp(_8.sample(_9, _332, level(0.0)).w * _7._m1, 0.0, 1.0);
+    float2 _332 = clamp(_82 + (float2(-1.0, 0.0) * _7._m0.xy), _95, _96);
+    float3 _344 = float3(_11._m5) * clamp(_8.sample(_9, _332, level(0.0)).w * _7._m1, 0.0, 1.0);
     float4 _348 = _12.sample(_13, _332, level(0.0));
+    float _349 = _348.y;
     float3 _364;
-    if (_348.y > 0.0)
+    if (_349 > 0.0)
     {
-        _364 = _344 + (_14.sample(_15, _332, level(0.0)).xyz * clamp(_348.y * _348.z, 0.0, 1.0));
+        _364 = _344 + (_14.sample(_15, _332, level(0.0)).xyz * clamp(_349 * _348.z, 0.0, 1.0));
     }
     else
     {
         _364 = _344;
     }
-    float3 _368 = _322.xyz + (_364 * 0.75);
+    float3 _365 = _364 * 0.75;
+    float3 _368 = _322.xyz + _365;
     float4 _369 = float4(_368.x, _368.y, _368.z, _322.w);
-    _28 _370 = _323;
-    _370._m0 = _369;
-    float2 _379 = clamp(_82 + (float3(0.0, 0.0, 1.0).xy * _7._m0.xy), _88.xy, _88.zw);
-    float3 _391 = _11._m5 * clamp(_8.sample(_9, _379, level(0.0)).w * _7._m1, 0.0, 1.0);
+    float2 _379 = clamp(_82, _95, _96);
+    float3 _391 = float3(_11._m5) * clamp(_8.sample(_9, _379, level(0.0)).w * _7._m1, 0.0, 1.0);
     float4 _395 = _12.sample(_13, _379, level(0.0));
+    float _396 = _395.y;
     float3 _411;
-    if (_395.y > 0.0)
+    if (_396 > 0.0)
     {
-        _411 = _391 + (_14.sample(_15, _379, level(0.0)).xyz * clamp(_395.y * _395.z, 0.0, 1.0));
+        _411 = _391 + (_14.sample(_15, _379, level(0.0)).xyz * clamp(_396 * _395.z, 0.0, 1.0));
     }
     else
     {
         _411 = _391;
     }
-    float3 _415 = _369.xyz + (_411 * 1.0);
+    float3 _412 = _411 * 1.0;
+    float3 _415 = _369.xyz + _412;
     float4 _416 = float4(_415.x, _415.y, _415.z, _369.w);
-    _28 _417 = _370;
-    _417._m0 = _416;
-    float2 _426 = clamp(_82 + (float3(1.0, 0.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw);
-    float3 _438 = _11._m5 * clamp(_8.sample(_9, _426, level(0.0)).w * _7._m1, 0.0, 1.0);
+    float2 _426 = clamp(_82 + (float2(1.0, 0.0) * _7._m0.xy), _95, _96);
+    float3 _438 = float3(_11._m5) * clamp(_8.sample(_9, _426, level(0.0)).w * _7._m1, 0.0, 1.0);
     float4 _442 = _12.sample(_13, _426, level(0.0));
+    float _443 = _442.y;
     float3 _458;
-    if (_442.y > 0.0)
+    if (_443 > 0.0)
     {
-        _458 = _438 + (_14.sample(_15, _426, level(0.0)).xyz * clamp(_442.y * _442.z, 0.0, 1.0));
+        _458 = _438 + (_14.sample(_15, _426, level(0.0)).xyz * clamp(_443 * _442.z, 0.0, 1.0));
     }
     else
     {
         _458 = _438;
     }
-    float3 _462 = _416.xyz + (_458 * 0.75);
+    float3 _459 = _458 * 0.75;
+    float3 _462 = _416.xyz + _459;
     float4 _463 = float4(_462.x, _462.y, _462.z, _416.w);
-    _28 _464 = _417;
-    _464._m0 = _463;
-    float2 _473 = clamp(_82 + (float3(2.0, 0.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
-    float3 _485 = _11._m5 * clamp(_8.sample(_9, _473, level(0.0)).w * _7._m1, 0.0, 1.0);
+    float2 _473 = clamp(_82 + (float2(2.0, 0.0) * _7._m0.xy), _95, _96);
+    float3 _485 = float3(_11._m5) * clamp(_8.sample(_9, _473, level(0.0)).w * _7._m1, 0.0, 1.0);
     float4 _489 = _12.sample(_13, _473, level(0.0));
+    float _490 = _489.y;
     float3 _505;
-    if (_489.y > 0.0)
+    if (_490 > 0.0)
     {
-        _505 = _485 + (_14.sample(_15, _473, level(0.0)).xyz * clamp(_489.y * _489.z, 0.0, 1.0));
+        _505 = _485 + (_14.sample(_15, _473, level(0.0)).xyz * clamp(_490 * _489.z, 0.0, 1.0));
     }
     else
     {
         _505 = _485;
     }
-    float3 _509 = _463.xyz + (_505 * 0.5);
+    float3 _506 = _505 * 0.5;
+    float3 _509 = _463.xyz + _506;
     float4 _510 = float4(_509.x, _509.y, _509.z, _463.w);
-    _28 _511 = _464;
-    _511._m0 = _510;
-    float2 _520 = clamp(_82 + (float3(-1.0, 1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
-    float3 _532 = _11._m5 * clamp(_8.sample(_9, _520, level(0.0)).w * _7._m1, 0.0, 1.0);
+    float2 _520 = clamp(_82 + (float2(-1.0, 1.0) * _7._m0.xy), _95, _96);
+    float3 _532 = float3(_11._m5) * clamp(_8.sample(_9, _520, level(0.0)).w * _7._m1, 0.0, 1.0);
     float4 _536 = _12.sample(_13, _520, level(0.0));
+    float _537 = _536.y;
     float3 _552;
-    if (_536.y > 0.0)
+    if (_537 > 0.0)
     {
-        _552 = _532 + (_14.sample(_15, _520, level(0.0)).xyz * clamp(_536.y * _536.z, 0.0, 1.0));
+        _552 = _532 + (_14.sample(_15, _520, level(0.0)).xyz * clamp(_537 * _536.z, 0.0, 1.0));
     }
     else
     {
         _552 = _532;
     }
-    float3 _556 = _510.xyz + (_552 * 0.5);
+    float3 _553 = _552 * 0.5;
+    float3 _556 = _510.xyz + _553;
     float4 _557 = float4(_556.x, _556.y, _556.z, _510.w);
-    _28 _558 = _511;
-    _558._m0 = _557;
-    float2 _567 = clamp(_82 + (float3(0.0, 1.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw);
-    float3 _579 = _11._m5 * clamp(_8.sample(_9, _567, level(0.0)).w * _7._m1, 0.0, 1.0);
+    float2 _567 = clamp(_82 + (float2(0.0, 1.0) * _7._m0.xy), _95, _96);
+    float3 _579 = float3(_11._m5) * clamp(_8.sample(_9, _567, level(0.0)).w * _7._m1, 0.0, 1.0);
     float4 _583 = _12.sample(_13, _567, level(0.0));
+    float _584 = _583.y;
     float3 _599;
-    if (_583.y > 0.0)
+    if (_584 > 0.0)
     {
-        _599 = _579 + (_14.sample(_15, _567, level(0.0)).xyz * clamp(_583.y * _583.z, 0.0, 1.0));
+        _599 = _579 + (_14.sample(_15, _567, level(0.0)).xyz * clamp(_584 * _583.z, 0.0, 1.0));
     }
     else
     {
         _599 = _579;
     }
-    float3 _603 = _557.xyz + (_599 * 0.75);
+    float3 _600 = _599 * 0.75;
+    float3 _603 = _557.xyz + _600;
     float4 _604 = float4(_603.x, _603.y, _603.z, _557.w);
-    _28 _605 = _558;
-    _605._m0 = _604;
-    float2 _614 = clamp(_82 + (float3(1.0, 1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
-    float3 _626 = _11._m5 * clamp(_8.sample(_9, _614, level(0.0)).w * _7._m1, 0.0, 1.0);
+    float2 _614 = clamp(_82 + _7._m0.xy, _95, _96);
+    float3 _626 = float3(_11._m5) * clamp(_8.sample(_9, _614, level(0.0)).w * _7._m1, 0.0, 1.0);
     float4 _630 = _12.sample(_13, _614, level(0.0));
+    float _631 = _630.y;
     float3 _646;
-    if (_630.y > 0.0)
+    if (_631 > 0.0)
     {
-        _646 = _626 + (_14.sample(_15, _614, level(0.0)).xyz * clamp(_630.y * _630.z, 0.0, 1.0));
+        _646 = _626 + (_14.sample(_15, _614, level(0.0)).xyz * clamp(_631 * _630.z, 0.0, 1.0));
     }
     else
     {
         _646 = _626;
     }
-    float3 _650 = _604.xyz + (_646 * 0.5);
+    float3 _647 = _646 * 0.5;
+    float3 _650 = _604.xyz + _647;
     float4 _651 = float4(_650.x, _650.y, _650.z, _604.w);
-    _28 _652 = _605;
-    _652._m0 = _651;
-    float2 _661 = clamp(_82 + (float3(0.0, 2.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
-    float3 _673 = _11._m5 * clamp(_8.sample(_9, _661, level(0.0)).w * _7._m1, 0.0, 1.0);
+    float2 _661 = clamp(_82 + (float2(0.0, 2.0) * _7._m0.xy), _95, _96);
+    float3 _673 = float3(_11._m5) * clamp(_8.sample(_9, _661, level(0.0)).w * _7._m1, 0.0, 1.0);
     float4 _677 = _12.sample(_13, _661, level(0.0));
+    float _678 = _677.y;
     float3 _693;
-    if (_677.y > 0.0)
+    if (_678 > 0.0)
     {
-        _693 = _673 + (_14.sample(_15, _661, level(0.0)).xyz * clamp(_677.y * _677.z, 0.0, 1.0));
+        _693 = _673 + (_14.sample(_15, _661, level(0.0)).xyz * clamp(_678 * _677.z, 0.0, 1.0));
     }
     else
     {
@@ -308,10 +308,8 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff
     }
     float3 _697 = _651.xyz + (_693 * 0.5);
     float4 _698 = float4(_697.x, _697.y, _697.z, _651.w);
-    _28 _699 = _652;
-    _699._m0 = _698;
-    float3 _702 = _698.xyz / float3(((((((((((((0.0 + 0.5) + 0.5) + 0.75) + 0.5) + 0.5) + 0.75) + 1.0) + 0.75) + 0.5) + 0.5) + 0.75) + 0.5) + 0.5);
-    _28 _704 = _699;
+    float3 _702 = _698.xyz * float3(0.125);
+    _28 _704 = _74;
     _704._m0 = float4(_702.x, _702.y, _702.z, _698.w);
     _28 _705 = _704;
     _705._m0.w = 1.0;
diff --git a/reference/opt/shaders-msl/asm/vert/empty-struct-composite.asm.vert b/reference/opt/shaders-msl/asm/vert/packing-test.asm.vert
similarity index 100%
rename from reference/opt/shaders-msl/asm/vert/empty-struct-composite.asm.vert
rename to reference/opt/shaders-msl/asm/vert/packing-test.asm.vert
diff --git a/reference/opt/shaders-msl/asm/vert/spec-constant-op-composite.asm.vert b/reference/opt/shaders-msl/asm/vert/spec-constant-op-composite.asm.vert
new file mode 100644
index 0000000000..a67634fbd8
--- /dev/null
+++ b/reference/opt/shaders-msl/asm/vert/spec-constant-op-composite.asm.vert
@@ -0,0 +1,35 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+constant int _7_tmp [[function_constant(201)]];
+constant int _7 = is_function_constant_defined(_7_tmp) ? _7_tmp : -10;
+constant uint _8_tmp [[function_constant(202)]];
+constant uint _8 = is_function_constant_defined(_8_tmp) ? _8_tmp : 100u;
+constant int _20 = (_7 + 2);
+constant uint _25 = (_8 % 5u);
+constant int4 _30 = int4(20, 30, _20, _20);
+constant int2 _32 = int2(_30.y, _30.x);
+constant int _33 = _30.y;
+
+struct main0_out
+{
+    int m_4 [[user(locn0)]];
+    float4 gl_Position [[position]];
+};
+
+vertex main0_out main0()
+{
+    main0_out out = {};
+    float4 _64 = float4(0.0);
+    _64.y = float(_20);
+    float4 _68 = _64;
+    _68.z = float(_25);
+    float4 _52 = _68 + float4(_30);
+    float2 _56 = _52.xy + float2(_32);
+    out.gl_Position = float4(_56.x, _56.y, _52.z, _52.w);
+    out.m_4 = _33;
+    return out;
+}
+
diff --git a/reference/opt/shaders-msl/comp/access-private-workgroup-in-function.comp b/reference/opt/shaders-msl/comp/access-private-workgroup-in-function.comp
new file mode 100644
index 0000000000..44405126d3
--- /dev/null
+++ b/reference/opt/shaders-msl/comp/access-private-workgroup-in-function.comp
@@ -0,0 +1,18 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+kernel void main0(uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]])
+{
+    threadgroup int u;
+    u = 50;
+    if (gl_LocalInvocationIndex == 0u)
+    {
+    }
+    else
+    {
+        u = 20;
+    }
+}
+
diff --git a/reference/opt/shaders-msl/comp/atomic.comp b/reference/opt/shaders-msl/comp/atomic.comp
index 90a39ec643..f77922aca0 100644
--- a/reference/opt/shaders-msl/comp/atomic.comp
+++ b/reference/opt/shaders-msl/comp/atomic.comp
@@ -12,25 +12,59 @@ struct SSBO
     int i32;
 };
 
-kernel void main0(device SSBO& ssbo [[buffer(0)]])
+kernel void main0(device SSBO& ssbo [[buffer(2)]])
 {
-    uint _16 = atomic_fetch_add_explicit((volatile device atomic_uint*)&(ssbo.u32), 1u, memory_order_relaxed);
-    uint _18 = atomic_fetch_or_explicit((volatile device atomic_uint*)&(ssbo.u32), 1u, memory_order_relaxed);
-    uint _20 = atomic_fetch_xor_explicit((volatile device atomic_uint*)&(ssbo.u32), 1u, memory_order_relaxed);
-    uint _22 = atomic_fetch_and_explicit((volatile device atomic_uint*)&(ssbo.u32), 1u, memory_order_relaxed);
-    uint _24 = atomic_fetch_min_explicit((volatile device atomic_uint*)&(ssbo.u32), 1u, memory_order_relaxed);
-    uint _26 = atomic_fetch_max_explicit((volatile device atomic_uint*)&(ssbo.u32), 1u, memory_order_relaxed);
-    uint _28 = atomic_exchange_explicit((volatile device atomic_uint*)&(ssbo.u32), 1u, memory_order_relaxed);
-    uint _30 = 10u;
-    uint _32 = atomic_compare_exchange_weak_explicit((volatile device atomic_uint*)&(ssbo.u32), &(_30), 2u, memory_order_relaxed, memory_order_relaxed);
-    int _36 = atomic_fetch_add_explicit((volatile device atomic_int*)&(ssbo.i32), 1, memory_order_relaxed);
-    int _38 = atomic_fetch_or_explicit((volatile device atomic_int*)&(ssbo.i32), 1, memory_order_relaxed);
-    int _40 = atomic_fetch_xor_explicit((volatile device atomic_int*)&(ssbo.i32), 1, memory_order_relaxed);
-    int _42 = atomic_fetch_and_explicit((volatile device atomic_int*)&(ssbo.i32), 1, memory_order_relaxed);
-    int _44 = atomic_fetch_min_explicit((volatile device atomic_int*)&(ssbo.i32), 1, memory_order_relaxed);
-    int _46 = atomic_fetch_max_explicit((volatile device atomic_int*)&(ssbo.i32), 1, memory_order_relaxed);
-    int _48 = atomic_exchange_explicit((volatile device atomic_int*)&(ssbo.i32), 1, memory_order_relaxed);
-    int _50 = 10;
-    int _52 = atomic_compare_exchange_weak_explicit((volatile device atomic_int*)&(ssbo.i32), &(_50), 2, memory_order_relaxed, memory_order_relaxed);
+    threadgroup uint shared_u32;
+    threadgroup int shared_i32;
+    uint _16 = atomic_fetch_add_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _18 = atomic_fetch_or_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _20 = atomic_fetch_xor_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _22 = atomic_fetch_and_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _24 = atomic_fetch_min_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _26 = atomic_fetch_max_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _28 = atomic_exchange_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _32;
+    do
+    {
+        _32 = 10u;
+    } while (!atomic_compare_exchange_weak_explicit((volatile device atomic_uint*)&ssbo.u32, &_32, 2u, memory_order_relaxed, memory_order_relaxed));
+    int _36 = atomic_fetch_add_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    int _38 = atomic_fetch_or_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    int _40 = atomic_fetch_xor_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    int _42 = atomic_fetch_and_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    int _44 = atomic_fetch_min_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    int _46 = atomic_fetch_max_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    int _48 = atomic_exchange_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    int _52;
+    do
+    {
+        _52 = 10;
+    } while (!atomic_compare_exchange_weak_explicit((volatile device atomic_int*)&ssbo.i32, &_52, 2, memory_order_relaxed, memory_order_relaxed));
+    shared_u32 = 10u;
+    shared_i32 = 10;
+    uint _57 = atomic_fetch_add_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _58 = atomic_fetch_or_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _59 = atomic_fetch_xor_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _60 = atomic_fetch_and_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _61 = atomic_fetch_min_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _62 = atomic_fetch_max_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _63 = atomic_exchange_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _64;
+    do
+    {
+        _64 = 10u;
+    } while (!atomic_compare_exchange_weak_explicit((volatile threadgroup atomic_uint*)&shared_u32, &_64, 2u, memory_order_relaxed, memory_order_relaxed));
+    int _65 = atomic_fetch_add_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    int _66 = atomic_fetch_or_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    int _67 = atomic_fetch_xor_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    int _68 = atomic_fetch_and_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    int _69 = atomic_fetch_min_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    int _70 = atomic_fetch_max_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    int _71 = atomic_exchange_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    int _72;
+    do
+    {
+        _72 = 10;
+    } while (!atomic_compare_exchange_weak_explicit((volatile threadgroup atomic_int*)&shared_i32, &_72, 2, memory_order_relaxed, memory_order_relaxed));
 }
 
diff --git a/reference/opt/shaders-msl/comp/bake_gradient.comp b/reference/opt/shaders-msl/comp/bake_gradient.comp
deleted file mode 100644
index fe7ac2b7d4..0000000000
--- a/reference/opt/shaders-msl/comp/bake_gradient.comp
+++ /dev/null
@@ -1,22 +0,0 @@
-#include <metal_stdlib>
-#include <simd/simd.h>
-
-using namespace metal;
-
-constant uint3 gl_WorkGroupSize = uint3(8u, 8u, 1u);
-
-struct UBO
-{
-    float4 uInvSize;
-    float4 uScale;
-};
-
-kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], constant UBO& _46 [[buffer(0)]], texture2d<float> uHeight [[texture(0)]], sampler uHeightSmplr [[sampler(0)]], texture2d<float> uDisplacement [[texture(1)]], sampler uDisplacementSmplr [[sampler(1)]], texture2d<float, access::write> iHeightDisplacement [[texture(2)]], texture2d<float, access::write> iGradJacobian [[texture(3)]])
-{
-    float4 _59 = (float2(gl_GlobalInvocationID.xy) * _46.uInvSize.xy).xyxy + (_46.uInvSize * 0.5);
-    float2 _157 = ((uDisplacement.sample(uDisplacementSmplr, _59.zw, level(0.0), int2(1, 0)).xy - uDisplacement.sample(uDisplacementSmplr, _59.zw, level(0.0), int2(-1, 0)).xy) * 0.60000002384185791015625) * _46.uScale.z;
-    float2 _161 = ((uDisplacement.sample(uDisplacementSmplr, _59.zw, level(0.0), int2(0, 1)).xy - uDisplacement.sample(uDisplacementSmplr, _59.zw, level(0.0), int2(0, -1)).xy) * 0.60000002384185791015625) * _46.uScale.z;
-    iHeightDisplacement.write(float4(uHeight.sample(uHeightSmplr, _59.xy, level(0.0)).x, 0.0, 0.0, 0.0), uint2(int2(gl_GlobalInvocationID.xy)));
-    iGradJacobian.write(float4((_46.uScale.xy * 0.5) * float2(uHeight.sample(uHeightSmplr, _59.xy, level(0.0), int2(1, 0)).x - uHeight.sample(uHeightSmplr, _59.xy, level(0.0), int2(-1, 0)).x, uHeight.sample(uHeightSmplr, _59.xy, level(0.0), int2(0, 1)).x - uHeight.sample(uHeightSmplr, _59.xy, level(0.0), int2(0, -1)).x), ((1.0 + _157.x) * (1.0 + _161.y)) - (_157.y * _161.x), 0.0), uint2(int2(gl_GlobalInvocationID.xy)));
-}
-
diff --git a/reference/opt/shaders-msl/comp/basic.comp b/reference/opt/shaders-msl/comp/basic.comp
index c41f7c0acf..22ec741965 100644
--- a/reference/opt/shaders-msl/comp/basic.comp
+++ b/reference/opt/shaders-msl/comp/basic.comp
@@ -21,12 +21,12 @@ struct SSBO3
     uint counter;
 };
 
-kernel void main0(device SSBO& _23 [[buffer(0)]], device SSBO2& _45 [[buffer(1)]], device SSBO3& _48 [[buffer(2)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
+kernel void main0(const device SSBO& _23 [[buffer(0)]], device SSBO2& _45 [[buffer(1)]], device SSBO3& _48 [[buffer(2)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
 {
     float4 _29 = _23.in_data[gl_GlobalInvocationID.x];
     if (dot(_29, float4(1.0, 5.0, 6.0, 2.0)) > 8.19999980926513671875)
     {
-        uint _52 = atomic_fetch_add_explicit((volatile device atomic_uint*)&(_48.counter), 1u, memory_order_relaxed);
+        uint _52 = atomic_fetch_add_explicit((volatile device atomic_uint*)&_48.counter, 1u, memory_order_relaxed);
         _45.out_data[_52] = _29;
     }
 }
diff --git a/reference/opt/shaders-msl/comp/builtins.comp b/reference/opt/shaders-msl/comp/builtins.comp
index 8278220225..189bb1b751 100644
--- a/reference/opt/shaders-msl/comp/builtins.comp
+++ b/reference/opt/shaders-msl/comp/builtins.comp
@@ -3,6 +3,8 @@
 
 using namespace metal;
 
+constant uint3 gl_WorkGroupSize = uint3(8u, 4u, 2u);
+
 kernel void main0(uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_NumWorkGroups [[threadgroups_per_grid]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
 {
 }
diff --git a/reference/opt/shaders-msl/comp/coherent-block.comp b/reference/opt/shaders-msl/comp/coherent-block.comp
index bec9b218c7..963574acd6 100644
--- a/reference/opt/shaders-msl/comp/coherent-block.comp
+++ b/reference/opt/shaders-msl/comp/coherent-block.comp
@@ -8,7 +8,7 @@ struct SSBO
     float4 value;
 };
 
-kernel void main0(device SSBO& _10 [[buffer(0)]])
+kernel void main0(device SSBO& _10 [[buffer(1)]])
 {
     _10.value = float4(20.0);
 }
diff --git a/reference/opt/shaders-msl/comp/coherent-image.comp b/reference/opt/shaders-msl/comp/coherent-image.comp
index 0fe044fb9a..827a247125 100644
--- a/reference/opt/shaders-msl/comp/coherent-image.comp
+++ b/reference/opt/shaders-msl/comp/coherent-image.comp
@@ -8,7 +8,7 @@ struct SSBO
     int4 value;
 };
 
-kernel void main0(device SSBO& _10 [[buffer(0)]], texture2d<int> uImage [[texture(0)]])
+kernel void main0(device SSBO& _10 [[buffer(1)]], texture2d<int> uImage [[texture(3)]])
 {
     _10.value = uImage.read(uint2(int2(10)));
 }
diff --git a/reference/opt/shaders-msl/comp/composite-construct.comp b/reference/opt/shaders-msl/comp/composite-construct.comp
new file mode 100644
index 0000000000..2d9fdccaa7
--- /dev/null
+++ b/reference/opt/shaders-msl/comp/composite-construct.comp
@@ -0,0 +1,35 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct SSBO0
+{
+    float4 as[1];
+};
+
+// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment.
+template<typename T, uint N>
+void spvArrayCopy(thread T (&dst)[N], thread const T (&src)[N])
+{
+    for (uint i = 0; i < N; dst[i] = src[i], i++);
+}
+
+// An overload for constant arrays.
+template<typename T, uint N>
+void spvArrayCopyConstant(thread T (&dst)[N], constant T (&src)[N])
+{
+    for (uint i = 0; i < N; dst[i] = src[i], i++);
+}
+
+kernel void main0(device SSBO0& _16 [[buffer(0)]], device SSBO0& _32 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]])
+{
+    float4 _37[2] = { _16.as[gl_GlobalInvocationID.x], _32.as[gl_GlobalInvocationID.x] };
+    float4 values[2];
+    spvArrayCopy(values, _37);
+    _16.as[0] = values[gl_LocalInvocationIndex];
+    _32.as[1] = float4(40.0);
+}
+
diff --git a/reference/opt/shaders-msl/comp/culling.comp b/reference/opt/shaders-msl/comp/culling.comp
index b20480bb45..9eac8c9ffb 100644
--- a/reference/opt/shaders-msl/comp/culling.comp
+++ b/reference/opt/shaders-msl/comp/culling.comp
@@ -23,12 +23,12 @@ struct SSBO3
     uint count;
 };
 
-kernel void main0(device SSBO& _22 [[buffer(0)]], device SSBO2& _38 [[buffer(1)]], device SSBO3& _41 [[buffer(2)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
+kernel void main0(const device SSBO& _22 [[buffer(0)]], device SSBO2& _38 [[buffer(1)]], device SSBO3& _41 [[buffer(2)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
 {
     float _28 = _22.in_data[gl_GlobalInvocationID.x];
     if (_28 > 12.0)
     {
-        uint _45 = atomic_fetch_add_explicit((volatile device atomic_uint*)&(_41.count), 1u, memory_order_relaxed);
+        uint _45 = atomic_fetch_add_explicit((volatile device atomic_uint*)&_41.count, 1u, memory_order_relaxed);
         _38.out_data[_45] = _28;
     }
 }
diff --git a/reference/opt/shaders-msl/comp/defer-parens.comp b/reference/opt/shaders-msl/comp/defer-parens.comp
index b9a742a13c..69a8aab92d 100644
--- a/reference/opt/shaders-msl/comp/defer-parens.comp
+++ b/reference/opt/shaders-msl/comp/defer-parens.comp
@@ -12,10 +12,11 @@ struct SSBO
 kernel void main0(device SSBO& _13 [[buffer(0)]])
 {
     float4 _17 = _13.data;
-    _13.data = float4(_17.x, _17.yz + float2(10.0), _17.w);
+    float2 _28 = _17.yz + float2(10.0);
+    _13.data = float4(_17.x, _28, _17.w);
     _13.data = (_17 + _17) + _17;
-    _13.data = (_17.yz + float2(10.0)).xxyy;
-    _13.data = float4((_17.yz + float2(10.0)).y);
+    _13.data = _28.xxyy;
+    _13.data = float4(_28.y);
     _13.data = float4((_17.zw + float2(10.0))[_13.index]);
 }
 
diff --git a/reference/opt/shaders-msl/comp/dowhile.comp b/reference/opt/shaders-msl/comp/dowhile.comp
index d76ca819c4..5047a4854b 100644
--- a/reference/opt/shaders-msl/comp/dowhile.comp
+++ b/reference/opt/shaders-msl/comp/dowhile.comp
@@ -14,7 +14,7 @@ struct SSBO2
     float4 out_data[1];
 };
 
-kernel void main0(device SSBO& _28 [[buffer(0)]], device SSBO2& _52 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
+kernel void main0(const device SSBO& _28 [[buffer(0)]], device SSBO2& _52 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
 {
     int i = 0;
     float4 _56;
@@ -27,7 +27,6 @@ kernel void main0(device SSBO& _28 [[buffer(0)]], device SSBO2& _52 [[buffer(1)]
         if (i < 16)
         {
             _56 = _42;
-            continue;
         }
         else
         {
diff --git a/reference/opt/shaders-msl/comp/image-cube-array-load-store.comp b/reference/opt/shaders-msl/comp/image-cube-array-load-store.comp
new file mode 100644
index 0000000000..1eeaf87cf4
--- /dev/null
+++ b/reference/opt/shaders-msl/comp/image-cube-array-load-store.comp
@@ -0,0 +1,10 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+kernel void main0(texturecube_array<float> uImageIn [[texture(0)]], texturecube_array<float, access::write> uImageOut [[texture(1)]])
+{
+    uImageOut.write(uImageIn.read(uint2(int3(9, 7, 11).xy), uint(int3(9, 7, 11).z) % 6u, uint(int3(9, 7, 11).z) / 6u), uint2(int3(9, 7, 11).xy), uint(int3(9, 7, 11).z) % 6u, uint(int3(9, 7, 11).z) / 6u);
+}
+
diff --git a/reference/opt/shaders-msl/comp/image.comp b/reference/opt/shaders-msl/comp/image.comp
index d615fb2736..447732dd23 100644
--- a/reference/opt/shaders-msl/comp/image.comp
+++ b/reference/opt/shaders-msl/comp/image.comp
@@ -5,6 +5,7 @@ using namespace metal;
 
 kernel void main0(texture2d<float> uImageIn [[texture(0)]], texture2d<float, access::write> uImageOut [[texture(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
 {
-    uImageOut.write(uImageIn.read(uint2((int2(gl_GlobalInvocationID.xy) + int2(uImageIn.get_width(), uImageIn.get_height())))), uint2(int2(gl_GlobalInvocationID.xy)));
+    int2 _23 = int2(gl_GlobalInvocationID.xy);
+    uImageOut.write(uImageIn.read(uint2((_23 + int2(uImageIn.get_width(), uImageIn.get_height())))), uint2(_23));
 }
 
diff --git a/reference/opt/shaders-msl/comp/inverse.comp b/reference/opt/shaders-msl/comp/inverse.comp
index 567dba2c21..f2f499b91e 100644
--- a/reference/opt/shaders-msl/comp/inverse.comp
+++ b/reference/opt/shaders-msl/comp/inverse.comp
@@ -114,7 +114,7 @@ float2x2 spvInverse2x2(float2x2 m)
     return (det != 0.0f) ? (adj * (1.0f / det)) : m;
 }
 
-kernel void main0(device MatrixOut& _15 [[buffer(0)]], device MatrixIn& _20 [[buffer(1)]])
+kernel void main0(device MatrixOut& _15 [[buffer(0)]], const device MatrixIn& _20 [[buffer(1)]])
 {
     _15.m2out = spvInverse2x2(_20.m2in);
     _15.m3out = spvInverse3x3(_20.m3in);
diff --git a/reference/opt/shaders-msl/comp/mat3.comp b/reference/opt/shaders-msl/comp/mat3.comp
index 72f08dd85e..adf7e9496b 100644
--- a/reference/opt/shaders-msl/comp/mat3.comp
+++ b/reference/opt/shaders-msl/comp/mat3.comp
@@ -8,7 +8,7 @@ struct SSBO2
     float3x3 out_data[1];
 };
 
-kernel void main0(device SSBO2& _22 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
+kernel void main0(device SSBO2& _22 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
 {
     _22.out_data[gl_GlobalInvocationID.x] = float3x3(float3(10.0), float3(20.0), float3(40.0));
 }
diff --git a/reference/opt/shaders-msl/comp/mod.comp b/reference/opt/shaders-msl/comp/mod.comp
index 86bde9c27d..8574f87b7e 100644
--- a/reference/opt/shaders-msl/comp/mod.comp
+++ b/reference/opt/shaders-msl/comp/mod.comp
@@ -22,7 +22,7 @@ Tx mod(Tx x, Ty y)
     return x - y * floor(x / y);
 }
 
-kernel void main0(device SSBO& _23 [[buffer(0)]], device SSBO2& _33 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
+kernel void main0(const device SSBO& _23 [[buffer(0)]], device SSBO2& _33 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
 {
     _33.out_data[gl_GlobalInvocationID.x] = mod(_23.in_data[gl_GlobalInvocationID.x], _33.out_data[gl_GlobalInvocationID.x]);
     _33.out_data[gl_GlobalInvocationID.x] = as_type<float4>(as_type<uint4>(_23.in_data[gl_GlobalInvocationID.x]) % as_type<uint4>(_33.out_data[gl_GlobalInvocationID.x]));
diff --git a/reference/opt/shaders-msl/comp/modf.comp b/reference/opt/shaders-msl/comp/modf.comp
index 40cbb40cea..39e402337f 100644
--- a/reference/opt/shaders-msl/comp/modf.comp
+++ b/reference/opt/shaders-msl/comp/modf.comp
@@ -13,7 +13,7 @@ struct SSBO2
     float4 out_data[1];
 };
 
-kernel void main0(device SSBO& _23 [[buffer(0)]], device SSBO2& _35 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
+kernel void main0(const device SSBO& _23 [[buffer(0)]], device SSBO2& _35 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
 {
     float4 i;
     float4 _31 = modf(_23.in_data[gl_GlobalInvocationID.x], i);
diff --git a/reference/opt/shaders-msl/comp/packing-test-1.comp b/reference/opt/shaders-msl/comp/packing-test-1.comp
new file mode 100644
index 0000000000..44d2b2d74c
--- /dev/null
+++ b/reference/opt/shaders-msl/comp/packing-test-1.comp
@@ -0,0 +1,28 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+constant uint3 gl_WorkGroupSize = uint3(32u, 1u, 1u);
+
+struct T1
+{
+    packed_float3 a;
+    float b;
+};
+
+struct Buffer0
+{
+    T1 buf0[1];
+};
+
+struct Buffer1
+{
+    float buf1[1];
+};
+
+kernel void main0(device Buffer0& _15 [[buffer(1)]], device Buffer1& _34 [[buffer(2)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
+{
+    _34.buf1[gl_GlobalInvocationID.x] = _15.buf0[0].b;
+}
+
diff --git a/reference/opt/shaders-msl/comp/packing-test-2.comp b/reference/opt/shaders-msl/comp/packing-test-2.comp
new file mode 100644
index 0000000000..4cc9c673ae
--- /dev/null
+++ b/reference/opt/shaders-msl/comp/packing-test-2.comp
@@ -0,0 +1,28 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+constant uint3 gl_WorkGroupSize = uint3(32u, 1u, 1u);
+
+struct T1
+{
+    packed_float3 a;
+    float b;
+};
+
+struct Buffer0
+{
+    T1 buf0[1];
+};
+
+struct Buffer1
+{
+    float buf1[1];
+};
+
+kernel void main0(device Buffer0& _14 [[buffer(1)]], device Buffer1& _24 [[buffer(2)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
+{
+    _24.buf1[gl_GlobalInvocationID.x] = _14.buf0[0].b;
+}
+
diff --git a/reference/opt/shaders-msl/comp/read-write-only.comp b/reference/opt/shaders-msl/comp/read-write-only.comp
index ba53b334ba..42c625092a 100644
--- a/reference/opt/shaders-msl/comp/read-write-only.comp
+++ b/reference/opt/shaders-msl/comp/read-write-only.comp
@@ -21,7 +21,7 @@ struct SSBO1
     float4 data3;
 };
 
-kernel void main0(device SSBO2& _10 [[buffer(0)]], device SSBO0& _15 [[buffer(1)]], device SSBO1& _21 [[buffer(2)]])
+kernel void main0(const device SSBO0& _15 [[buffer(0)]], device SSBO1& _21 [[buffer(1)]], device SSBO2& _10 [[buffer(2)]])
 {
     _10.data4 = _15.data0 + _21.data2;
     _10.data5 = _15.data1 + _21.data3;
diff --git a/reference/opt/shaders-msl/comp/return.comp b/reference/opt/shaders-msl/comp/return.comp
deleted file mode 100644
index 06ce8d7662..0000000000
--- a/reference/opt/shaders-msl/comp/return.comp
+++ /dev/null
@@ -1,33 +0,0 @@
-#include <metal_stdlib>
-#include <simd/simd.h>
-
-using namespace metal;
-
-struct SSBO2
-{
-    float4 out_data[1];
-};
-
-constant int _69 = {};
-
-kernel void main0(device SSBO2& _27 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
-{
-    if (gl_GlobalInvocationID.x == 2u)
-    {
-        _27.out_data[gl_GlobalInvocationID.x] = float4(20.0);
-    }
-    else
-    {
-        if (gl_GlobalInvocationID.x == 4u)
-        {
-            _27.out_data[gl_GlobalInvocationID.x] = float4(10.0);
-            return;
-        }
-    }
-    for (int _68 = 0; _68 < 20; _68 = _69 + 1)
-    {
-        return;
-    }
-    _27.out_data[gl_GlobalInvocationID.x] = float4(10.0);
-}
-
diff --git a/reference/opt/shaders-msl/comp/rmw-matrix.comp b/reference/opt/shaders-msl/comp/rmw-matrix.comp
new file mode 100644
index 0000000000..150db7ede9
--- /dev/null
+++ b/reference/opt/shaders-msl/comp/rmw-matrix.comp
@@ -0,0 +1,22 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct SSBO
+{
+    float a;
+    float4 b;
+    float4x4 c;
+    float a1;
+    float4 b1;
+    float4x4 c1;
+};
+
+kernel void main0(device SSBO& _11 [[buffer(0)]])
+{
+    _11.a *= _11.a1;
+    _11.b *= _11.b1;
+    _11.c = _11.c * _11.c1;
+}
+
diff --git a/reference/opt/shaders-msl/comp/rmw-opt.comp b/reference/opt/shaders-msl/comp/rmw-opt.comp
index 4bbd8b3c71..05e1f6f283 100644
--- a/reference/opt/shaders-msl/comp/rmw-opt.comp
+++ b/reference/opt/shaders-msl/comp/rmw-opt.comp
@@ -20,7 +20,6 @@ kernel void main0(device SSBO& _9 [[buffer(0)]])
     _9.a ^= 10;
     _9.a %= 40;
     _9.a |= 1;
-    bool _65 = false && true;
-    _9.a = int(_65 && (true || _65));
+    _9.a = 0;
 }
 
diff --git a/reference/opt/shaders-msl/comp/shared-array-of-arrays.comp b/reference/opt/shaders-msl/comp/shared-array-of-arrays.comp
index db1ffebf2b..3884c22f5a 100644
--- a/reference/opt/shaders-msl/comp/shared-array-of-arrays.comp
+++ b/reference/opt/shaders-msl/comp/shared-array-of-arrays.comp
@@ -15,6 +15,6 @@ kernel void main0(device SSBO& _67 [[buffer(0)]], uint3 gl_LocalInvocationID [[t
     threadgroup float foo[4][4];
     foo[gl_LocalInvocationID.x][gl_LocalInvocationID.y] = float(gl_LocalInvocationIndex);
     threadgroup_barrier(mem_flags::mem_threadgroup);
-    _67.out_data[gl_GlobalInvocationID.x] = (((0.0 + foo[gl_LocalInvocationID.x][0]) + foo[gl_LocalInvocationID.x][1]) + foo[gl_LocalInvocationID.x][2]) + foo[gl_LocalInvocationID.x][3];
+    _67.out_data[gl_GlobalInvocationID.x] = ((foo[gl_LocalInvocationID.x][0] + foo[gl_LocalInvocationID.x][1]) + foo[gl_LocalInvocationID.x][2]) + foo[gl_LocalInvocationID.x][3];
 }
 
diff --git a/reference/opt/shaders-msl/comp/shared.comp b/reference/opt/shaders-msl/comp/shared.comp
index ef82961e2b..e58638b5cb 100644
--- a/reference/opt/shaders-msl/comp/shared.comp
+++ b/reference/opt/shaders-msl/comp/shared.comp
@@ -15,11 +15,11 @@ struct SSBO2
     float out_data[1];
 };
 
-kernel void main0(device SSBO& _22 [[buffer(0)]], device SSBO2& _44 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]])
+kernel void main0(const device SSBO& _22 [[buffer(0)]], device SSBO2& _44 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]])
 {
     threadgroup float sShared[4];
     sShared[gl_LocalInvocationIndex] = _22.in_data[gl_GlobalInvocationID.x];
     threadgroup_barrier(mem_flags::mem_threadgroup);
-    _44.out_data[gl_GlobalInvocationID.x] = sShared[(4u - gl_LocalInvocationIndex) - 1u];
+    _44.out_data[gl_GlobalInvocationID.x] = sShared[3u - gl_LocalInvocationIndex];
 }
 
diff --git a/reference/opt/shaders-msl/comp/struct-layout.comp b/reference/opt/shaders-msl/comp/struct-layout.comp
index aa11cc966a..e7bb53ece6 100644
--- a/reference/opt/shaders-msl/comp/struct-layout.comp
+++ b/reference/opt/shaders-msl/comp/struct-layout.comp
@@ -18,7 +18,7 @@ struct SSBO
     Foo in_data[1];
 };
 
-kernel void main0(device SSBO2& _23 [[buffer(0)]], device SSBO& _30 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
+kernel void main0(const device SSBO& _30 [[buffer(0)]], device SSBO2& _23 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
 {
     _23.out_data[gl_GlobalInvocationID.x].m = _30.in_data[gl_GlobalInvocationID.x].m * _30.in_data[gl_GlobalInvocationID.x].m;
 }
diff --git a/reference/opt/shaders-msl/comp/struct-nested.comp b/reference/opt/shaders-msl/comp/struct-nested.comp
index 0741b011c7..9abc498f4e 100644
--- a/reference/opt/shaders-msl/comp/struct-nested.comp
+++ b/reference/opt/shaders-msl/comp/struct-nested.comp
@@ -18,12 +18,8 @@ struct dstbuffer
     s2 test[1];
 };
 
-constant s2 _31 = {};
-
-kernel void main0(device dstbuffer& _19 [[buffer(0)]])
+kernel void main0(device dstbuffer& _19 [[buffer(1)]])
 {
-    s2 _30 = _31;
-    _30.b.a = 0;
-    _19.test[0].b.a = _30.b.a;
+    _19.test[0].b.a = 0;
 }
 
diff --git a/reference/opt/shaders-msl/comp/struct-packing.comp b/reference/opt/shaders-msl/comp/struct-packing.comp
index f59cba5b7d..a042f7aa28 100644
--- a/reference/opt/shaders-msl/comp/struct-packing.comp
+++ b/reference/opt/shaders-msl/comp/struct-packing.comp
@@ -67,39 +67,60 @@ struct SSBO1
     float array[1];
 };
 
+struct S0_1
+{
+    float2 a[1];
+    float b;
+};
+
+struct Content_1
+{
+    S0_1 m0s[1];
+    S1 m1s[1];
+    S2 m2s[1];
+    S0_1 m0;
+    S1 m1;
+    S2 m2;
+    S3 m3;
+    char pad7[4];
+    float m4;
+    S4 m3s[8];
+};
+
 struct SSBO0
 {
-    Content content;
-    Content content1[2];
-    Content content2;
+    Content_1 content;
+    Content_1 content1[2];
+    Content_1 content2;
     float array[1];
 };
 
-kernel void main0(device SSBO1& ssbo_430 [[buffer(0)]], device SSBO0& ssbo_140 [[buffer(1)]])
+kernel void main0(device SSBO0& ssbo_140 [[buffer(0)]], device SSBO1& ssbo_430 [[buffer(1)]])
 {
-    ssbo_430.content.m0s[0].a[0] = ssbo_140.content.m0s[0].a[0];
-    ssbo_430.content.m0s[0].b = ssbo_140.content.m0s[0].b;
-    ssbo_430.content.m1s[0].a = ssbo_140.content.m1s[0].a;
-    ssbo_430.content.m1s[0].b = ssbo_140.content.m1s[0].b;
-    ssbo_430.content.m2s[0].a[0] = ssbo_140.content.m2s[0].a[0];
-    ssbo_430.content.m2s[0].b = ssbo_140.content.m2s[0].b;
-    ssbo_430.content.m0.a[0] = ssbo_140.content.m0.a[0];
-    ssbo_430.content.m0.b = ssbo_140.content.m0.b;
-    ssbo_430.content.m1.a = ssbo_140.content.m1.a;
-    ssbo_430.content.m1.b = ssbo_140.content.m1.b;
-    ssbo_430.content.m2.a[0] = ssbo_140.content.m2.a[0];
-    ssbo_430.content.m2.b = ssbo_140.content.m2.b;
-    ssbo_430.content.m3.a = ssbo_140.content.m3.a;
-    ssbo_430.content.m3.b = ssbo_140.content.m3.b;
-    ssbo_430.content.m4 = ssbo_140.content.m4;
-    ssbo_430.content.m3s[0].c = ssbo_140.content.m3s[0].c;
-    ssbo_430.content.m3s[1].c = ssbo_140.content.m3s[1].c;
-    ssbo_430.content.m3s[2].c = ssbo_140.content.m3s[2].c;
-    ssbo_430.content.m3s[3].c = ssbo_140.content.m3s[3].c;
-    ssbo_430.content.m3s[4].c = ssbo_140.content.m3s[4].c;
-    ssbo_430.content.m3s[5].c = ssbo_140.content.m3s[5].c;
-    ssbo_430.content.m3s[6].c = ssbo_140.content.m3s[6].c;
-    ssbo_430.content.m3s[7].c = ssbo_140.content.m3s[7].c;
+    Content_1 _60 = ssbo_140.content;
+    ssbo_430.content.m0s[0].a[0] = _60.m0s[0].a[0];
+    ssbo_430.content.m0s[0].b = _60.m0s[0].b;
+    ssbo_430.content.m1s[0].a = _60.m1s[0].a;
+    ssbo_430.content.m1s[0].b = _60.m1s[0].b;
+    ssbo_430.content.m2s[0].a[0] = _60.m2s[0].a[0];
+    ssbo_430.content.m2s[0].b = _60.m2s[0].b;
+    ssbo_430.content.m0.a[0] = _60.m0.a[0];
+    ssbo_430.content.m0.b = _60.m0.b;
+    ssbo_430.content.m1.a = _60.m1.a;
+    ssbo_430.content.m1.b = _60.m1.b;
+    ssbo_430.content.m2.a[0] = _60.m2.a[0];
+    ssbo_430.content.m2.b = _60.m2.b;
+    ssbo_430.content.m3.a = _60.m3.a;
+    ssbo_430.content.m3.b = _60.m3.b;
+    ssbo_430.content.m4 = _60.m4;
+    ssbo_430.content.m3s[0].c = _60.m3s[0].c;
+    ssbo_430.content.m3s[1].c = _60.m3s[1].c;
+    ssbo_430.content.m3s[2].c = _60.m3s[2].c;
+    ssbo_430.content.m3s[3].c = _60.m3s[3].c;
+    ssbo_430.content.m3s[4].c = _60.m3s[4].c;
+    ssbo_430.content.m3s[5].c = _60.m3s[5].c;
+    ssbo_430.content.m3s[6].c = _60.m3s[6].c;
+    ssbo_430.content.m3s[7].c = _60.m3s[7].c;
     ssbo_430.content.m1.a = ssbo_430.content.m3.a * ssbo_430.m6[1][1];
 }
 
diff --git a/reference/opt/shaders-msl/comp/torture-loop.comp b/reference/opt/shaders-msl/comp/torture-loop.comp
index 1b239550e1..4c367d3e6d 100644
--- a/reference/opt/shaders-msl/comp/torture-loop.comp
+++ b/reference/opt/shaders-msl/comp/torture-loop.comp
@@ -14,66 +14,29 @@ struct SSBO2
     float4 out_data[1];
 };
 
-constant uint _98 = {};
-
-kernel void main0(device SSBO& _24 [[buffer(0)]], device SSBO2& _89 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
+kernel void main0(const device SSBO& _24 [[buffer(0)]], device SSBO2& _89 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
 {
-    float4 _93;
-    int _94;
-    _93 = _24.in_data[gl_GlobalInvocationID.x];
-    _94 = 0;
-    int _40;
-    float4 _46;
-    int _48;
-    for (;;)
+    float4 _99;
+    _99 = _24.in_data[gl_GlobalInvocationID.x];
+    for (int _93 = 0; (_93 + 1) < 10; )
     {
-        _40 = _94 + 1;
-        if (_40 < 10)
+        _99 *= 2.0;
+        _93 += 2;
+        continue;
+    }
+    float4 _98;
+    _98 = _99;
+    float4 _103;
+    for (uint _94 = 0u; _94 < 16u; _98 = _103, _94++)
+    {
+        _103 = _98;
+        for (uint _100 = 0u; _100 < 30u; )
         {
-            _46 = _93 * 2.0;
-            _48 = _40 + 1;
-            _93 = _46;
-            _94 = _48;
+            _103 = _24.mvp * _103;
+            _100++;
             continue;
         }
-        else
-        {
-            break;
-        }
     }
-    float4 _95;
-    int _96;
-    _95 = _93;
-    _96 = _40;
-    float4 _100;
-    uint _101;
-    uint _99;
-    for (uint _97 = 0u; _97 < 16u; _95 = _100, _96++, _97++, _99 = _101)
-    {
-        _100 = _95;
-        _101 = 0u;
-        float4 _71;
-        for (; _101 < 30u; _100 = _71, _101++)
-        {
-            _71 = _24.mvp * _100;
-        }
-    }
-    int _102;
-    _102 = _96;
-    int _83;
-    for (;;)
-    {
-        _83 = _102 + 1;
-        if (_83 > 10)
-        {
-            _102 = _83;
-            continue;
-        }
-        else
-        {
-            break;
-        }
-    }
-    _89.out_data[gl_GlobalInvocationID.x] = _95;
+    _89.out_data[gl_GlobalInvocationID.x] = _98;
 }
 
diff --git a/reference/opt/shaders-msl/comp/type-alias.comp b/reference/opt/shaders-msl/comp/type-alias.comp
index 02e23e733e..9a2550a13d 100644
--- a/reference/opt/shaders-msl/comp/type-alias.comp
+++ b/reference/opt/shaders-msl/comp/type-alias.comp
@@ -8,28 +8,18 @@ struct S0
     float4 a;
 };
 
-struct S1
-{
-    float4 a;
-};
-
 struct SSBO0
 {
     S0 s0s[1];
 };
 
-struct SSBO1
-{
-    S1 s1s[1];
-};
-
 struct SSBO2
 {
     float4 outputs[1];
 };
 
-kernel void main0(device SSBO0& _36 [[buffer(0)]], device SSBO1& _55 [[buffer(1)]], device SSBO2& _66 [[buffer(2)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
+kernel void main0(device SSBO0& _36 [[buffer(0)]], device SSBO0& _55 [[buffer(1)]], device SSBO2& _66 [[buffer(2)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
 {
-    _66.outputs[gl_GlobalInvocationID.x] = _36.s0s[gl_GlobalInvocationID.x].a + _55.s1s[gl_GlobalInvocationID.x].a;
+    _66.outputs[gl_GlobalInvocationID.x] = _36.s0s[gl_GlobalInvocationID.x].a + _55.s0s[gl_GlobalInvocationID.x].a;
 }
 
diff --git a/reference/opt/shaders-msl/comp/udiv.comp b/reference/opt/shaders-msl/comp/udiv.comp
index 32874ad787..99ce26412d 100644
--- a/reference/opt/shaders-msl/comp/udiv.comp
+++ b/reference/opt/shaders-msl/comp/udiv.comp
@@ -8,13 +8,8 @@ struct SSBO2
     uint outputs[1];
 };
 
-struct SSBO
+kernel void main0(device SSBO2& _23 [[buffer(0)]], device SSBO2& _10 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
 {
-    uint inputs[1];
-};
-
-kernel void main0(device SSBO2& _10 [[buffer(0)]], device SSBO& _23 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
-{
-    _10.outputs[gl_GlobalInvocationID.x] = _23.inputs[gl_GlobalInvocationID.x] / 29u;
+    _10.outputs[gl_GlobalInvocationID.x] = _23.outputs[gl_GlobalInvocationID.x] / 29u;
 }
 
diff --git a/reference/opt/shaders-msl/desktop-only/frag/image-ms.desktop.frag b/reference/opt/shaders-msl/desktop-only/frag/image-ms.desktop.frag
index 4083e4ea16..3b461f0fef 100644
--- a/reference/opt/shaders-msl/desktop-only/frag/image-ms.desktop.frag
+++ b/reference/opt/shaders-msl/desktop-only/frag/image-ms.desktop.frag
@@ -3,7 +3,7 @@
 
 using namespace metal;
 
-fragment void main0(texture2d_ms<float> uImageMS [[texture(0)]], texture2d_array<float, access::read_write> uImageArray [[texture(1)]], texture2d<float, access::write> uImage [[texture(2)]])
+fragment void main0(texture2d<float, access::write> uImage [[texture(0)]], texture2d_array<float, access::read_write> uImageArray [[texture(1)]], texture2d_ms<float> uImageMS [[texture(2)]])
 {
     uImage.write(uImageMS.read(uint2(int2(1, 2)), 2), uint2(int2(2, 3)));
     uImageArray.write(uImageArray.read(uint2(int3(1, 2, 4).xy), uint(int3(1, 2, 4).z)), uint2(int3(2, 3, 7).xy), uint(int3(2, 3, 7).z));
diff --git a/reference/opt/shaders-msl/desktop-only/vert/basic.desktop.sso.vert b/reference/opt/shaders-msl/desktop-only/vert/basic.desktop.sso.vert
index 1592b5c5cf..ffb4357126 100644
--- a/reference/opt/shaders-msl/desktop-only/vert/basic.desktop.sso.vert
+++ b/reference/opt/shaders-msl/desktop-only/vert/basic.desktop.sso.vert
@@ -8,18 +8,18 @@ struct UBO
     float4x4 uMVP;
 };
 
-struct main0_in
-{
-    float3 aNormal [[attribute(1)]];
-    float4 aVertex [[attribute(0)]];
-};
-
 struct main0_out
 {
     float3 vNormal [[user(locn0)]];
     float4 gl_Position [[position]];
 };
 
+struct main0_in
+{
+    float4 aVertex [[attribute(0)]];
+    float3 aNormal [[attribute(1)]];
+};
+
 vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _16 [[buffer(0)]])
 {
     main0_out out = {};
diff --git a/reference/opt/shaders-msl/flatten/basic.flatten.vert b/reference/opt/shaders-msl/flatten/basic.flatten.vert
index 1592b5c5cf..ffb4357126 100644
--- a/reference/opt/shaders-msl/flatten/basic.flatten.vert
+++ b/reference/opt/shaders-msl/flatten/basic.flatten.vert
@@ -8,18 +8,18 @@ struct UBO
     float4x4 uMVP;
 };
 
-struct main0_in
-{
-    float3 aNormal [[attribute(1)]];
-    float4 aVertex [[attribute(0)]];
-};
-
 struct main0_out
 {
     float3 vNormal [[user(locn0)]];
     float4 gl_Position [[position]];
 };
 
+struct main0_in
+{
+    float4 aVertex [[attribute(0)]];
+    float3 aNormal [[attribute(1)]];
+};
+
 vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _16 [[buffer(0)]])
 {
     main0_out out = {};
diff --git a/reference/opt/shaders-msl/flatten/multiindex.flatten.vert b/reference/opt/shaders-msl/flatten/multiindex.flatten.vert
index 84c4b408b2..f4549abab2 100644
--- a/reference/opt/shaders-msl/flatten/multiindex.flatten.vert
+++ b/reference/opt/shaders-msl/flatten/multiindex.flatten.vert
@@ -8,16 +8,16 @@ struct UBO
     float4 Data[3][5];
 };
 
-struct main0_in
-{
-    int2 aIndex [[attribute(0)]];
-};
-
 struct main0_out
 {
     float4 gl_Position [[position]];
 };
 
+struct main0_in
+{
+    int2 aIndex [[attribute(0)]];
+};
+
 vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _20 [[buffer(0)]])
 {
     main0_out out = {};
diff --git a/reference/opt/shaders-msl/flatten/push-constant.flatten.vert b/reference/opt/shaders-msl/flatten/push-constant.flatten.vert
index 83def9c0bb..8f2e8c173f 100644
--- a/reference/opt/shaders-msl/flatten/push-constant.flatten.vert
+++ b/reference/opt/shaders-msl/flatten/push-constant.flatten.vert
@@ -10,18 +10,18 @@ struct PushMe
     float Arr[4];
 };
 
-struct main0_in
-{
-    float4 Pos [[attribute(1)]];
-    float2 Rot [[attribute(0)]];
-};
-
 struct main0_out
 {
     float2 vRot [[user(locn0)]];
     float4 gl_Position [[position]];
 };
 
+struct main0_in
+{
+    float2 Rot [[attribute(0)]];
+    float4 Pos [[attribute(1)]];
+};
+
 vertex main0_out main0(main0_in in [[stage_in]], constant PushMe& registers [[buffer(0)]])
 {
     main0_out out = {};
diff --git a/reference/opt/shaders-msl/flatten/rowmajor.flatten.vert b/reference/opt/shaders-msl/flatten/rowmajor.flatten.vert
index 3e0fcdbb75..387fe0a835 100644
--- a/reference/opt/shaders-msl/flatten/rowmajor.flatten.vert
+++ b/reference/opt/shaders-msl/flatten/rowmajor.flatten.vert
@@ -10,16 +10,16 @@ struct UBO
     float2x4 uMVP;
 };
 
-struct main0_in
-{
-    float4 aVertex [[attribute(0)]];
-};
-
 struct main0_out
 {
     float4 gl_Position [[position]];
 };
 
+struct main0_in
+{
+    float4 aVertex [[attribute(0)]];
+};
+
 vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _18 [[buffer(0)]])
 {
     main0_out out = {};
diff --git a/reference/opt/shaders-msl/flatten/struct.flatten.vert b/reference/opt/shaders-msl/flatten/struct.flatten.vert
index 594d29fe57..10b64d9906 100644
--- a/reference/opt/shaders-msl/flatten/struct.flatten.vert
+++ b/reference/opt/shaders-msl/flatten/struct.flatten.vert
@@ -16,24 +16,24 @@ struct UBO
     Light light;
 };
 
-struct main0_in
-{
-    float3 aNormal [[attribute(1)]];
-    float4 aVertex [[attribute(0)]];
-};
-
 struct main0_out
 {
     float4 vColor [[user(locn0)]];
     float4 gl_Position [[position]];
 };
 
+struct main0_in
+{
+    float4 aVertex [[attribute(0)]];
+    float3 aNormal [[attribute(1)]];
+};
+
 vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _18 [[buffer(0)]])
 {
     main0_out out = {};
     out.gl_Position = _18.uMVP * in.aVertex;
     out.vColor = float4(0.0);
-    float3 _39 = in.aVertex.xyz - _18.light.Position;
+    float3 _39 = in.aVertex.xyz - float3(_18.light.Position);
     out.vColor += ((_18.light.Color * clamp(1.0 - (length(_39) / _18.light.Radius), 0.0, 1.0)) * dot(in.aNormal, normalize(_39)));
     return out;
 }
diff --git a/reference/opt/shaders-msl/flatten/swizzle.flatten.vert b/reference/opt/shaders-msl/flatten/swizzle.flatten.vert
index 53fc21f99e..52940411bc 100644
--- a/reference/opt/shaders-msl/flatten/swizzle.flatten.vert
+++ b/reference/opt/shaders-msl/flatten/swizzle.flatten.vert
@@ -39,7 +39,7 @@ vertex main0_out main0(constant UBO& _22 [[buffer(0)]])
     out.oA = _22.A;
     out.oB = float4(_22.B0, _22.B1);
     out.oC = float4(_22.C0, _22.C1) + float4(_22.C1.xy, _22.C1.z, _22.C0);
-    out.oD = float4(_22.D0, _22.D1) + float4(float3(_22.D0).xy, float3(_22.D0).z, _22.D1);
+    out.oD = float4(_22.D0[0], _22.D0[1], _22.D0[2], _22.D1) + float4(_22.D0[0], _22.D0[1], _22.D0[2u], _22.D1);
     out.oE = float4(_22.E0, _22.E1, _22.E2, _22.E3);
     out.oF = float4(_22.F0, _22.F1, _22.F2);
     return out;
diff --git a/reference/opt/shaders-msl/flatten/types.flatten.frag b/reference/opt/shaders-msl/flatten/types.flatten.frag
index cee53d9e58..540c5baeb1 100644
--- a/reference/opt/shaders-msl/flatten/types.flatten.frag
+++ b/reference/opt/shaders-msl/flatten/types.flatten.frag
@@ -26,7 +26,7 @@ struct main0_out
     float4 FragColor [[color(0)]];
 };
 
-fragment main0_out main0(constant UBO1& _14 [[buffer(0)]], constant UBO2& _29 [[buffer(1)]], constant UBO0& _41 [[buffer(2)]])
+fragment main0_out main0(constant UBO0& _41 [[buffer(0)]], constant UBO1& _14 [[buffer(1)]], constant UBO2& _29 [[buffer(2)]])
 {
     main0_out out = {};
     out.FragColor = ((((float4(_14.c) + float4(_14.d)) + float4(_29.e)) + float4(_29.f)) + _41.a) + _41.b;
diff --git a/reference/opt/shaders-msl/frag/array-lut-no-loop-variable.frag b/reference/opt/shaders-msl/frag/array-lut-no-loop-variable.frag
new file mode 100644
index 0000000000..6c8299fa91
--- /dev/null
+++ b/reference/opt/shaders-msl/frag/array-lut-no-loop-variable.frag
@@ -0,0 +1,42 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+constant float _17[5] = {1.0, 2.0, 3.0, 4.0, 5.0};
+
+struct main0_out
+{
+    float4 FragColor [[color(0)]];
+};
+
+// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment.
+template<typename T, uint N>
+void spvArrayCopy(thread T (&dst)[N], thread const T (&src)[N])
+{
+    for (uint i = 0; i < N; dst[i] = src[i], i++);
+}
+
+// An overload for constant arrays.
+template<typename T, uint N>
+void spvArrayCopyConstant(thread T (&dst)[N], constant T (&src)[N])
+{
+    for (uint i = 0; i < N; dst[i] = src[i], i++);
+}
+
+fragment main0_out main0()
+{
+    main0_out out = {};
+    float lut[5] = {1.0, 2.0, 3.0, 4.0, 5.0};
+    for (int _46 = 0; _46 < 4; )
+    {
+        int _33 = _46 + 1;
+        out.FragColor += float4(lut[_33]);
+        _46 = _33;
+        continue;
+    }
+    return out;
+}
+
diff --git a/reference/opt/shaders-msl/frag/basic.frag b/reference/opt/shaders-msl/frag/basic.frag
index 4d33ee7bca..f33db61eba 100644
--- a/reference/opt/shaders-msl/frag/basic.frag
+++ b/reference/opt/shaders-msl/frag/basic.frag
@@ -3,17 +3,17 @@
 
 using namespace metal;
 
-struct main0_in
-{
-    float2 vTex [[user(locn1)]];
-    float4 vColor [[user(locn0)]];
-};
-
 struct main0_out
 {
     float4 FragColor [[color(0)]];
 };
 
+struct main0_in
+{
+    float4 vColor [[user(locn0)]];
+    float2 vTex [[user(locn1)]];
+};
+
 fragment main0_out main0(main0_in in [[stage_in]], texture2d<float> uTex [[texture(0)]], sampler uTexSmplr [[sampler(0)]])
 {
     main0_out out = {};
diff --git a/reference/opt/shaders-msl/frag/binary-func-unpack-pack-arguments.frag b/reference/opt/shaders-msl/frag/binary-func-unpack-pack-arguments.frag
new file mode 100644
index 0000000000..134cfe1847
--- /dev/null
+++ b/reference/opt/shaders-msl/frag/binary-func-unpack-pack-arguments.frag
@@ -0,0 +1,28 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct UBO
+{
+    packed_float3 color;
+    float v;
+};
+
+struct main0_out
+{
+    float FragColor [[color(0)]];
+};
+
+struct main0_in
+{
+    float3 vIn [[user(locn0)]];
+};
+
+fragment main0_out main0(main0_in in [[stage_in]], constant UBO& _15 [[buffer(0)]])
+{
+    main0_out out = {};
+    out.FragColor = dot(in.vIn, float3(_15.color));
+    return out;
+}
+
diff --git a/reference/opt/shaders-msl/frag/binary-unpack-pack-arguments.frag b/reference/opt/shaders-msl/frag/binary-unpack-pack-arguments.frag
new file mode 100644
index 0000000000..8bd538bec6
--- /dev/null
+++ b/reference/opt/shaders-msl/frag/binary-unpack-pack-arguments.frag
@@ -0,0 +1,28 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct UBO
+{
+    packed_float3 color;
+    float v;
+};
+
+struct main0_out
+{
+    float3 FragColor [[color(0)]];
+};
+
+struct main0_in
+{
+    float3 vIn [[user(locn0)]];
+};
+
+fragment main0_out main0(main0_in in [[stage_in]], constant UBO& _15 [[buffer(0)]])
+{
+    main0_out out = {};
+    out.FragColor = cross(in.vIn, float3(_15.color) - in.vIn);
+    return out;
+}
+
diff --git a/reference/opt/shaders-msl/frag/bitcasting.frag b/reference/opt/shaders-msl/frag/bitcasting.frag
index 659d320e9f..ae6d45e013 100644
--- a/reference/opt/shaders-msl/frag/bitcasting.frag
+++ b/reference/opt/shaders-msl/frag/bitcasting.frag
@@ -3,17 +3,17 @@
 
 using namespace metal;
 
-struct main0_in
-{
-    float4 VertGeom [[user(locn0)]];
-};
-
 struct main0_out
 {
     float4 FragColor0 [[color(0)]];
     float4 FragColor1 [[color(1)]];
 };
 
+struct main0_in
+{
+    float4 VertGeom [[user(locn0)]];
+};
+
 fragment main0_out main0(main0_in in [[stage_in]], texture2d<float> TextureBase [[texture(0)]], texture2d<float> TextureDetail [[texture(1)]], sampler TextureBaseSmplr [[sampler(0)]], sampler TextureDetailSmplr [[sampler(1)]])
 {
     main0_out out = {};
diff --git a/reference/opt/shaders-msl/frag/builtins.frag b/reference/opt/shaders-msl/frag/builtins.frag
index 9283d1a66b..f9085252b3 100644
--- a/reference/opt/shaders-msl/frag/builtins.frag
+++ b/reference/opt/shaders-msl/frag/builtins.frag
@@ -3,17 +3,17 @@
 
 using namespace metal;
 
-struct main0_in
-{
-    float4 vColor [[user(locn0)]];
-};
-
 struct main0_out
 {
     float4 FragColor [[color(0)]];
     float gl_FragDepth [[depth(any)]];
 };
 
+struct main0_in
+{
+    float4 vColor [[user(locn0)]];
+};
+
 fragment main0_out main0(main0_in in [[stage_in]], float4 gl_FragCoord [[position]])
 {
     main0_out out = {};
diff --git a/reference/opt/shaders-msl/frag/composite-extract-forced-temporary.frag b/reference/opt/shaders-msl/frag/composite-extract-forced-temporary.frag
index 5539c2508f..6948e4d098 100644
--- a/reference/opt/shaders-msl/frag/composite-extract-forced-temporary.frag
+++ b/reference/opt/shaders-msl/frag/composite-extract-forced-temporary.frag
@@ -3,16 +3,16 @@
 
 using namespace metal;
 
-struct main0_in
-{
-    float2 vTexCoord [[user(locn0)]];
-};
-
 struct main0_out
 {
     float4 FragColor [[color(0)]];
 };
 
+struct main0_in
+{
+    float2 vTexCoord [[user(locn0)]];
+};
+
 fragment main0_out main0(main0_in in [[stage_in]], texture2d<float> Texture [[texture(0)]], sampler TextureSmplr [[sampler(0)]])
 {
     main0_out out = {};
diff --git a/reference/opt/shaders-msl/frag/constant-array.frag b/reference/opt/shaders-msl/frag/constant-array.frag
index 9cdd52276b..63576f109c 100644
--- a/reference/opt/shaders-msl/frag/constant-array.frag
+++ b/reference/opt/shaders-msl/frag/constant-array.frag
@@ -17,16 +17,16 @@ constant float4 _54[2] = {float4(8.0), float4(10.0)};
 constant float4 _55[2][2] = {{float4(1.0), float4(2.0)}, {float4(8.0), float4(10.0)}};
 constant Foobar _75[2] = {{10.0, 40.0}, {90.0, 70.0}};
 
-struct main0_in
-{
-    int index [[user(locn0)]];
-};
-
 struct main0_out
 {
     float4 FragColor [[color(0)]];
 };
 
+struct main0_in
+{
+    int index [[user(locn0)]];
+};
+
 // Implementation of an array copy function to cover GLSL's ability to copy an array via assignment.
 template<typename T, uint N>
 void spvArrayCopy(thread T (&dst)[N], thread const T (&src)[N])
@@ -47,7 +47,7 @@ fragment main0_out main0(main0_in in [[stage_in]])
     float4 indexable[3] = {float4(1.0), float4(2.0), float4(3.0)};
     float4 indexable_1[2][2] = {{float4(1.0), float4(2.0)}, {float4(8.0), float4(10.0)}};
     Foobar indexable_2[2] = {{10.0, 40.0}, {90.0, 70.0}};
-    out.FragColor = ((indexable[in.index] + (indexable_1[in.index][in.index + 1])) + float4(10.0 + 20.0)) + float4(indexable_2[in.index].a + indexable_2[in.index].b);
+    out.FragColor = ((indexable[in.index] + (indexable_1[in.index][in.index + 1])) + float4(30.0)) + float4(indexable_2[in.index].a + indexable_2[in.index].b);
     return out;
 }
 
diff --git a/reference/opt/shaders-msl/frag/constant-composites.frag b/reference/opt/shaders-msl/frag/constant-composites.frag
index d216da6d13..ec5d66e86d 100644
--- a/reference/opt/shaders-msl/frag/constant-composites.frag
+++ b/reference/opt/shaders-msl/frag/constant-composites.frag
@@ -14,16 +14,16 @@ struct Foo
 constant float _16[4] = {1.0, 4.0, 3.0, 2.0};
 constant Foo _28[2] = {{10.0, 20.0}, {30.0, 40.0}};
 
-struct main0_in
-{
-    int line [[user(locn0)]];
-};
-
 struct main0_out
 {
     float4 FragColor [[color(0)]];
 };
 
+struct main0_in
+{
+    int line [[user(locn0)]];
+};
+
 // Implementation of an array copy function to cover GLSL's ability to copy an array via assignment.
 template<typename T, uint N>
 void spvArrayCopy(thread T (&dst)[N], thread const T (&src)[N])
diff --git a/reference/opt/shaders-msl/frag/control-dependent-in-branch.desktop.frag b/reference/opt/shaders-msl/frag/control-dependent-in-branch.desktop.frag
new file mode 100644
index 0000000000..b75f86d23c
--- /dev/null
+++ b/reference/opt/shaders-msl/frag/control-dependent-in-branch.desktop.frag
@@ -0,0 +1,45 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float4 FragColor [[color(0)]];
+};
+
+struct main0_in
+{
+    float4 vInput [[user(locn0)]];
+};
+
+fragment main0_out main0(main0_in in [[stage_in]], texture2d<float> uSampler [[texture(0)]], sampler uSamplerSmplr [[sampler(0)]])
+{
+    main0_out out = {};
+    out.FragColor = in.vInput;
+    float4 _23 = uSampler.sample(uSamplerSmplr, in.vInput.xy);
+    float4 _26 = dfdx(in.vInput);
+    float4 _29 = dfdy(in.vInput);
+    float4 _32 = fwidth(in.vInput);
+    float4 _35 = dfdx(in.vInput);
+    float4 _38 = dfdy(in.vInput);
+    float4 _41 = fwidth(in.vInput);
+    float4 _44 = dfdx(in.vInput);
+    float4 _47 = dfdy(in.vInput);
+    float4 _50 = fwidth(in.vInput);
+    if (in.vInput.y > 10.0)
+    {
+        out.FragColor += _23;
+        out.FragColor += _26;
+        out.FragColor += _29;
+        out.FragColor += _32;
+        out.FragColor += _35;
+        out.FragColor += _38;
+        out.FragColor += _41;
+        out.FragColor += _44;
+        out.FragColor += _47;
+        out.FragColor += _50;
+    }
+    return out;
+}
+
diff --git a/reference/opt/shaders-msl/frag/dual-source-blending.frag b/reference/opt/shaders-msl/frag/dual-source-blending.frag
new file mode 100644
index 0000000000..37938bf8ca
--- /dev/null
+++ b/reference/opt/shaders-msl/frag/dual-source-blending.frag
@@ -0,0 +1,19 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float4 FragColor0 [[color(0), index(0)]];
+    float4 FragColor1 [[color(0), index(1)]];
+};
+
+fragment main0_out main0()
+{
+    main0_out out = {};
+    out.FragColor0 = float4(1.0);
+    out.FragColor1 = float4(2.0);
+    return out;
+}
+
diff --git a/reference/opt/shaders-msl/frag/false-loop-init.frag b/reference/opt/shaders-msl/frag/false-loop-init.frag
index c67bb9d396..9233caa05d 100644
--- a/reference/opt/shaders-msl/frag/false-loop-init.frag
+++ b/reference/opt/shaders-msl/frag/false-loop-init.frag
@@ -3,35 +3,25 @@
 
 using namespace metal;
 
-constant uint _49 = {};
-
-struct main0_in
-{
-    float4 accum [[user(locn0)]];
-};
-
 struct main0_out
 {
     float4 result [[color(0)]];
 };
 
+struct main0_in
+{
+    float4 accum [[user(locn0)]];
+};
+
 fragment main0_out main0(main0_in in [[stage_in]])
 {
     main0_out out = {};
     out.result = float4(0.0);
-    uint _51;
-    uint _50;
-    for (int _48 = 0; _48 < 4; _48 += int(_51), _50 = _51)
+    for (int _48 = 0; _48 < 4; )
     {
-        if (in.accum.y > 10.0)
-        {
-            _51 = 40u;
-        }
-        else
-        {
-            _51 = 30u;
-        }
         out.result += in.accum;
+        _48 += int((in.accum.y > 10.0) ? 40u : 30u);
+        continue;
     }
     return out;
 }
diff --git a/reference/opt/shaders-msl/frag/flush_params.frag b/reference/opt/shaders-msl/frag/flush_params.frag
index 059167fd4b..64edee8722 100644
--- a/reference/opt/shaders-msl/frag/flush_params.frag
+++ b/reference/opt/shaders-msl/frag/flush_params.frag
@@ -3,11 +3,6 @@
 
 using namespace metal;
 
-struct Structy
-{
-    float4 c;
-};
-
 struct main0_out
 {
     float4 FragColor [[color(0)]];
diff --git a/reference/opt/shaders-msl/frag/for-loop-init.frag b/reference/opt/shaders-msl/frag/for-loop-init.frag
index 0e5c92c7e5..71c156f8da 100644
--- a/reference/opt/shaders-msl/frag/for-loop-init.frag
+++ b/reference/opt/shaders-msl/frag/for-loop-init.frag
@@ -3,6 +3,9 @@
 
 using namespace metal;
 
+constant int2 _184 = {};
+constant int _199 = {};
+
 struct main0_out
 {
     int FragColor [[color(0)]];
@@ -12,46 +15,70 @@ fragment main0_out main0()
 {
     main0_out out = {};
     out.FragColor = 16;
-    for (int _140 = 0; _140 < 25; _140++)
+    for (int _168 = 0; _168 < 25; )
     {
         out.FragColor += 10;
+        _168++;
+        continue;
     }
-    for (int _141 = 1; _141 < 30; _141++)
+    for (int _169 = 1; _169 < 30; )
     {
         out.FragColor += 11;
+        _169++;
+        continue;
     }
-    int _142;
-    _142 = 0;
-    for (; _142 < 20; _142++)
+    int _170;
+    _170 = 0;
+    for (; _170 < 20; )
     {
         out.FragColor += 12;
+        _170++;
+        continue;
     }
-    int _62 = _142 + 3;
+    int _62 = _170 + 3;
     out.FragColor += _62;
-    if (_62 == 40)
+    bool _68 = _62 == 40;
+    if (_68)
     {
-        for (int _143 = 0; _143 < 40; _143++)
+        for (int _171 = 0; _171 < 40; )
         {
             out.FragColor += 13;
+            _171++;
+            continue;
         }
-        return out;
     }
     else
     {
         out.FragColor += _62;
     }
-    int2 _144;
-    _144 = int2(0);
-    int2 _139;
-    for (; _144.x < 10; _139 = _144, _139.x = _144.x + 4, _144 = _139)
+    bool2 _211 = bool2(_68);
+    int2 _212 = int2(_211.x ? _184.x : _184.x, _211.y ? _184.y : _184.y);
+    bool _213 = _68 ? true : false;
+    bool2 _214 = bool2(_213);
+    if (!_213)
     {
-        out.FragColor += _144.y;
+        int2 _177;
+        _177 = int2(_214.x ? _212.x : int2(0).x, _214.y ? _212.y : int2(0).y);
+        for (; _177.x < 10; )
+        {
+            out.FragColor += _177.y;
+            int2 _167 = _177;
+            _167.x = _177.x + 4;
+            _177 = _167;
+            continue;
+        }
     }
-    for (int _145 = _62; _145 < 40; _145++)
+    int _216 = _213 ? (_68 ? _199 : _199) : _62;
+    if (!_213)
     {
-        out.FragColor += _145;
+        for (int _191 = _216; _191 < 40; )
+        {
+            out.FragColor += _191;
+            _191++;
+            continue;
+        }
+        out.FragColor += _216;
     }
-    out.FragColor += _62;
     return out;
 }
 
diff --git a/reference/opt/shaders-msl/frag/fp16-packing.frag b/reference/opt/shaders-msl/frag/fp16-packing.frag
new file mode 100644
index 0000000000..358681f6dd
--- /dev/null
+++ b/reference/opt/shaders-msl/frag/fp16-packing.frag
@@ -0,0 +1,25 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float2 FP32Out [[color(0)]];
+    uint FP16Out [[color(1)]];
+};
+
+struct main0_in
+{
+    uint FP16 [[user(locn0)]];
+    float2 FP32 [[user(locn1)]];
+};
+
+fragment main0_out main0(main0_in in [[stage_in]])
+{
+    main0_out out = {};
+    out.FP32Out = float2(as_type<half2>(in.FP16));
+    out.FP16Out = as_type<uint>(half2(in.FP32));
+    return out;
+}
+
diff --git a/reference/opt/shaders-msl/frag/fp16.desktop.frag b/reference/opt/shaders-msl/frag/fp16.desktop.frag
new file mode 100644
index 0000000000..d9a0390e1f
--- /dev/null
+++ b/reference/opt/shaders-msl/frag/fp16.desktop.frag
@@ -0,0 +1,16 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_in
+{
+    half4 v4 [[user(locn3)]];
+};
+
+fragment void main0(main0_in in [[stage_in]])
+{
+    half4 _491;
+    half4 _563 = modf(in.v4, _491);
+}
+
diff --git a/reference/opt/shaders-msl/frag/front-facing.frag b/reference/opt/shaders-msl/frag/front-facing.frag
index 3856498943..2f83642492 100644
--- a/reference/opt/shaders-msl/frag/front-facing.frag
+++ b/reference/opt/shaders-msl/frag/front-facing.frag
@@ -3,17 +3,17 @@
 
 using namespace metal;
 
-struct main0_in
-{
-    float4 vB [[user(locn1)]];
-    float4 vA [[user(locn0)]];
-};
-
 struct main0_out
 {
     float4 FragColor [[color(0)]];
 };
 
+struct main0_in
+{
+    float4 vA [[user(locn0)]];
+    float4 vB [[user(locn1)]];
+};
+
 fragment main0_out main0(main0_in in [[stage_in]], bool gl_FrontFacing [[front_facing]])
 {
     main0_out out = {};
diff --git a/reference/opt/shaders-msl/frag/gather-dref.frag b/reference/opt/shaders-msl/frag/gather-dref.frag
new file mode 100644
index 0000000000..c5c5ccf0bb
--- /dev/null
+++ b/reference/opt/shaders-msl/frag/gather-dref.frag
@@ -0,0 +1,22 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float4 FragColor [[color(0)]];
+};
+
+struct main0_in
+{
+    float3 vUV [[user(locn0)]];
+};
+
+fragment main0_out main0(main0_in in [[stage_in]], depth2d<float> uT [[texture(0)]], sampler uTSmplr [[sampler(0)]])
+{
+    main0_out out = {};
+    out.FragColor = uT.gather_compare(uTSmplr, in.vUV.xy, in.vUV.z);
+    return out;
+}
+
diff --git a/reference/opt/shaders-msl/frag/gather-offset.frag b/reference/opt/shaders-msl/frag/gather-offset.frag
new file mode 100644
index 0000000000..02b80194b5
--- /dev/null
+++ b/reference/opt/shaders-msl/frag/gather-offset.frag
@@ -0,0 +1,17 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float4 FragColor [[color(0)]];
+};
+
+fragment main0_out main0(texture2d<float> uT [[texture(0)]], sampler uTSmplr [[sampler(0)]])
+{
+    main0_out out = {};
+    out.FragColor = uT.gather(uTSmplr, float2(0.5), int2(0), component::w);
+    return out;
+}
+
diff --git a/reference/opt/shaders-msl/frag/illegal-name-test-0.frag b/reference/opt/shaders-msl/frag/illegal-name-test-0.frag
new file mode 100644
index 0000000000..81cd3b5621
--- /dev/null
+++ b/reference/opt/shaders-msl/frag/illegal-name-test-0.frag
@@ -0,0 +1,17 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float4 FragColor [[color(0)]];
+};
+
+fragment main0_out main0()
+{
+    main0_out out = {};
+    out.FragColor = float4(40.0);
+    return out;
+}
+
diff --git a/reference/opt/shaders-msl/frag/in_block.frag b/reference/opt/shaders-msl/frag/in_block.frag
index 43b4a05897..2af2024f52 100644
--- a/reference/opt/shaders-msl/frag/in_block.frag
+++ b/reference/opt/shaders-msl/frag/in_block.frag
@@ -3,17 +3,17 @@
 
 using namespace metal;
 
-struct main0_in
-{
-    float4 VertexOut_color2 [[user(locn3)]];
-    float4 VertexOut_color [[user(locn2)]];
-};
-
 struct main0_out
 {
     float4 FragColor [[color(0)]];
 };
 
+struct main0_in
+{
+    float4 VertexOut_color [[user(locn2)]];
+    float4 VertexOut_color2 [[user(locn3)]];
+};
+
 fragment main0_out main0(main0_in in [[stage_in]])
 {
     main0_out out = {};
diff --git a/reference/opt/shaders-msl/frag/in_mat.frag b/reference/opt/shaders-msl/frag/in_mat.frag
new file mode 100644
index 0000000000..905a3a2759
--- /dev/null
+++ b/reference/opt/shaders-msl/frag/in_mat.frag
@@ -0,0 +1,37 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float4 outFragColor [[color(0)]];
+};
+
+struct main0_in
+{
+    float3 inPos [[user(locn0)]];
+    float3 inNormal [[user(locn1)]];
+    float4 inInvModelView_0 [[user(locn2)]];
+    float4 inInvModelView_1 [[user(locn3)]];
+    float4 inInvModelView_2 [[user(locn4)]];
+    float4 inInvModelView_3 [[user(locn5)]];
+    float inLodBias [[user(locn6)]];
+};
+
+fragment main0_out main0(main0_in in [[stage_in]], texturecube<float> samplerColor [[texture(1)]], sampler samplerColorSmplr [[sampler(1)]])
+{
+    main0_out out = {};
+    float4x4 inInvModelView = {};
+    inInvModelView[0] = in.inInvModelView_0;
+    inInvModelView[1] = in.inInvModelView_1;
+    inInvModelView[2] = in.inInvModelView_2;
+    inInvModelView[3] = in.inInvModelView_3;
+    float4 _31 = inInvModelView * float4(reflect(normalize(in.inPos), normalize(in.inNormal)), 0.0);
+    float _33 = _31.x;
+    float3 _60 = float3(_33, _31.yz);
+    _60.x = _33 * (-1.0);
+    out.outFragColor = samplerColor.sample(samplerColorSmplr, _60, bias(in.inLodBias));
+    return out;
+}
+
diff --git a/reference/opt/shaders-msl/frag/mix.frag b/reference/opt/shaders-msl/frag/mix.frag
index 9c9b8398cf..5e385087bc 100644
--- a/reference/opt/shaders-msl/frag/mix.frag
+++ b/reference/opt/shaders-msl/frag/mix.frag
@@ -3,27 +3,26 @@
 
 using namespace metal;
 
-struct main0_in
-{
-    float vIn3 [[user(locn3)]];
-    float vIn2 [[user(locn2)]];
-    float4 vIn1 [[user(locn1)]];
-    float4 vIn0 [[user(locn0)]];
-};
-
 struct main0_out
 {
     float4 FragColor [[color(0)]];
 };
 
+struct main0_in
+{
+    float4 vIn0 [[user(locn0)]];
+    float4 vIn1 [[user(locn1)]];
+    float vIn2 [[user(locn2)]];
+    float vIn3 [[user(locn3)]];
+};
+
 fragment main0_out main0(main0_in in [[stage_in]])
 {
     main0_out out = {};
-    out.FragColor = float4(bool4(false, true, false, false).x ? in.vIn1.x : in.vIn0.x, bool4(false, true, false, false).y ? in.vIn1.y : in.vIn0.y, bool4(false, true, false, false).z ? in.vIn1.z : in.vIn0.z, bool4(false, true, false, false).w ? in.vIn1.w : in.vIn0.w);
-    out.FragColor = float4(true ? in.vIn3 : in.vIn2);
-    bool4 _37 = bool4(true);
-    out.FragColor = float4(_37.x ? in.vIn0.x : in.vIn1.x, _37.y ? in.vIn0.y : in.vIn1.y, _37.z ? in.vIn0.z : in.vIn1.z, _37.w ? in.vIn0.w : in.vIn1.w);
-    out.FragColor = float4(true ? in.vIn2 : in.vIn3);
+    out.FragColor = float4(in.vIn0.x, in.vIn1.y, in.vIn0.z, in.vIn0.w);
+    out.FragColor = float4(in.vIn3);
+    out.FragColor = in.vIn0.xyzw;
+    out.FragColor = float4(in.vIn2);
     return out;
 }
 
diff --git a/reference/opt/shaders-msl/frag/mrt-array.frag b/reference/opt/shaders-msl/frag/mrt-array.frag
new file mode 100644
index 0000000000..d7cea6baf9
--- /dev/null
+++ b/reference/opt/shaders-msl/frag/mrt-array.frag
@@ -0,0 +1,43 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float4 FragColor_0 [[color(0)]];
+    float4 FragColor_1 [[color(1)]];
+    float4 FragColor_2 [[color(2)]];
+    float4 FragColor_3 [[color(3)]];
+};
+
+struct main0_in
+{
+    float4 vA [[user(locn0)]];
+    float4 vB [[user(locn1)]];
+};
+
+// Implementation of the GLSL mod() function, which is slightly different than Metal fmod()
+template<typename Tx, typename Ty>
+Tx mod(Tx x, Ty y)
+{
+    return x - y * floor(x / y);
+}
+
+fragment main0_out main0(main0_in in [[stage_in]])
+{
+    main0_out out = {};
+    float4 FragColor[4] = {};
+    FragColor[0] = mod(in.vA, in.vB);
+    FragColor[1] = in.vA + in.vB;
+    FragColor[2] = in.vA - in.vB;
+    FragColor[3] = in.vA * in.vB;
+    out.FragColor_0 = FragColor[0];
+    out.FragColor_1 = FragColor[1];
+    out.FragColor_2 = FragColor[2];
+    out.FragColor_3 = FragColor[3];
+    return out;
+}
+
diff --git a/reference/opt/shaders-msl/frag/packed-expression-vector-shuffle.frag b/reference/opt/shaders-msl/frag/packed-expression-vector-shuffle.frag
new file mode 100644
index 0000000000..dd319af555
--- /dev/null
+++ b/reference/opt/shaders-msl/frag/packed-expression-vector-shuffle.frag
@@ -0,0 +1,23 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct UBO
+{
+    packed_float3 color;
+    float v;
+};
+
+struct main0_out
+{
+    float4 FragColor [[color(0)]];
+};
+
+fragment main0_out main0(constant UBO& _15 [[buffer(0)]])
+{
+    main0_out out = {};
+    out.FragColor = float4(_15.color[0], _15.color[1], _15.color[2], float4(1.0).w);
+    return out;
+}
+
diff --git a/reference/opt/shaders-msl/frag/packing-test-3.frag b/reference/opt/shaders-msl/frag/packing-test-3.frag
new file mode 100644
index 0000000000..436829e85c
--- /dev/null
+++ b/reference/opt/shaders-msl/frag/packing-test-3.frag
@@ -0,0 +1,28 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct TestStruct
+{
+    packed_float3 position;
+    float radius;
+};
+
+struct CB0
+{
+    TestStruct CB0[16];
+};
+
+struct main0_out
+{
+    float4 _entryPointOutput [[color(0)]];
+};
+
+fragment main0_out main0(constant CB0& _26 [[buffer(0)]], float4 gl_FragCoord [[position]])
+{
+    main0_out out = {};
+    out._entryPointOutput = float4(_26.CB0[1].position[0], _26.CB0[1].position[1], _26.CB0[1].position[2], _26.CB0[1].radius);
+    return out;
+}
+
diff --git a/reference/opt/shaders-msl/frag/pls.frag b/reference/opt/shaders-msl/frag/pls.frag
index 42b5d2bf59..ee774a04af 100644
--- a/reference/opt/shaders-msl/frag/pls.frag
+++ b/reference/opt/shaders-msl/frag/pls.frag
@@ -3,14 +3,6 @@
 
 using namespace metal;
 
-struct main0_in
-{
-    float4 PLSIn3 [[user(locn3)]];
-    float4 PLSIn2 [[user(locn2)]];
-    float4 PLSIn1 [[user(locn1)]];
-    float4 PLSIn0 [[user(locn0)]];
-};
-
 struct main0_out
 {
     float4 PLSOut0 [[color(0)]];
@@ -19,6 +11,14 @@ struct main0_out
     float4 PLSOut3 [[color(3)]];
 };
 
+struct main0_in
+{
+    float4 PLSIn0 [[user(locn0)]];
+    float4 PLSIn1 [[user(locn1)]];
+    float4 PLSIn2 [[user(locn2)]];
+    float4 PLSIn3 [[user(locn3)]];
+};
+
 fragment main0_out main0(main0_in in [[stage_in]])
 {
     main0_out out = {};
diff --git a/reference/opt/shaders-msl/frag/readonly-ssbo.frag b/reference/opt/shaders-msl/frag/readonly-ssbo.frag
new file mode 100644
index 0000000000..777fd65c8a
--- /dev/null
+++ b/reference/opt/shaders-msl/frag/readonly-ssbo.frag
@@ -0,0 +1,22 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct SSBO
+{
+    float4 v;
+};
+
+struct main0_out
+{
+    float4 FragColor [[color(0)]];
+};
+
+fragment main0_out main0(const device SSBO& _13 [[buffer(0)]])
+{
+    main0_out out = {};
+    out.FragColor = _13.v + _13.v;
+    return out;
+}
+
diff --git a/reference/opt/shaders-msl/frag/sample-depth-separate-image-sampler.frag b/reference/opt/shaders-msl/frag/sample-depth-separate-image-sampler.frag
index 6626946c45..17e1421a3d 100644
--- a/reference/opt/shaders-msl/frag/sample-depth-separate-image-sampler.frag
+++ b/reference/opt/shaders-msl/frag/sample-depth-separate-image-sampler.frag
@@ -8,7 +8,7 @@ struct main0_out
     float FragColor [[color(0)]];
 };
 
-fragment main0_out main0(depth2d<float> uDepth [[texture(0)]], texture2d<float> uColor [[texture(1)]], sampler uSamplerShadow [[sampler(0)]], sampler uSampler [[sampler(1)]])
+fragment main0_out main0(depth2d<float> uDepth [[texture(0)]], texture2d<float> uColor [[texture(1)]], sampler uSampler [[sampler(2)]], sampler uSamplerShadow [[sampler(3)]])
 {
     main0_out out = {};
     out.FragColor = uDepth.sample_compare(uSamplerShadow, float3(0.5).xy, 0.5) + uColor.sample(uSampler, float2(0.5)).x;
diff --git a/reference/opt/shaders-msl/frag/sampler-1d-lod.frag b/reference/opt/shaders-msl/frag/sampler-1d-lod.frag
new file mode 100644
index 0000000000..1da2036e3a
--- /dev/null
+++ b/reference/opt/shaders-msl/frag/sampler-1d-lod.frag
@@ -0,0 +1,22 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float4 FragColor [[color(0)]];
+};
+
+struct main0_in
+{
+    float vTex [[user(locn0)]];
+};
+
+fragment main0_out main0(main0_in in [[stage_in]], texture1d<float> uSampler [[texture(0)]], sampler uSamplerSmplr [[sampler(0)]])
+{
+    main0_out out = {};
+    out.FragColor += ((uSampler.sample(uSamplerSmplr, in.vTex) + uSampler.sample(uSamplerSmplr, in.vTex)) + uSampler.sample(uSamplerSmplr, in.vTex));
+    return out;
+}
+
diff --git a/reference/opt/shaders-msl/frag/sampler-image-arrays.msl2.frag b/reference/opt/shaders-msl/frag/sampler-image-arrays.msl2.frag
new file mode 100644
index 0000000000..5e9c488d5d
--- /dev/null
+++ b/reference/opt/shaders-msl/frag/sampler-image-arrays.msl2.frag
@@ -0,0 +1,28 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float4 FragColor [[color(0)]];
+};
+
+struct main0_in
+{
+    float2 vTex [[user(locn0)]];
+    int vIndex [[user(locn1)]];
+};
+
+fragment main0_out main0(main0_in in [[stage_in]], array<texture2d<float>, 4> uSampler [[texture(0)]], array<texture2d<float>, 4> uTextures [[texture(8)]], array<sampler, 4> uSamplerSmplr [[sampler(0)]], array<sampler, 4> uSamplers [[sampler(4)]])
+{
+    main0_out out = {};
+    out.FragColor = float4(0.0);
+    out.FragColor += uTextures[2].sample(uSamplers[1], in.vTex);
+    out.FragColor += uSampler[in.vIndex].sample(uSamplerSmplr[in.vIndex], in.vTex);
+    out.FragColor += uSampler[in.vIndex].sample(uSamplerSmplr[in.vIndex], (in.vTex + float2(0.100000001490116119384765625)));
+    out.FragColor += uSampler[in.vIndex].sample(uSamplerSmplr[in.vIndex], (in.vTex + float2(0.20000000298023223876953125)));
+    out.FragColor += uSampler[3].sample(uSamplerSmplr[3], (in.vTex + float2(0.300000011920928955078125)));
+    return out;
+}
+
diff --git a/reference/opt/shaders-msl/frag/sampler.frag b/reference/opt/shaders-msl/frag/sampler.frag
index 4d33ee7bca..f33db61eba 100644
--- a/reference/opt/shaders-msl/frag/sampler.frag
+++ b/reference/opt/shaders-msl/frag/sampler.frag
@@ -3,17 +3,17 @@
 
 using namespace metal;
 
-struct main0_in
-{
-    float2 vTex [[user(locn1)]];
-    float4 vColor [[user(locn0)]];
-};
-
 struct main0_out
 {
     float4 FragColor [[color(0)]];
 };
 
+struct main0_in
+{
+    float4 vColor [[user(locn0)]];
+    float2 vTex [[user(locn1)]];
+};
+
 fragment main0_out main0(main0_in in [[stage_in]], texture2d<float> uTex [[texture(0)]], sampler uTexSmplr [[sampler(0)]])
 {
     main0_out out = {};
diff --git a/reference/opt/shaders-msl/frag/separate-image-sampler-argument.frag b/reference/opt/shaders-msl/frag/separate-image-sampler-argument.frag
index e576b49e7e..b1261f3722 100644
--- a/reference/opt/shaders-msl/frag/separate-image-sampler-argument.frag
+++ b/reference/opt/shaders-msl/frag/separate-image-sampler-argument.frag
@@ -8,7 +8,7 @@ struct main0_out
     float4 FragColor [[color(0)]];
 };
 
-fragment main0_out main0(texture2d<float> uDepth [[texture(0)]], sampler uSampler [[sampler(0)]])
+fragment main0_out main0(texture2d<float> uDepth [[texture(1)]], sampler uSampler [[sampler(0)]])
 {
     main0_out out = {};
     out.FragColor = uDepth.sample(uSampler, float2(0.5));
diff --git a/reference/opt/shaders-msl/frag/shadow-compare-global-alias.frag b/reference/opt/shaders-msl/frag/shadow-compare-global-alias.frag
new file mode 100644
index 0000000000..926172577d
--- /dev/null
+++ b/reference/opt/shaders-msl/frag/shadow-compare-global-alias.frag
@@ -0,0 +1,27 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float FragColor [[color(0)]];
+};
+
+struct main0_in
+{
+    float3 vUV [[user(locn0)]];
+};
+
+fragment main0_out main0(main0_in in [[stage_in]], depth2d<float> uSampler [[texture(0)]], depth2d<float> uTex [[texture(1)]], sampler uSamplerSmplr [[sampler(0)]], sampler uSamp [[sampler(2)]])
+{
+    main0_out out = {};
+    out.FragColor = uSampler.sample_compare(uSamplerSmplr, in.vUV.xy, in.vUV.z);
+    out.FragColor += uTex.sample_compare(uSamp, in.vUV.xy, in.vUV.z);
+    out.FragColor += uTex.sample_compare(uSamp, in.vUV.xy, in.vUV.z);
+    out.FragColor += uSampler.sample_compare(uSamplerSmplr, in.vUV.xy, in.vUV.z);
+    out.FragColor += uTex.sample_compare(uSamp, in.vUV.xy, in.vUV.z);
+    out.FragColor += uSampler.sample_compare(uSamplerSmplr, in.vUV.xy, in.vUV.z);
+    return out;
+}
+
diff --git a/reference/opt/shaders-msl/frag/spec-constant-block-size.frag b/reference/opt/shaders-msl/frag/spec-constant-block-size.frag
index 4237d941fe..445f4362de 100644
--- a/reference/opt/shaders-msl/frag/spec-constant-block-size.frag
+++ b/reference/opt/shaders-msl/frag/spec-constant-block-size.frag
@@ -8,16 +8,16 @@ struct SpecConstArray
     float4 samples[2];
 };
 
-struct main0_in
-{
-    int Index [[user(locn0)]];
-};
-
 struct main0_out
 {
     float4 FragColor [[color(0)]];
 };
 
+struct main0_in
+{
+    int Index [[user(locn0)]];
+};
+
 fragment main0_out main0(main0_in in [[stage_in]], constant SpecConstArray& _15 [[buffer(0)]])
 {
     main0_out out = {};
diff --git a/reference/opt/shaders-msl/frag/swizzle.frag b/reference/opt/shaders-msl/frag/swizzle.frag
index eb46111f00..7a0494e064 100644
--- a/reference/opt/shaders-msl/frag/swizzle.frag
+++ b/reference/opt/shaders-msl/frag/swizzle.frag
@@ -3,17 +3,17 @@
 
 using namespace metal;
 
-struct main0_in
-{
-    float2 vUV [[user(locn2)]];
-    float3 vNormal [[user(locn1)]];
-};
-
 struct main0_out
 {
     float4 FragColor [[color(0)]];
 };
 
+struct main0_in
+{
+    float3 vNormal [[user(locn1)]];
+    float2 vUV [[user(locn2)]];
+};
+
 fragment main0_out main0(main0_in in [[stage_in]], texture2d<float> samp [[texture(0)]], sampler sampSmplr [[sampler(0)]])
 {
     main0_out out = {};
diff --git a/reference/opt/shaders-msl/frag/texture-proj-shadow.frag b/reference/opt/shaders-msl/frag/texture-proj-shadow.frag
index 8b9b03a59e..c5ab0ee007 100644
--- a/reference/opt/shaders-msl/frag/texture-proj-shadow.frag
+++ b/reference/opt/shaders-msl/frag/texture-proj-shadow.frag
@@ -3,19 +3,19 @@
 
 using namespace metal;
 
-struct main0_in
-{
-    float2 vClip2 [[user(locn2)]];
-    float4 vClip4 [[user(locn1)]];
-    float3 vClip3 [[user(locn0)]];
-};
-
 struct main0_out
 {
     float FragColor [[color(0)]];
 };
 
-fragment main0_out main0(main0_in in [[stage_in]], depth2d<float> uShadow2D [[texture(0)]], texture1d<float> uSampler1D [[texture(1)]], texture2d<float> uSampler2D [[texture(2)]], texture3d<float> uSampler3D [[texture(3)]], sampler uShadow2DSmplr [[sampler(0)]], sampler uSampler1DSmplr [[sampler(1)]], sampler uSampler2DSmplr [[sampler(2)]], sampler uSampler3DSmplr [[sampler(3)]])
+struct main0_in
+{
+    float3 vClip3 [[user(locn0)]];
+    float4 vClip4 [[user(locn1)]];
+    float2 vClip2 [[user(locn2)]];
+};
+
+fragment main0_out main0(main0_in in [[stage_in]], depth2d<float> uShadow2D [[texture(1)]], texture1d<float> uSampler1D [[texture(2)]], texture2d<float> uSampler2D [[texture(3)]], texture3d<float> uSampler3D [[texture(4)]], sampler uShadow2DSmplr [[sampler(1)]], sampler uSampler1DSmplr [[sampler(2)]], sampler uSampler2DSmplr [[sampler(3)]], sampler uSampler3DSmplr [[sampler(4)]])
 {
     main0_out out = {};
     float4 _20 = in.vClip4;
diff --git a/reference/opt/shaders-msl/frag/ubo_layout.frag b/reference/opt/shaders-msl/frag/ubo_layout.frag
index 8c03e33b39..0bc27462b2 100644
--- a/reference/opt/shaders-msl/frag/ubo_layout.frag
+++ b/reference/opt/shaders-msl/frag/ubo_layout.frag
@@ -13,9 +13,14 @@ struct UBO1
     Str foo;
 };
 
+struct Str_1
+{
+    float4x4 foo;
+};
+
 struct UBO2
 {
-    Str foo;
+    Str_1 foo;
 };
 
 struct main0_out
diff --git a/reference/opt/shaders-msl/frag/unary-enclose.frag b/reference/opt/shaders-msl/frag/unary-enclose.frag
index 7437f1dfe8..c8648f1e01 100644
--- a/reference/opt/shaders-msl/frag/unary-enclose.frag
+++ b/reference/opt/shaders-msl/frag/unary-enclose.frag
@@ -3,20 +3,20 @@
 
 using namespace metal;
 
-struct main0_in
-{
-    float4 vIn [[user(locn0)]];
-};
-
 struct main0_out
 {
     float4 FragColor [[color(0)]];
 };
 
+struct main0_in
+{
+    float4 vIn [[user(locn0)]];
+};
+
 fragment main0_out main0(main0_in in [[stage_in]])
 {
     main0_out out = {};
-    out.FragColor = -(-in.vIn);
+    out.FragColor = in.vIn;
     return out;
 }
 
diff --git a/reference/opt/shaders-msl/legacy/vert/transpose.legacy.vert b/reference/opt/shaders-msl/legacy/vert/transpose.legacy.vert
index b28067e589..abd884ca8f 100644
--- a/reference/opt/shaders-msl/legacy/vert/transpose.legacy.vert
+++ b/reference/opt/shaders-msl/legacy/vert/transpose.legacy.vert
@@ -10,16 +10,16 @@ struct Buffer
     float4x4 M;
 };
 
-struct main0_in
-{
-    float4 Position [[attribute(0)]];
-};
-
 struct main0_out
 {
     float4 gl_Position [[position]];
 };
 
+struct main0_in
+{
+    float4 Position [[attribute(0)]];
+};
+
 vertex main0_out main0(main0_in in [[stage_in]], constant Buffer& _13 [[buffer(0)]])
 {
     main0_out out = {};
diff --git a/reference/opt/shaders-msl/vert/basic.vert b/reference/opt/shaders-msl/vert/basic.vert
index 1592b5c5cf..ffb4357126 100644
--- a/reference/opt/shaders-msl/vert/basic.vert
+++ b/reference/opt/shaders-msl/vert/basic.vert
@@ -8,18 +8,18 @@ struct UBO
     float4x4 uMVP;
 };
 
-struct main0_in
-{
-    float3 aNormal [[attribute(1)]];
-    float4 aVertex [[attribute(0)]];
-};
-
 struct main0_out
 {
     float3 vNormal [[user(locn0)]];
     float4 gl_Position [[position]];
 };
 
+struct main0_in
+{
+    float4 aVertex [[attribute(0)]];
+    float3 aNormal [[attribute(1)]];
+};
+
 vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _16 [[buffer(0)]])
 {
     main0_out out = {};
diff --git a/reference/opt/shaders-msl/vert/copy.flatten.vert b/reference/opt/shaders-msl/vert/copy.flatten.vert
index dc87c849dc..4cc68e83f5 100644
--- a/reference/opt/shaders-msl/vert/copy.flatten.vert
+++ b/reference/opt/shaders-msl/vert/copy.flatten.vert
@@ -16,27 +16,29 @@ struct UBO
     Light lights[4];
 };
 
-struct main0_in
-{
-    float3 aNormal [[attribute(1)]];
-    float4 aVertex [[attribute(0)]];
-};
-
 struct main0_out
 {
     float4 vColor [[user(locn0)]];
     float4 gl_Position [[position]];
 };
 
+struct main0_in
+{
+    float4 aVertex [[attribute(0)]];
+    float3 aNormal [[attribute(1)]];
+};
+
 vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _21 [[buffer(0)]])
 {
     main0_out out = {};
     out.gl_Position = _21.uMVP * in.aVertex;
     out.vColor = float4(0.0);
-    for (int _103 = 0; _103 < 4; _103++)
+    for (int _96 = 0; _96 < 4; )
     {
-        float3 _68 = in.aVertex.xyz - _21.lights[_103].Position;
-        out.vColor += ((_21.lights[_103].Color * clamp(1.0 - (length(_68) / _21.lights[_103].Radius), 0.0, 1.0)) * dot(in.aNormal, normalize(_68)));
+        float3 _68 = in.aVertex.xyz - _21.lights[_96].Position;
+        out.vColor += ((_21.lights[_96].Color * clamp(1.0 - (length(_68) / _21.lights[_96].Radius), 0.0, 1.0)) * dot(in.aNormal, normalize(_68)));
+        _96++;
+        continue;
     }
     return out;
 }
diff --git a/reference/opt/shaders-msl/vert/dynamic.flatten.vert b/reference/opt/shaders-msl/vert/dynamic.flatten.vert
index eb38ab4fd1..38354358e2 100644
--- a/reference/opt/shaders-msl/vert/dynamic.flatten.vert
+++ b/reference/opt/shaders-msl/vert/dynamic.flatten.vert
@@ -16,27 +16,29 @@ struct UBO
     Light lights[4];
 };
 
-struct main0_in
-{
-    float3 aNormal [[attribute(1)]];
-    float4 aVertex [[attribute(0)]];
-};
-
 struct main0_out
 {
     float4 vColor [[user(locn0)]];
     float4 gl_Position [[position]];
 };
 
+struct main0_in
+{
+    float4 aVertex [[attribute(0)]];
+    float3 aNormal [[attribute(1)]];
+};
+
 vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _21 [[buffer(0)]])
 {
     main0_out out = {};
     out.gl_Position = _21.uMVP * in.aVertex;
     out.vColor = float4(0.0);
-    for (int _82 = 0; _82 < 4; _82++)
+    for (int _82 = 0; _82 < 4; )
     {
-        float3 _54 = in.aVertex.xyz - _21.lights[_82].Position;
+        float3 _54 = in.aVertex.xyz - float3(_21.lights[_82].Position);
         out.vColor += ((_21.lights[_82].Color * clamp(1.0 - (length(_54) / _21.lights[_82].Radius), 0.0, 1.0)) * dot(in.aNormal, normalize(_54)));
+        _82++;
+        continue;
     }
     return out;
 }
diff --git a/reference/opt/shaders-msl/vert/functions.vert b/reference/opt/shaders-msl/vert/functions.vert
index 6e07667b69..f710225261 100644
--- a/reference/opt/shaders-msl/vert/functions.vert
+++ b/reference/opt/shaders-msl/vert/functions.vert
@@ -13,12 +13,6 @@ struct UBO
     int2 bits;
 };
 
-struct main0_in
-{
-    float3 aNormal [[attribute(1)]];
-    float4 aVertex [[attribute(0)]];
-};
-
 struct main0_out
 {
     float3 vNormal [[user(locn0)]];
@@ -29,18 +23,24 @@ struct main0_out
     float4 gl_Position [[position]];
 };
 
+struct main0_in
+{
+    float4 aVertex [[attribute(0)]];
+    float3 aNormal [[attribute(1)]];
+};
+
 // Implementation of the GLSL radians() function
 template<typename T>
 T radians(T d)
 {
-    return d * 0.01745329251;
+    return d * T(0.01745329251);
 }
 
 // Implementation of the GLSL degrees() function
 template<typename T>
 T degrees(T r)
 {
-    return r * 57.2957795131;
+    return r * T(57.2957795131);
 }
 
 // Implementation of the GLSL findLSB() function
diff --git a/reference/opt/shaders-msl/vert/in_out_array_mat.vert b/reference/opt/shaders-msl/vert/in_out_array_mat.vert
new file mode 100644
index 0000000000..0d6976e260
--- /dev/null
+++ b/reference/opt/shaders-msl/vert/in_out_array_mat.vert
@@ -0,0 +1,67 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct UBO
+{
+    float4x4 projection;
+    float4x4 model;
+    float lodBias;
+};
+
+struct main0_out
+{
+    float3 outPos [[user(locn0)]];
+    float3 outNormal [[user(locn1)]];
+    float4 outTransModel_0 [[user(locn2)]];
+    float4 outTransModel_1 [[user(locn3)]];
+    float4 outTransModel_2 [[user(locn4)]];
+    float4 outTransModel_3 [[user(locn5)]];
+    float outLodBias [[user(locn6)]];
+    float4 color [[user(locn7)]];
+    float4 gl_Position [[position]];
+};
+
+struct main0_in
+{
+    float3 inPos [[attribute(0)]];
+    float4 colors_0 [[attribute(1)]];
+    float4 colors_1 [[attribute(2)]];
+    float4 colors_2 [[attribute(3)]];
+    float3 inNormal [[attribute(4)]];
+    float4 inViewMat_0 [[attribute(5)]];
+    float4 inViewMat_1 [[attribute(6)]];
+    float4 inViewMat_2 [[attribute(7)]];
+    float4 inViewMat_3 [[attribute(8)]];
+};
+
+vertex main0_out main0(main0_in in [[stage_in]], constant UBO& ubo [[buffer(0)]])
+{
+    main0_out out = {};
+    float4x4 outTransModel = {};
+    float4 colors[3] = {};
+    float4x4 inViewMat = {};
+    colors[0] = in.colors_0;
+    colors[1] = in.colors_1;
+    colors[2] = in.colors_2;
+    inViewMat[0] = in.inViewMat_0;
+    inViewMat[1] = in.inViewMat_1;
+    inViewMat[2] = in.inViewMat_2;
+    inViewMat[3] = in.inViewMat_3;
+    float4 _64 = float4(in.inPos, 1.0);
+    out.gl_Position = (ubo.projection * ubo.model) * _64;
+    out.outPos = float3((ubo.model * _64).xyz);
+    out.outNormal = float3x3(float3(ubo.model[0].x, ubo.model[0].y, ubo.model[0].z), float3(ubo.model[1].x, ubo.model[1].y, ubo.model[1].z), float3(ubo.model[2].x, ubo.model[2].y, ubo.model[2].z)) * in.inNormal;
+    out.outLodBias = ubo.lodBias;
+    outTransModel = transpose(ubo.model) * inViewMat;
+    outTransModel[2] = float4(in.inNormal, 1.0);
+    outTransModel[1].y = ubo.lodBias;
+    out.color = colors[2];
+    out.outTransModel_0 = outTransModel[0];
+    out.outTransModel_1 = outTransModel[1];
+    out.outTransModel_2 = outTransModel[2];
+    out.outTransModel_3 = outTransModel[3];
+    return out;
+}
+
diff --git a/reference/opt/shaders-msl/vert/out_block.vert b/reference/opt/shaders-msl/vert/out_block.vert
index 3ae18387a6..cf1334ec0b 100644
--- a/reference/opt/shaders-msl/vert/out_block.vert
+++ b/reference/opt/shaders-msl/vert/out_block.vert
@@ -8,12 +8,6 @@ struct Transform
     float4x4 transform;
 };
 
-struct main0_in
-{
-    float4 color [[attribute(1)]];
-    float3 position [[attribute(0)]];
-};
-
 struct main0_out
 {
     float4 VertexOut_color [[user(locn2)]];
@@ -21,6 +15,12 @@ struct main0_out
     float4 gl_Position [[position]];
 };
 
+struct main0_in
+{
+    float3 position [[attribute(0)]];
+    float4 color [[attribute(1)]];
+};
+
 vertex main0_out main0(main0_in in [[stage_in]], constant Transform& block [[buffer(0)]])
 {
     main0_out out = {};
diff --git a/reference/opt/shaders-msl/vert/packed_matrix.vert b/reference/opt/shaders-msl/vert/packed_matrix.vert
index 53d7d164fa..4483ad79e7 100644
--- a/reference/opt/shaders-msl/vert/packed_matrix.vert
+++ b/reference/opt/shaders-msl/vert/packed_matrix.vert
@@ -26,28 +26,22 @@ struct _42
     float2 _m9;
 };
 
-struct main0_in
-{
-    float4 m_25 [[attribute(0)]];
-};
-
 struct main0_out
 {
     float3 m_72 [[user(locn0)]];
     float4 gl_Position [[position]];
 };
 
-vertex main0_out main0(main0_in in [[stage_in]], constant _15& _17 [[buffer(0)]], constant _42& _44 [[buffer(1)]])
+struct main0_in
+{
+    float4 m_25 [[attribute(0)]];
+};
+
+vertex main0_out main0(main0_in in [[stage_in]], constant _42& _44 [[buffer(12)]], constant _15& _17 [[buffer(13)]])
 {
     main0_out out = {};
-    float3 _34;
-    do
-    {
-        _34 = normalize(float4(in.m_25.xyz, 0.0) * _17._m1);
-        break;
-    } while (false);
-    float4 _70 = _44._m0 * float4(_44._m3 + (in.m_25.xyz * (_44._m6 + _44._m7)), 1.0);
-    out.m_72 = _34;
+    float4 _70 = _44._m0 * float4(float3(_44._m3) + (in.m_25.xyz * (_44._m6 + _44._m7)), 1.0);
+    out.m_72 = normalize(float4(in.m_25.xyz, 0.0) * _17._m1);
     float4 _95 = _70;
     _95.y = -_70.y;
     out.gl_Position = _95;
diff --git a/reference/opt/shaders-msl/vert/pointsize.vert b/reference/opt/shaders-msl/vert/pointsize.vert
index faf828b4d3..8e5782bde4 100644
--- a/reference/opt/shaders-msl/vert/pointsize.vert
+++ b/reference/opt/shaders-msl/vert/pointsize.vert
@@ -9,12 +9,6 @@ struct params
     float psize;
 };
 
-struct main0_in
-{
-    float4 color0 [[attribute(1)]];
-    float4 position [[attribute(0)]];
-};
-
 struct main0_out
 {
     float4 color [[user(locn0)]];
@@ -22,6 +16,12 @@ struct main0_out
     float gl_PointSize [[point_size]];
 };
 
+struct main0_in
+{
+    float4 position [[attribute(0)]];
+    float4 color0 [[attribute(1)]];
+};
+
 vertex main0_out main0(main0_in in [[stage_in]], constant params& _19 [[buffer(0)]])
 {
     main0_out out = {};
diff --git a/reference/opt/shaders-msl/vert/read-from-row-major-array.vert b/reference/opt/shaders-msl/vert/read-from-row-major-array.vert
new file mode 100644
index 0000000000..9b85a25956
--- /dev/null
+++ b/reference/opt/shaders-msl/vert/read-from-row-major-array.vert
@@ -0,0 +1,37 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct Block
+{
+    float2x3 var[3][4];
+};
+
+struct main0_out
+{
+    float v_vtxResult [[user(locn0)]];
+    float4 gl_Position [[position]];
+};
+
+struct main0_in
+{
+    float4 a_position [[attribute(0)]];
+};
+
+// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization.
+float2x3 spvConvertFromRowMajor2x3(float2x3 m)
+{
+    return float2x3(float3(m[0][0], m[0][2], m[1][1]), float3(m[0][1], m[1][0], m[1][2]));
+}
+
+vertex main0_out main0(main0_in in [[stage_in]], constant Block& _104 [[buffer(0)]])
+{
+    main0_out out = {};
+    out.gl_Position = in.a_position;
+    out.v_vtxResult = ((float(abs(spvConvertFromRowMajor2x3(_104.var[0][0])[0].x - 2.0) < 0.0500000007450580596923828125) * float(abs(spvConvertFromRowMajor2x3(_104.var[0][0])[0].y - 6.0) < 0.0500000007450580596923828125)) * float(abs(spvConvertFromRowMajor2x3(_104.var[0][0])[0].z - (-6.0)) < 0.0500000007450580596923828125)) * ((float(abs(spvConvertFromRowMajor2x3(_104.var[0][0])[1].x) < 0.0500000007450580596923828125) * float(abs(spvConvertFromRowMajor2x3(_104.var[0][0])[1].y - 5.0) < 0.0500000007450580596923828125)) * float(abs(spvConvertFromRowMajor2x3(_104.var[0][0])[1].z - 5.0) < 0.0500000007450580596923828125));
+    return out;
+}
+
diff --git a/reference/opt/shaders-msl/vert/return-array.vert b/reference/opt/shaders-msl/vert/return-array.vert
index 7804d2d94f..ce13349a0f 100644
--- a/reference/opt/shaders-msl/vert/return-array.vert
+++ b/reference/opt/shaders-msl/vert/return-array.vert
@@ -3,18 +3,16 @@
 
 using namespace metal;
 
-constant float4 _20[2] = {float4(10.0), float4(20.0)};
+struct main0_out
+{
+    float4 gl_Position [[position]];
+};
 
 struct main0_in
 {
     float4 vInput1 [[attribute(1)]];
 };
 
-struct main0_out
-{
-    float4 gl_Position [[position]];
-};
-
 vertex main0_out main0(main0_in in [[stage_in]])
 {
     main0_out out = {};
diff --git a/reference/opt/shaders-msl/vert/texture_buffer.vert b/reference/opt/shaders-msl/vert/texture_buffer.vert
index 690757b830..f7bcb7918b 100644
--- a/reference/opt/shaders-msl/vert/texture_buffer.vert
+++ b/reference/opt/shaders-msl/vert/texture_buffer.vert
@@ -8,7 +8,7 @@ struct main0_out
     float4 gl_Position [[position]];
 };
 
-vertex main0_out main0(texture2d<float> uSamp [[texture(0)]], texture2d<float> uSampo [[texture(1)]])
+vertex main0_out main0(texture2d<float> uSamp [[texture(4)]], texture2d<float> uSampo [[texture(5)]])
 {
     main0_out out = {};
     out.gl_Position = uSamp.read(uint2(10, 0)) + uSampo.read(uint2(100, 0));
diff --git a/reference/opt/shaders-msl/vert/ubo.alignment.vert b/reference/opt/shaders-msl/vert/ubo.alignment.vert
index 6e48ae0e42..9a7ea56c61 100644
--- a/reference/opt/shaders-msl/vert/ubo.alignment.vert
+++ b/reference/opt/shaders-msl/vert/ubo.alignment.vert
@@ -12,12 +12,6 @@ struct UBO
     float opacity;
 };
 
-struct main0_in
-{
-    float3 aNormal [[attribute(1)]];
-    float4 aVertex [[attribute(0)]];
-};
-
 struct main0_out
 {
     float3 vNormal [[user(locn0)]];
@@ -26,12 +20,18 @@ struct main0_out
     float4 gl_Position [[position]];
 };
 
+struct main0_in
+{
+    float4 aVertex [[attribute(0)]];
+    float3 aNormal [[attribute(1)]];
+};
+
 vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _18 [[buffer(0)]])
 {
     main0_out out = {};
     out.gl_Position = _18.mvp * in.aVertex;
     out.vNormal = in.aNormal;
-    out.vColor = _18.color * _18.opacity;
+    out.vColor = float3(_18.color) * _18.opacity;
     out.vSize = _18.targSize * _18.opacity;
     return out;
 }
diff --git a/reference/opt/shaders-msl/vert/ubo.vert b/reference/opt/shaders-msl/vert/ubo.vert
index 4a1adcd7f6..86ba1e9687 100644
--- a/reference/opt/shaders-msl/vert/ubo.vert
+++ b/reference/opt/shaders-msl/vert/ubo.vert
@@ -8,18 +8,18 @@ struct UBO
     float4x4 mvp;
 };
 
-struct main0_in
-{
-    float3 aNormal [[attribute(1)]];
-    float4 aVertex [[attribute(0)]];
-};
-
 struct main0_out
 {
     float3 vNormal [[user(locn0)]];
     float4 gl_Position [[position]];
 };
 
+struct main0_in
+{
+    float4 aVertex [[attribute(0)]];
+    float3 aNormal [[attribute(1)]];
+};
+
 vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _16 [[buffer(0)]])
 {
     main0_out out = {};
diff --git a/reference/opt/shaders-msl/vulkan/frag/push-constant.vk.frag b/reference/opt/shaders-msl/vulkan/frag/push-constant.vk.frag
index bc97e3cc51..7b8c502b32 100644
--- a/reference/opt/shaders-msl/vulkan/frag/push-constant.vk.frag
+++ b/reference/opt/shaders-msl/vulkan/frag/push-constant.vk.frag
@@ -9,16 +9,16 @@ struct PushConstants
     float4 value1;
 };
 
-struct main0_in
-{
-    float4 vColor [[user(locn0)]];
-};
-
 struct main0_out
 {
     float4 FragColor [[color(0)]];
 };
 
+struct main0_in
+{
+    float4 vColor [[user(locn0)]];
+};
+
 fragment main0_out main0(main0_in in [[stage_in]], constant PushConstants& push [[buffer(0)]])
 {
     main0_out out = {};
diff --git a/reference/opt/shaders/amd/fragmentMaskFetch_subpassInput.vk.nocompat.invalid.frag b/reference/opt/shaders/amd/fragmentMaskFetch_subpassInput.vk.nocompat.invalid.frag
deleted file mode 100644
index d670898481..0000000000
--- a/reference/opt/shaders/amd/fragmentMaskFetch_subpassInput.vk.nocompat.invalid.frag
+++ /dev/null
@@ -1,11 +0,0 @@
-#version 450
-#extension GL_AMD_shader_fragment_mask : require
-
-layout(binding = 0) uniform sampler2DMS t;
-
-void main()
-{
-    vec4 test2 = fragmentFetchAMD(t, 4u);
-    uint testi2 = fragmentMaskFetchAMD(t);
-}
-
diff --git a/reference/opt/shaders/amd/fs.invalid.frag b/reference/opt/shaders/amd/fs.invalid.frag
index 97e7bcd180..aecf69eba7 100644
--- a/reference/opt/shaders/amd/fs.invalid.frag
+++ b/reference/opt/shaders/amd/fs.invalid.frag
@@ -2,9 +2,9 @@
 #extension GL_AMD_shader_fragment_mask : require
 #extension GL_AMD_shader_explicit_vertex_parameter : require
 
-uniform sampler2DMS texture1;
+layout(binding = 0) uniform sampler2DMS texture1;
 
-layout(location = 0) in vec4 vary;
+layout(location = 0) __explicitInterpAMD in vec4 vary;
 
 void main()
 {
diff --git a/reference/opt/shaders/amd/gcn_shader.comp b/reference/opt/shaders/amd/gcn_shader.comp
index 85851de5f9..e4bb67e9ba 100644
--- a/reference/opt/shaders/amd/gcn_shader.comp
+++ b/reference/opt/shaders/amd/gcn_shader.comp
@@ -1,5 +1,6 @@
 #version 450
 #extension GL_ARB_gpu_shader_int64 : require
+#extension GL_AMD_gcn_shader : require
 layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
 
 void main()
diff --git a/reference/opt/shaders/amd/shader_ballot.comp b/reference/opt/shaders/amd/shader_ballot.comp
index 8bdbfc9c0d..6c14bba422 100644
--- a/reference/opt/shaders/amd/shader_ballot.comp
+++ b/reference/opt/shaders/amd/shader_ballot.comp
@@ -9,18 +9,20 @@ layout(binding = 0, std430) buffer inputData
     float inputDataArray[];
 } _12;
 
-layout(binding = 1, std430) buffer outputData
+layout(binding = 1, std430) buffer _10_74
 {
-    float outputDataArray[];
+    float inputDataArray[];
 } _74;
 
 void main()
 {
     float _25 = _12.inputDataArray[gl_LocalInvocationID.x];
     bool _31 = _25 > 0.0;
+    uvec4 _37 = uvec4(unpackUint2x32(ballotARB(_31)), 0u, 0u);
+    uint _44 = mbcntAMD(packUint2x32(uvec2(_37.xy)));
     if (_31)
     {
-        _74.outputDataArray[mbcntAMD(packUint2x32(uvec2(unpackUint2x32(ballotARB(_31)).xy)))] = _25;
+        _74.inputDataArray[_44] = _25;
     }
 }
 
diff --git a/reference/opt/shaders/amd/shader_group_vote.comp b/reference/opt/shaders/amd/shader_group_vote.comp
index 77ea03495f..266998177c 100644
--- a/reference/opt/shaders/amd/shader_group_vote.comp
+++ b/reference/opt/shaders/amd/shader_group_vote.comp
@@ -1,7 +1,14 @@
 #version 450
+#extension GL_ARB_shader_group_vote : require
 layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
 
+layout(binding = 0, std430) buffer inputData
+{
+    float inputDataArray[];
+} _12;
+
 void main()
 {
+    bool _31 = _12.inputDataArray[gl_LocalInvocationID.x] > 0.0;
 }
 
diff --git a/reference/opt/shaders/amd/shader_trinary_minmax.comp b/reference/opt/shaders/amd/shader_trinary_minmax.comp
index 77ea03495f..2644551e4a 100644
--- a/reference/opt/shaders/amd/shader_trinary_minmax.comp
+++ b/reference/opt/shaders/amd/shader_trinary_minmax.comp
@@ -1,4 +1,5 @@
 #version 450
+#extension GL_AMD_shader_trinary_minmax : require
 layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
 
 void main()
diff --git a/reference/opt/shaders/asm/comp/bitcast_iequal.asm.comp b/reference/opt/shaders/asm/comp/bitcast_iequal.asm.comp
index 79398b404b..bdb3eeb9af 100644
--- a/reference/opt/shaders/asm/comp/bitcast_iequal.asm.comp
+++ b/reference/opt/shaders/asm/comp/bitcast_iequal.asm.comp
@@ -15,10 +15,12 @@ layout(binding = 1, std430) buffer _4_6
 
 void main()
 {
-    bvec4 _34 = equal(ivec4(_5._m1), _5._m0);
-    bvec4 _35 = equal(_5._m0, ivec4(_5._m1));
-    bvec4 _36 = equal(_5._m1, _5._m1);
-    bvec4 _37 = equal(_5._m0, _5._m0);
+    ivec4 _30 = _5._m0;
+    uvec4 _31 = _5._m1;
+    bvec4 _34 = equal(ivec4(_31), _30);
+    bvec4 _35 = equal(_30, ivec4(_31));
+    bvec4 _36 = equal(_31, _31);
+    bvec4 _37 = equal(_30, _30);
     _6._m0 = mix(uvec4(0u), uvec4(1u), _34);
     _6._m0 = mix(uvec4(0u), uvec4(1u), _35);
     _6._m0 = mix(uvec4(0u), uvec4(1u), _36);
diff --git a/reference/opt/shaders/asm/comp/bitcast_sar.asm.comp b/reference/opt/shaders/asm/comp/bitcast_sar.asm.comp
index 42a4ed0233..283b444cce 100644
--- a/reference/opt/shaders/asm/comp/bitcast_sar.asm.comp
+++ b/reference/opt/shaders/asm/comp/bitcast_sar.asm.comp
@@ -15,13 +15,15 @@ layout(binding = 1, std430) buffer _4_6
 
 void main()
 {
-    _6._m0 = uvec4(ivec4(_5._m1) >> _5._m0);
-    _6._m0 = uvec4(_5._m0 >> ivec4(_5._m1));
-    _6._m0 = uvec4(ivec4(_5._m1) >> ivec4(_5._m1));
-    _6._m0 = uvec4(_5._m0 >> _5._m0);
-    _6._m1 = ivec4(_5._m1) >> ivec4(_5._m1);
-    _6._m1 = _5._m0 >> _5._m0;
-    _6._m1 = ivec4(_5._m1) >> _5._m0;
-    _6._m1 = _5._m0 >> ivec4(_5._m1);
+    ivec4 _22 = _5._m0;
+    uvec4 _23 = _5._m1;
+    _6._m0 = uvec4(ivec4(_23) >> _22);
+    _6._m0 = uvec4(_22 >> ivec4(_23));
+    _6._m0 = uvec4(ivec4(_23) >> ivec4(_23));
+    _6._m0 = uvec4(_22 >> _22);
+    _6._m1 = ivec4(_23) >> ivec4(_23);
+    _6._m1 = _22 >> _22;
+    _6._m1 = ivec4(_23) >> _22;
+    _6._m1 = _22 >> ivec4(_23);
 }
 
diff --git a/reference/opt/shaders/asm/comp/bitcast_sdiv.asm.comp b/reference/opt/shaders/asm/comp/bitcast_sdiv.asm.comp
index eeb97e14a2..e28c481d21 100644
--- a/reference/opt/shaders/asm/comp/bitcast_sdiv.asm.comp
+++ b/reference/opt/shaders/asm/comp/bitcast_sdiv.asm.comp
@@ -15,13 +15,15 @@ layout(binding = 1, std430) buffer _4_6
 
 void main()
 {
-    _6._m0 = uvec4(ivec4(_5._m1) / _5._m0);
-    _6._m0 = uvec4(_5._m0 / ivec4(_5._m1));
-    _6._m0 = uvec4(ivec4(_5._m1) / ivec4(_5._m1));
-    _6._m0 = uvec4(_5._m0 / _5._m0);
-    _6._m1 = ivec4(_5._m1) / ivec4(_5._m1);
-    _6._m1 = _5._m0 / _5._m0;
-    _6._m1 = ivec4(_5._m1) / _5._m0;
-    _6._m1 = _5._m0 / ivec4(_5._m1);
+    ivec4 _22 = _5._m0;
+    uvec4 _23 = _5._m1;
+    _6._m0 = uvec4(ivec4(_23) / _22);
+    _6._m0 = uvec4(_22 / ivec4(_23));
+    _6._m0 = uvec4(ivec4(_23) / ivec4(_23));
+    _6._m0 = uvec4(_22 / _22);
+    _6._m1 = ivec4(_23) / ivec4(_23);
+    _6._m1 = _22 / _22;
+    _6._m1 = ivec4(_23) / _22;
+    _6._m1 = _22 / ivec4(_23);
 }
 
diff --git a/reference/opt/shaders/asm/comp/bitcast_slr.asm.comp b/reference/opt/shaders/asm/comp/bitcast_slr.asm.comp
index 25245e63eb..78efaf3852 100644
--- a/reference/opt/shaders/asm/comp/bitcast_slr.asm.comp
+++ b/reference/opt/shaders/asm/comp/bitcast_slr.asm.comp
@@ -15,13 +15,15 @@ layout(binding = 1, std430) buffer _4_6
 
 void main()
 {
-    _6._m0 = _5._m1 >> uvec4(_5._m0);
-    _6._m0 = uvec4(_5._m0) >> _5._m1;
-    _6._m0 = _5._m1 >> _5._m1;
-    _6._m0 = uvec4(_5._m0) >> uvec4(_5._m0);
-    _6._m1 = ivec4(_5._m1 >> _5._m1);
-    _6._m1 = ivec4(uvec4(_5._m0) >> uvec4(_5._m0));
-    _6._m1 = ivec4(_5._m1 >> uvec4(_5._m0));
-    _6._m1 = ivec4(uvec4(_5._m0) >> _5._m1);
+    ivec4 _22 = _5._m0;
+    uvec4 _23 = _5._m1;
+    _6._m0 = _23 >> uvec4(_22);
+    _6._m0 = uvec4(_22) >> _23;
+    _6._m0 = _23 >> _23;
+    _6._m0 = uvec4(_22) >> uvec4(_22);
+    _6._m1 = ivec4(_23 >> _23);
+    _6._m1 = ivec4(uvec4(_22) >> uvec4(_22));
+    _6._m1 = ivec4(_23 >> uvec4(_22));
+    _6._m1 = ivec4(uvec4(_22) >> _23);
 }
 
diff --git a/reference/opt/shaders/asm/comp/hlsl-functionality.asm.comp b/reference/opt/shaders/asm/comp/hlsl-functionality.asm.comp
new file mode 100644
index 0000000000..29bc02c693
--- /dev/null
+++ b/reference/opt/shaders/asm/comp/hlsl-functionality.asm.comp
@@ -0,0 +1,19 @@
+#version 450
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+layout(binding = 0, std430) buffer Buf
+{
+    vec4 _data[];
+} Buf_1;
+
+layout(std430) buffer Buf_count
+{
+    int _count;
+} Buf_count_1;
+
+void main()
+{
+    int _32 = atomicAdd(Buf_count_1._count, 1);
+    Buf_1._data[_32] = vec4(1.0);
+}
+
diff --git a/reference/opt/shaders/asm/comp/name-alias.asm.invalid.comp b/reference/opt/shaders/asm/comp/name-alias.asm.invalid.comp
deleted file mode 100644
index 870b1df98d..0000000000
--- a/reference/opt/shaders/asm/comp/name-alias.asm.invalid.comp
+++ /dev/null
@@ -1,37 +0,0 @@
-#version 310 es
-layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
-
-struct alias
-{
-    vec3 alias[100];
-};
-
-struct alias_1
-{
-    vec4 alias;
-    vec2 alias_1[10];
-    alias alias_2[2];
-};
-
-struct alias_2
-{
-    vec4 alias;
-    alias_1 alias_1;
-};
-
-layout(binding = 0, std430) buffer alias_3
-{
-    alias_2 alias;
-} alias_4;
-
-layout(binding = 1, std140) buffer alias_5
-{
-    alias_2 alias;
-} alias_6;
-
-void main()
-{
-    alias_2 alias_7 = alias_4.alias;
-    alias_6.alias = alias_7;
-}
-
diff --git a/reference/shaders/asm/comp/storage-buffer-basic.asm.comp b/reference/opt/shaders/asm/comp/storage-buffer-basic.invalid.asm.comp
similarity index 100%
rename from reference/shaders/asm/comp/storage-buffer-basic.asm.comp
rename to reference/opt/shaders/asm/comp/storage-buffer-basic.invalid.asm.comp
diff --git a/reference/opt/shaders/asm/frag/combined-sampler-reuse.vk.asm.frag b/reference/opt/shaders/asm/frag/combined-sampler-reuse.vk.asm.frag
new file mode 100644
index 0000000000..b5e59f88bb
--- /dev/null
+++ b/reference/opt/shaders/asm/frag/combined-sampler-reuse.vk.asm.frag
@@ -0,0 +1,13 @@
+#version 450
+
+uniform sampler2D SPIRV_Cross_CombineduTexuSampler;
+
+layout(location = 0) out vec4 FragColor;
+layout(location = 0) in vec2 vUV;
+
+void main()
+{
+    FragColor = texture(SPIRV_Cross_CombineduTexuSampler, vUV);
+    FragColor += textureOffset(SPIRV_Cross_CombineduTexuSampler, vUV, ivec2(1));
+}
+
diff --git a/reference/opt/shaders/asm/frag/combined-sampler-reuse.vk.asm.frag.vk b/reference/opt/shaders/asm/frag/combined-sampler-reuse.vk.asm.frag.vk
new file mode 100644
index 0000000000..bce9808950
--- /dev/null
+++ b/reference/opt/shaders/asm/frag/combined-sampler-reuse.vk.asm.frag.vk
@@ -0,0 +1,14 @@
+#version 450
+
+layout(set = 0, binding = 1) uniform texture2D uTex;
+layout(set = 0, binding = 0) uniform sampler uSampler;
+
+layout(location = 0) out vec4 FragColor;
+layout(location = 0) in vec2 vUV;
+
+void main()
+{
+    FragColor = texture(sampler2D(uTex, uSampler), vUV);
+    FragColor += textureOffset(sampler2D(uTex, uSampler), vUV, ivec2(1));
+}
+
diff --git a/reference/opt/shaders/asm/frag/complex-name-workarounds.asm.frag b/reference/opt/shaders/asm/frag/complex-name-workarounds.asm.frag
new file mode 100644
index 0000000000..c07f1657f4
--- /dev/null
+++ b/reference/opt/shaders/asm/frag/complex-name-workarounds.asm.frag
@@ -0,0 +1,16 @@
+#version 450
+
+layout(location = 0) in vec4 _;
+layout(location = 1) in vec4 a;
+layout(location = 0) out vec4 b;
+
+void main()
+{
+    vec4 _28 = (_ + a) + _;
+    vec4 _34 = (_ - a) + a;
+    b = _28;
+    b = _34;
+    b = _28;
+    b = _34;
+}
+
diff --git a/reference/opt/shaders/asm/frag/composite-construct-struct-no-swizzle.asm.frag b/reference/opt/shaders/asm/frag/composite-construct-struct-no-swizzle.asm.frag
index 23af17026c..45e83dbc18 100644
--- a/reference/opt/shaders/asm/frag/composite-construct-struct-no-swizzle.asm.frag
+++ b/reference/opt/shaders/asm/frag/composite-construct-struct-no-swizzle.asm.frag
@@ -2,12 +2,6 @@
 precision mediump float;
 precision highp int;
 
-struct SwizzleTest
-{
-    float a;
-    float b;
-};
-
 layout(location = 0) in vec2 foo;
 layout(location = 0) out float FooOut;
 
diff --git a/reference/opt/shaders/asm/frag/default-member-names.asm.frag b/reference/opt/shaders/asm/frag/default-member-names.asm.frag
index 2cf68fd201..7203f2be6a 100644
--- a/reference/opt/shaders/asm/frag/default-member-names.asm.frag
+++ b/reference/opt/shaders/asm/frag/default-member-names.asm.frag
@@ -1,33 +1,11 @@
 #version 450
 
-struct _9
-{
-    float _m0;
-};
-
-struct _10
-{
-    float _m0;
-    float _m1;
-    float _m2;
-    float _m3;
-    float _m4;
-    float _m5;
-    float _m6;
-    float _m7;
-    float _m8;
-    float _m9;
-    float _m10;
-    float _m11;
-    _9 _m12;
-};
-
 layout(location = 0) out vec4 _3;
 
-_10 _51;
+float _57;
 
 void main()
 {
-    _3 = vec4(_51._m0, _51._m1, _51._m2, _51._m3);
+    _3 = vec4(_57);
 }
 
diff --git a/reference/opt/shaders/asm/vert/empty-struct-composite.asm.vert b/reference/opt/shaders/asm/frag/empty-struct.asm.frag
similarity index 100%
rename from reference/opt/shaders/asm/vert/empty-struct-composite.asm.vert
rename to reference/opt/shaders/asm/frag/empty-struct.asm.frag
diff --git a/reference/opt/shaders/asm/frag/function-overload-alias.asm.frag b/reference/opt/shaders/asm/frag/function-overload-alias.asm.frag
index f726fabb18..16b4994148 100644
--- a/reference/opt/shaders/asm/frag/function-overload-alias.asm.frag
+++ b/reference/opt/shaders/asm/frag/function-overload-alias.asm.frag
@@ -6,6 +6,6 @@ layout(location = 0) out vec4 FragColor;
 
 void main()
 {
-    FragColor = (((vec4(1.0) + vec4(1.0)) + (vec3(1.0).xyzz + vec4(1.0))) + (vec4(1.0) + vec4(2.0))) + (vec2(1.0).xyxy + vec4(2.0));
+    FragColor = vec4(10.0);
 }
 
diff --git a/reference/opt/shaders/asm/frag/image-extract-reuse.asm.frag b/reference/opt/shaders/asm/frag/image-extract-reuse.asm.frag
new file mode 100644
index 0000000000..ab2749b4df
--- /dev/null
+++ b/reference/opt/shaders/asm/frag/image-extract-reuse.asm.frag
@@ -0,0 +1,11 @@
+#version 450
+
+layout(binding = 0) uniform sampler2D uTexture;
+
+layout(location = 0) out ivec2 Size;
+
+void main()
+{
+    Size = textureSize(uTexture, 0) + textureSize(uTexture, 1);
+}
+
diff --git a/reference/opt/shaders/asm/frag/image-fetch-no-sampler.asm.vk.frag b/reference/opt/shaders/asm/frag/image-fetch-no-sampler.asm.vk.frag
index ebf8c1201c..452fd6fb95 100644
--- a/reference/opt/shaders/asm/frag/image-fetch-no-sampler.asm.vk.frag
+++ b/reference/opt/shaders/asm/frag/image-fetch-no-sampler.asm.vk.frag
@@ -7,7 +7,7 @@ layout(location = 0) out vec4 _entryPointOutput;
 
 void main()
 {
-    ivec3 _122 = ivec3(int(gl_FragCoord.x * 1280.0), int(gl_FragCoord.y * 720.0), 0);
-    _entryPointOutput = ((texelFetch(SPIRV_Cross_CombinedSampledImageSPIRV_Cross_DummySampler, _122.xy, 0) + texelFetch(SPIRV_Cross_CombinedSampledImageSPIRV_Cross_DummySampler, _122.xy, 0)) + texture(SPIRV_Cross_CombinedSampledImageSampler, gl_FragCoord.xy)) + texture(SPIRV_Cross_CombinedSampledImageSampler, gl_FragCoord.xy);
+    ivec2 _152 = ivec3(int(gl_FragCoord.x * 1280.0), int(gl_FragCoord.y * 720.0), 0).xy;
+    _entryPointOutput = ((texelFetch(SPIRV_Cross_CombinedSampledImageSPIRV_Cross_DummySampler, _152, 0) + texelFetch(SPIRV_Cross_CombinedSampledImageSPIRV_Cross_DummySampler, _152, 0)) + texture(SPIRV_Cross_CombinedSampledImageSampler, gl_FragCoord.xy)) + texture(SPIRV_Cross_CombinedSampledImageSampler, gl_FragCoord.xy);
 }
 
diff --git a/reference/opt/shaders/asm/frag/image-fetch-no-sampler.asm.vk.frag.vk b/reference/opt/shaders/asm/frag/image-fetch-no-sampler.asm.vk.frag.vk
index 433f5bc273..23acab0b16 100644
--- a/reference/opt/shaders/asm/frag/image-fetch-no-sampler.asm.vk.frag.vk
+++ b/reference/opt/shaders/asm/frag/image-fetch-no-sampler.asm.vk.frag.vk
@@ -2,13 +2,13 @@
 
 layout(set = 0, binding = 0) uniform sampler Sampler;
 layout(set = 0, binding = 0) uniform texture2D SampledImage;
-uniform sampler SPIRV_Cross_DummySampler;
+layout(set = 0, binding = 0) uniform sampler SPIRV_Cross_DummySampler;
 
 layout(location = 0) out vec4 _entryPointOutput;
 
 void main()
 {
-    ivec3 _122 = ivec3(int(gl_FragCoord.x * 1280.0), int(gl_FragCoord.y * 720.0), 0);
-    _entryPointOutput = ((texelFetch(sampler2D(SampledImage, SPIRV_Cross_DummySampler), _122.xy, 0) + texelFetch(sampler2D(SampledImage, SPIRV_Cross_DummySampler), _122.xy, 0)) + texture(sampler2D(SampledImage, Sampler), gl_FragCoord.xy)) + texture(sampler2D(SampledImage, Sampler), gl_FragCoord.xy);
+    ivec2 _152 = ivec3(int(gl_FragCoord.x * 1280.0), int(gl_FragCoord.y * 720.0), 0).xy;
+    _entryPointOutput = ((texelFetch(sampler2D(SampledImage, SPIRV_Cross_DummySampler), _152, 0) + texelFetch(sampler2D(SampledImage, SPIRV_Cross_DummySampler), _152, 0)) + texture(sampler2D(SampledImage, Sampler), gl_FragCoord.xy)) + texture(sampler2D(SampledImage, Sampler), gl_FragCoord.xy);
 }
 
diff --git a/reference/opt/shaders/asm/frag/image-query-no-sampler.vk.asm.frag b/reference/opt/shaders/asm/frag/image-query-no-sampler.vk.asm.frag
new file mode 100644
index 0000000000..4a20c87c74
--- /dev/null
+++ b/reference/opt/shaders/asm/frag/image-query-no-sampler.vk.asm.frag
@@ -0,0 +1,9 @@
+#version 450
+
+uniform sampler2D SPIRV_Cross_CombineduSampler2DSPIRV_Cross_DummySampler;
+uniform sampler2DMS SPIRV_Cross_CombineduSampler2DMSSPIRV_Cross_DummySampler;
+
+void main()
+{
+}
+
diff --git a/reference/opt/shaders/asm/frag/image-query-no-sampler.vk.asm.frag.vk b/reference/opt/shaders/asm/frag/image-query-no-sampler.vk.asm.frag.vk
new file mode 100644
index 0000000000..d1526a6a92
--- /dev/null
+++ b/reference/opt/shaders/asm/frag/image-query-no-sampler.vk.asm.frag.vk
@@ -0,0 +1,10 @@
+#version 450
+
+layout(set = 0, binding = 0) uniform texture2D uSampler2D;
+layout(set = 0, binding = 0) uniform texture2DMS uSampler2DMS;
+layout(set = 0, binding = 0) uniform sampler SPIRV_Cross_DummySampler;
+
+void main()
+{
+}
+
diff --git a/reference/opt/shaders/asm/frag/implicit-read-dep-phi.asm.frag b/reference/opt/shaders/asm/frag/implicit-read-dep-phi.asm.frag
new file mode 100644
index 0000000000..8a7f64d7f7
--- /dev/null
+++ b/reference/opt/shaders/asm/frag/implicit-read-dep-phi.asm.frag
@@ -0,0 +1,40 @@
+#version 450
+
+layout(binding = 0) uniform sampler2D uImage;
+
+layout(location = 0) in vec4 v0;
+layout(location = 0) out vec4 FragColor;
+
+void main()
+{
+    float phi;
+    vec4 _36;
+    int _51;
+    _51 = 0;
+    phi = 1.0;
+    _36 = vec4(1.0, 2.0, 1.0, 2.0);
+    for (;;)
+    {
+        FragColor = _36;
+        if (_51 < 4)
+        {
+            if (v0[_51] > 0.0)
+            {
+                vec2 _48 = vec2(phi);
+                _51++;
+                phi += 2.0;
+                _36 = textureLod(uImage, _48, 0.0);
+                continue;
+            }
+            else
+            {
+                break;
+            }
+        }
+        else
+        {
+            break;
+        }
+    }
+}
+
diff --git a/reference/opt/shaders/asm/frag/loop-body-dominator-continue-access.asm.frag b/reference/opt/shaders/asm/frag/loop-body-dominator-continue-access.asm.frag
index 4fb4b75740..9f7a1f8744 100644
--- a/reference/opt/shaders/asm/frag/loop-body-dominator-continue-access.asm.frag
+++ b/reference/opt/shaders/asm/frag/loop-body-dominator-continue-access.asm.frag
@@ -10,43 +10,73 @@ layout(binding = 0, std140) uniform Foo
 layout(location = 0) in vec3 fragWorld;
 layout(location = 0) out int _entryPointOutput;
 
-mat4 _152;
-uint _155;
-
-int GetCascade(vec3 fragWorldPosition)
-{
-    mat4 _153;
-    _153 = _152;
-    uint _156;
-    mat4 _157;
-    for (uint _151 = 0u; _151 < _11.shadowCascadesNum; _151 = _156 + uint(1), _153 = _157)
-    {
-        mat4 _154;
-        _154 = _153;
-        for (;;)
-        {
-            if (_11.test == 0)
-            {
-                _156 = _151;
-                _157 = mat4(vec4(0.5, 0.0, 0.0, 0.0), vec4(0.0, 0.5, 0.0, 0.0), vec4(0.0, 0.0, 0.5, 0.0), vec4(0.0, 0.0, 0.0, 1.0));
-                break;
-            }
-            _156 = _151;
-            _157 = mat4(vec4(1.0, 0.0, 0.0, 0.0), vec4(0.0, 1.0, 0.0, 0.0), vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0));
-            break;
-        }
-        vec4 _92 = (_157 * _11.lightVP[_156]) * vec4(fragWorldPosition, 1.0);
-        if ((((_92.z >= 0.0) && (_92.z <= 1.0)) && (max(_92.x, _92.y) <= 1.0)) && (min(_92.x, _92.y) >= 0.0))
-        {
-            return int(_156);
-        }
-    }
-    return -1;
-}
+mat4 _235;
+int _245;
 
 void main()
 {
-    vec3 _123 = fragWorld;
-    _entryPointOutput = GetCascade(_123);
+    uint _229;
+    bool _231;
+    mat4 _234;
+    _234 = _235;
+    _231 = false;
+    _229 = 0u;
+    bool _251;
+    mat4 _232;
+    int _243;
+    bool _158;
+    for (;;)
+    {
+        _158 = _229 < _11.shadowCascadesNum;
+        if (_158)
+        {
+            bool _209 = _11.test == 0;
+            mat4 _233;
+            if (_209)
+            {
+                _233 = mat4(vec4(0.5, 0.0, 0.0, 0.0), vec4(0.0, 0.5, 0.0, 0.0), vec4(0.0, 0.0, 0.5, 0.0), vec4(0.0, 0.0, 0.0, 1.0));
+            }
+            else
+            {
+                _233 = _234;
+            }
+            bool _250 = _209 ? true : _231;
+            if (!_250)
+            {
+                _232 = mat4(vec4(1.0, 0.0, 0.0, 0.0), vec4(0.0, 1.0, 0.0, 0.0), vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0));
+            }
+            else
+            {
+                _232 = _233;
+            }
+            _251 = _250 ? _250 : true;
+            vec4 _171 = (_232 * _11.lightVP[_229]) * vec4(fragWorld, 1.0);
+            float _218 = _171.z;
+            float _222 = _171.x;
+            float _224 = _171.y;
+            if ((((_218 >= 0.0) && (_218 <= 1.0)) && (max(_222, _224) <= 1.0)) && (min(_222, _224) >= 0.0))
+            {
+                _243 = int(_229);
+                break;
+            }
+            else
+            {
+                _234 = _232;
+                _231 = _251;
+                _229++;
+                continue;
+            }
+            _234 = _232;
+            _231 = _251;
+            _229++;
+            continue;
+        }
+        else
+        {
+            _243 = _245;
+            break;
+        }
+    }
+    _entryPointOutput = (_158 ? true : false) ? _243 : (-1);
 }
 
diff --git a/reference/opt/shaders/asm/frag/loop-header-to-continue.asm.frag b/reference/opt/shaders/asm/frag/loop-header-to-continue.asm.frag
index f3a6b4eceb..c2dba928df 100644
--- a/reference/opt/shaders/asm/frag/loop-header-to-continue.asm.frag
+++ b/reference/opt/shaders/asm/frag/loop-header-to-continue.asm.frag
@@ -28,11 +28,15 @@ void main()
     float _58;
     _55 = 0.0;
     _58 = 0.0;
-    float _64;
-    vec4 _72;
-    float _78;
-    for (int _60 = -3; _60 <= 3; _64 = float(_60), _72 = texture(SPIRV_Cross_CombinedmapTexturemapSampler, IN_uv + (vec2(0.0, _8.CB1.TextureSize.w) * _64)), _78 = exp(((-_64) * _64) * 0.2222220003604888916015625) * float(abs(_72.y - _50) < clamp((_50 * 80.0) * 0.0007999999797903001308441162109375, 7.999999797903001308441162109375e-05, 0.008000000379979610443115234375)), _55 += (_72.x * _78), _58 += _78, _60++)
+    for (int _60 = -3; _60 <= 3; )
     {
+        float _64 = float(_60);
+        vec4 _72 = texture(SPIRV_Cross_CombinedmapTexturemapSampler, IN_uv + (vec2(0.0, _8.CB1.TextureSize.w) * _64));
+        float _78 = exp(((-_64) * _64) * 0.2222220003604888916015625) * float(abs(_72.y - _50) < clamp(_50 * 0.06399999558925628662109375, 7.999999797903001308441162109375e-05, 0.008000000379979610443115234375));
+        _55 += (_72.x * _78);
+        _58 += _78;
+        _60++;
+        continue;
     }
     _entryPointOutput = vec4(_55 / _58, _50, 0.0, 1.0);
 }
diff --git a/reference/opt/shaders/asm/frag/loop-merge-to-continue.asm.frag b/reference/opt/shaders/asm/frag/loop-merge-to-continue.asm.frag
new file mode 100644
index 0000000000..faf32edcf4
--- /dev/null
+++ b/reference/opt/shaders/asm/frag/loop-merge-to-continue.asm.frag
@@ -0,0 +1,21 @@
+#version 450
+
+layout(location = 0) out vec4 FragColor;
+layout(location = 0) in vec4 v0;
+
+void main()
+{
+    FragColor = vec4(1.0);
+    int _50;
+    _50 = 0;
+    for (; _50 < 4; _50++)
+    {
+        for (int _51 = 0; _51 < 4; )
+        {
+            FragColor += vec4(v0[(_50 + _51) & 3]);
+            _51++;
+            continue;
+        }
+    }
+}
+
diff --git a/reference/opt/shaders/asm/frag/multi-for-loop-init.asm.frag b/reference/opt/shaders/asm/frag/multi-for-loop-init.asm.frag
index c41c77c701..6a177288af 100644
--- a/reference/opt/shaders/asm/frag/multi-for-loop-init.asm.frag
+++ b/reference/opt/shaders/asm/frag/multi-for-loop-init.asm.frag
@@ -10,10 +10,13 @@ void main()
     FragColor = vec4(0.0);
     int _53 = 0;
     uint _54 = 1u;
-    for (; (_53 < 10) && (int(_54) < int(20u)); _53 += counter, _54 += uint(counter))
+    for (; (_53 < 10) && (int(_54) < int(20u)); )
     {
         FragColor += vec4(float(_53));
         FragColor += vec4(float(_54));
+        _54 += uint(counter);
+        _53 += counter;
+        continue;
     }
 }
 
diff --git a/reference/opt/shaders/asm/frag/op-constant-null.asm.frag b/reference/opt/shaders/asm/frag/op-constant-null.asm.frag
index cb882cd7b1..873a64cb41 100644
--- a/reference/opt/shaders/asm/frag/op-constant-null.asm.frag
+++ b/reference/opt/shaders/asm/frag/op-constant-null.asm.frag
@@ -2,12 +2,6 @@
 precision mediump float;
 precision highp int;
 
-struct D
-{
-    vec4 a;
-    float b;
-};
-
 layout(location = 0) out float FragColor;
 
 void main()
diff --git a/reference/opt/shaders/asm/frag/phi-loop-variable.asm.frag b/reference/opt/shaders/asm/frag/phi-loop-variable.asm.frag
index 786ac74de5..05ce10adfa 100644
--- a/reference/opt/shaders/asm/frag/phi-loop-variable.asm.frag
+++ b/reference/opt/shaders/asm/frag/phi-loop-variable.asm.frag
@@ -2,8 +2,5 @@
 
 void main()
 {
-    for (int _22 = 35; _22 >= 0; _22--)
-    {
-    }
 }
 
diff --git a/reference/opt/shaders/asm/frag/sampler-buffer-array-without-sampler.asm.frag b/reference/opt/shaders/asm/frag/sampler-buffer-array-without-sampler.asm.frag
new file mode 100644
index 0000000000..bdda0d6293
--- /dev/null
+++ b/reference/opt/shaders/asm/frag/sampler-buffer-array-without-sampler.asm.frag
@@ -0,0 +1,18 @@
+#version 450
+
+struct Registers
+{
+    int index;
+};
+
+uniform Registers registers;
+
+uniform sampler2D SPIRV_Cross_CombineduSamplerSPIRV_Cross_DummySampler[4];
+
+layout(location = 0) out vec4 FragColor;
+
+void main()
+{
+    FragColor = (texelFetch(SPIRV_Cross_CombineduSamplerSPIRV_Cross_DummySampler[registers.index], ivec2(10), 0) + texelFetch(SPIRV_Cross_CombineduSamplerSPIRV_Cross_DummySampler[registers.index], ivec2(4), 0)) + texelFetch(SPIRV_Cross_CombineduSamplerSPIRV_Cross_DummySampler[registers.index], ivec2(4), 0);
+}
+
diff --git a/reference/opt/shaders/asm/frag/sampler-buffer-without-sampler.asm.frag b/reference/opt/shaders/asm/frag/sampler-buffer-without-sampler.asm.frag
index 560a4e79b9..89058f1434 100644
--- a/reference/opt/shaders/asm/frag/sampler-buffer-without-sampler.asm.frag
+++ b/reference/opt/shaders/asm/frag/sampler-buffer-without-sampler.asm.frag
@@ -1,7 +1,7 @@
 #version 450
 
-layout(rgba32f) uniform writeonly imageBuffer RWTex;
-uniform samplerBuffer Tex;
+layout(binding = 0, rgba32f) uniform writeonly imageBuffer RWTex;
+layout(binding = 1) uniform samplerBuffer Tex;
 
 layout(location = 0) out vec4 _entryPointOutput;
 
diff --git a/reference/opt/shaders/asm/frag/selection-merge-to-continue.asm.frag b/reference/opt/shaders/asm/frag/selection-merge-to-continue.asm.frag
new file mode 100644
index 0000000000..05c17c7a66
--- /dev/null
+++ b/reference/opt/shaders/asm/frag/selection-merge-to-continue.asm.frag
@@ -0,0 +1,24 @@
+#version 450
+
+layout(location = 0) out vec4 FragColor;
+layout(location = 0) in vec4 v0;
+
+void main()
+{
+    FragColor = vec4(1.0);
+    for (int _54 = 0; _54 < 4; _54++)
+    {
+        if (v0.x == 20.0)
+        {
+            FragColor += vec4(v0[_54 & 3]);
+            continue;
+        }
+        else
+        {
+            FragColor += vec4(v0[_54 & 1]);
+            continue;
+        }
+        continue;
+    }
+}
+
diff --git a/reference/opt/shaders/asm/frag/srem.asm.frag b/reference/opt/shaders/asm/frag/srem.asm.frag
new file mode 100644
index 0000000000..05a3d7554f
--- /dev/null
+++ b/reference/opt/shaders/asm/frag/srem.asm.frag
@@ -0,0 +1,13 @@
+#version 310 es
+precision mediump float;
+precision highp int;
+
+layout(location = 0) out vec4 FragColor;
+layout(location = 0) flat in ivec4 vA;
+layout(location = 1) flat in ivec4 vB;
+
+void main()
+{
+    FragColor = vec4(vA - vB * (vA / vB));
+}
+
diff --git a/reference/opt/shaders/asm/frag/switch-merge-to-continue.asm.frag b/reference/opt/shaders/asm/frag/switch-merge-to-continue.asm.frag
new file mode 100644
index 0000000000..ea4a25995a
--- /dev/null
+++ b/reference/opt/shaders/asm/frag/switch-merge-to-continue.asm.frag
@@ -0,0 +1,31 @@
+#version 450
+
+layout(location = 0) out vec4 FragColor;
+
+void main()
+{
+    FragColor = vec4(1.0);
+    for (int _52 = 0; _52 < 4; _52++)
+    {
+        switch (_52)
+        {
+            case 0:
+            {
+                FragColor.x += 1.0;
+                break;
+            }
+            case 1:
+            {
+                FragColor.y += 3.0;
+                break;
+            }
+            default:
+            {
+                FragColor.z += 3.0;
+                break;
+            }
+        }
+        continue;
+    }
+}
+
diff --git a/reference/shaders/asm/vert/empty-struct-composite.asm.vert b/reference/opt/shaders/asm/frag/temporary-name-alias.asm.frag
similarity index 100%
rename from reference/shaders/asm/vert/empty-struct-composite.asm.vert
rename to reference/opt/shaders/asm/frag/temporary-name-alias.asm.frag
diff --git a/reference/opt/shaders/asm/frag/temporary-phi-hoisting.asm.frag b/reference/opt/shaders/asm/frag/temporary-phi-hoisting.asm.frag
index 3917594d98..1ecd61d744 100644
--- a/reference/opt/shaders/asm/frag/temporary-phi-hoisting.asm.frag
+++ b/reference/opt/shaders/asm/frag/temporary-phi-hoisting.asm.frag
@@ -16,10 +16,11 @@ void main()
 {
     vec3 _28;
     _28 = vec3(0.0);
-    vec3 _29;
-    for (int _31 = 0; _31 < 4; _28 = _29, _31++)
+    for (int _31 = 0; _31 < 4; )
     {
-        _29 = _28 + _6.g_MyStruct[_31].color.xyz;
+        _28 += _6.g_MyStruct[_31].color.xyz;
+        _31++;
+        continue;
     }
     _entryPointOutput = vec4(_28, 1.0);
 }
diff --git a/reference/opt/shaders/asm/frag/texel-fetch-no-lod.asm.frag b/reference/opt/shaders/asm/frag/texel-fetch-no-lod.asm.frag
new file mode 100644
index 0000000000..6193de0da9
--- /dev/null
+++ b/reference/opt/shaders/asm/frag/texel-fetch-no-lod.asm.frag
@@ -0,0 +1,11 @@
+#version 450
+
+layout(binding = 0) uniform sampler2D uTexture;
+
+layout(location = 0) out vec4 FragColor;
+
+void main()
+{
+    FragColor = texelFetch(uTexture, ivec2(gl_FragCoord.xy), 0);
+}
+
diff --git a/reference/opt/shaders/asm/frag/undef-variable-store.asm.frag b/reference/opt/shaders/asm/frag/undef-variable-store.asm.frag
index 23576ed850..84eb23a242 100644
--- a/reference/opt/shaders/asm/frag/undef-variable-store.asm.frag
+++ b/reference/opt/shaders/asm/frag/undef-variable-store.asm.frag
@@ -2,29 +2,8 @@
 
 layout(location = 0) out vec4 _entryPointOutput;
 
-vec4 _38;
-vec4 _50;
-
 void main()
 {
-    vec4 _51;
-    _51 = _50;
-    vec4 _52;
-    for (;;)
-    {
-        if (0.0 != 0.0)
-        {
-            _52 = vec4(1.0, 0.0, 0.0, 1.0);
-            break;
-        }
-        else
-        {
-            _52 = vec4(1.0, 1.0, 0.0, 1.0);
-            break;
-        }
-        _52 = _38;
-        break;
-    }
-    _entryPointOutput = _52;
+    _entryPointOutput = vec4(1.0, 1.0, 0.0, 1.0);
 }
 
diff --git a/reference/opt/shaders/asm/frag/unreachable.asm.frag b/reference/opt/shaders/asm/frag/unreachable.asm.frag
index d8126d752e..eb7e8a912c 100644
--- a/reference/opt/shaders/asm/frag/unreachable.asm.frag
+++ b/reference/opt/shaders/asm/frag/unreachable.asm.frag
@@ -3,24 +3,21 @@
 layout(location = 0) flat in int counter;
 layout(location = 0) out vec4 FragColor;
 
-vec4 _21;
-
 void main()
 {
-    vec4 _33;
-    do
+    bool _29;
+    for (;;)
     {
-        if (counter == 10)
+        _29 = counter == 10;
+        if (_29)
         {
-            _33 = vec4(10.0);
             break;
         }
         else
         {
-            _33 = vec4(30.0);
             break;
         }
-    } while (false);
-    FragColor = _33;
+    }
+    FragColor = mix(vec4(30.0), vec4(10.0), bvec4(_29));
 }
 
diff --git a/reference/opt/shaders/asm/frag/vector-shuffle-oom.asm.frag b/reference/opt/shaders/asm/frag/vector-shuffle-oom.asm.frag
index 1c211caa6d..e5408d533b 100644
--- a/reference/opt/shaders/asm/frag/vector-shuffle-oom.asm.frag
+++ b/reference/opt/shaders/asm/frag/vector-shuffle-oom.asm.frag
@@ -94,209 +94,209 @@ _28 _74;
 
 void main()
 {
-    _28 _77 = _74;
-    _77._m0 = vec4(0.0);
     vec2 _82 = gl_FragCoord.xy * _19._m23.xy;
     vec4 _88 = _7._m2 * _7._m0.xyxy;
-    vec2 _97 = clamp(_82 + (vec3(0.0, -2.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
+    vec2 _95 = _88.xy;
+    vec2 _96 = _88.zw;
+    vec2 _97 = clamp(_82 + (vec2(0.0, -2.0) * _7._m0.xy), _95, _96);
     vec3 _109 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _97, 0.0).w * _7._m1, 0.0, 1.0);
     vec4 _113 = textureLod(SPIRV_Cross_Combined_1, _97, 0.0);
+    float _114 = _113.y;
     vec3 _129;
-    if (_113.y > 0.0)
+    if (_114 > 0.0)
     {
-        _129 = _109 + (textureLod(SPIRV_Cross_Combined_2, _97, 0.0).xyz * clamp(_113.y * _113.z, 0.0, 1.0));
+        _129 = _109 + (textureLod(SPIRV_Cross_Combined_2, _97, 0.0).xyz * clamp(_114 * _113.z, 0.0, 1.0));
     }
     else
     {
         _129 = _109;
     }
-    vec3 _133 = vec4(0.0).xyz + (_129 * 0.5);
-    vec4 _134 = vec4(_133.x, _133.y, _133.z, vec4(0.0).w);
-    _28 _135 = _77;
-    _135._m0 = _134;
-    vec2 _144 = clamp(_82 + (vec3(-1.0, -1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
+    vec3 _130 = _129 * 0.5;
+    vec4 _134 = vec4(_130.x, _130.y, _130.z, vec4(0.0).w);
+    vec2 _144 = clamp(_82 + (vec2(-1.0) * _7._m0.xy), _95, _96);
     vec3 _156 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _144, 0.0).w * _7._m1, 0.0, 1.0);
     vec4 _160 = textureLod(SPIRV_Cross_Combined_1, _144, 0.0);
+    float _161 = _160.y;
     vec3 _176;
-    if (_160.y > 0.0)
+    if (_161 > 0.0)
     {
-        _176 = _156 + (textureLod(SPIRV_Cross_Combined_2, _144, 0.0).xyz * clamp(_160.y * _160.z, 0.0, 1.0));
+        _176 = _156 + (textureLod(SPIRV_Cross_Combined_2, _144, 0.0).xyz * clamp(_161 * _160.z, 0.0, 1.0));
     }
     else
     {
         _176 = _156;
     }
-    vec3 _180 = _134.xyz + (_176 * 0.5);
+    vec3 _177 = _176 * 0.5;
+    vec3 _180 = _134.xyz + _177;
     vec4 _181 = vec4(_180.x, _180.y, _180.z, _134.w);
-    _28 _182 = _135;
-    _182._m0 = _181;
-    vec2 _191 = clamp(_82 + (vec3(0.0, -1.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw);
+    vec2 _191 = clamp(_82 + (vec2(0.0, -1.0) * _7._m0.xy), _95, _96);
     vec3 _203 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _191, 0.0).w * _7._m1, 0.0, 1.0);
     vec4 _207 = textureLod(SPIRV_Cross_Combined_1, _191, 0.0);
+    float _208 = _207.y;
     vec3 _223;
-    if (_207.y > 0.0)
+    if (_208 > 0.0)
     {
-        _223 = _203 + (textureLod(SPIRV_Cross_Combined_2, _191, 0.0).xyz * clamp(_207.y * _207.z, 0.0, 1.0));
+        _223 = _203 + (textureLod(SPIRV_Cross_Combined_2, _191, 0.0).xyz * clamp(_208 * _207.z, 0.0, 1.0));
     }
     else
     {
         _223 = _203;
     }
-    vec3 _227 = _181.xyz + (_223 * 0.75);
+    vec3 _224 = _223 * 0.75;
+    vec3 _227 = _181.xyz + _224;
     vec4 _228 = vec4(_227.x, _227.y, _227.z, _181.w);
-    _28 _229 = _182;
-    _229._m0 = _228;
-    vec2 _238 = clamp(_82 + (vec3(1.0, -1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
+    vec2 _238 = clamp(_82 + (vec2(1.0, -1.0) * _7._m0.xy), _95, _96);
     vec3 _250 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _238, 0.0).w * _7._m1, 0.0, 1.0);
     vec4 _254 = textureLod(SPIRV_Cross_Combined_1, _238, 0.0);
+    float _255 = _254.y;
     vec3 _270;
-    if (_254.y > 0.0)
+    if (_255 > 0.0)
     {
-        _270 = _250 + (textureLod(SPIRV_Cross_Combined_2, _238, 0.0).xyz * clamp(_254.y * _254.z, 0.0, 1.0));
+        _270 = _250 + (textureLod(SPIRV_Cross_Combined_2, _238, 0.0).xyz * clamp(_255 * _254.z, 0.0, 1.0));
     }
     else
     {
         _270 = _250;
     }
-    vec3 _274 = _228.xyz + (_270 * 0.5);
+    vec3 _271 = _270 * 0.5;
+    vec3 _274 = _228.xyz + _271;
     vec4 _275 = vec4(_274.x, _274.y, _274.z, _228.w);
-    _28 _276 = _229;
-    _276._m0 = _275;
-    vec2 _285 = clamp(_82 + (vec3(-2.0, 0.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
+    vec2 _285 = clamp(_82 + (vec2(-2.0, 0.0) * _7._m0.xy), _95, _96);
     vec3 _297 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _285, 0.0).w * _7._m1, 0.0, 1.0);
     vec4 _301 = textureLod(SPIRV_Cross_Combined_1, _285, 0.0);
+    float _302 = _301.y;
     vec3 _317;
-    if (_301.y > 0.0)
+    if (_302 > 0.0)
     {
-        _317 = _297 + (textureLod(SPIRV_Cross_Combined_2, _285, 0.0).xyz * clamp(_301.y * _301.z, 0.0, 1.0));
+        _317 = _297 + (textureLod(SPIRV_Cross_Combined_2, _285, 0.0).xyz * clamp(_302 * _301.z, 0.0, 1.0));
     }
     else
     {
         _317 = _297;
     }
-    vec3 _321 = _275.xyz + (_317 * 0.5);
+    vec3 _318 = _317 * 0.5;
+    vec3 _321 = _275.xyz + _318;
     vec4 _322 = vec4(_321.x, _321.y, _321.z, _275.w);
-    _28 _323 = _276;
-    _323._m0 = _322;
-    vec2 _332 = clamp(_82 + (vec3(-1.0, 0.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw);
+    vec2 _332 = clamp(_82 + (vec2(-1.0, 0.0) * _7._m0.xy), _95, _96);
     vec3 _344 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _332, 0.0).w * _7._m1, 0.0, 1.0);
     vec4 _348 = textureLod(SPIRV_Cross_Combined_1, _332, 0.0);
+    float _349 = _348.y;
     vec3 _364;
-    if (_348.y > 0.0)
+    if (_349 > 0.0)
     {
-        _364 = _344 + (textureLod(SPIRV_Cross_Combined_2, _332, 0.0).xyz * clamp(_348.y * _348.z, 0.0, 1.0));
+        _364 = _344 + (textureLod(SPIRV_Cross_Combined_2, _332, 0.0).xyz * clamp(_349 * _348.z, 0.0, 1.0));
     }
     else
     {
         _364 = _344;
     }
-    vec3 _368 = _322.xyz + (_364 * 0.75);
+    vec3 _365 = _364 * 0.75;
+    vec3 _368 = _322.xyz + _365;
     vec4 _369 = vec4(_368.x, _368.y, _368.z, _322.w);
-    _28 _370 = _323;
-    _370._m0 = _369;
-    vec2 _379 = clamp(_82 + (vec3(0.0, 0.0, 1.0).xy * _7._m0.xy), _88.xy, _88.zw);
+    vec2 _379 = clamp(_82, _95, _96);
     vec3 _391 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _379, 0.0).w * _7._m1, 0.0, 1.0);
     vec4 _395 = textureLod(SPIRV_Cross_Combined_1, _379, 0.0);
+    float _396 = _395.y;
     vec3 _411;
-    if (_395.y > 0.0)
+    if (_396 > 0.0)
     {
-        _411 = _391 + (textureLod(SPIRV_Cross_Combined_2, _379, 0.0).xyz * clamp(_395.y * _395.z, 0.0, 1.0));
+        _411 = _391 + (textureLod(SPIRV_Cross_Combined_2, _379, 0.0).xyz * clamp(_396 * _395.z, 0.0, 1.0));
     }
     else
     {
         _411 = _391;
     }
-    vec3 _415 = _369.xyz + (_411 * 1.0);
+    vec3 _412 = _411 * 1.0;
+    vec3 _415 = _369.xyz + _412;
     vec4 _416 = vec4(_415.x, _415.y, _415.z, _369.w);
-    _28 _417 = _370;
-    _417._m0 = _416;
-    vec2 _426 = clamp(_82 + (vec3(1.0, 0.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw);
+    vec2 _426 = clamp(_82 + (vec2(1.0, 0.0) * _7._m0.xy), _95, _96);
     vec3 _438 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _426, 0.0).w * _7._m1, 0.0, 1.0);
     vec4 _442 = textureLod(SPIRV_Cross_Combined_1, _426, 0.0);
+    float _443 = _442.y;
     vec3 _458;
-    if (_442.y > 0.0)
+    if (_443 > 0.0)
     {
-        _458 = _438 + (textureLod(SPIRV_Cross_Combined_2, _426, 0.0).xyz * clamp(_442.y * _442.z, 0.0, 1.0));
+        _458 = _438 + (textureLod(SPIRV_Cross_Combined_2, _426, 0.0).xyz * clamp(_443 * _442.z, 0.0, 1.0));
     }
     else
     {
         _458 = _438;
     }
-    vec3 _462 = _416.xyz + (_458 * 0.75);
+    vec3 _459 = _458 * 0.75;
+    vec3 _462 = _416.xyz + _459;
     vec4 _463 = vec4(_462.x, _462.y, _462.z, _416.w);
-    _28 _464 = _417;
-    _464._m0 = _463;
-    vec2 _473 = clamp(_82 + (vec3(2.0, 0.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
+    vec2 _473 = clamp(_82 + (vec2(2.0, 0.0) * _7._m0.xy), _95, _96);
     vec3 _485 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _473, 0.0).w * _7._m1, 0.0, 1.0);
     vec4 _489 = textureLod(SPIRV_Cross_Combined_1, _473, 0.0);
+    float _490 = _489.y;
     vec3 _505;
-    if (_489.y > 0.0)
+    if (_490 > 0.0)
     {
-        _505 = _485 + (textureLod(SPIRV_Cross_Combined_2, _473, 0.0).xyz * clamp(_489.y * _489.z, 0.0, 1.0));
+        _505 = _485 + (textureLod(SPIRV_Cross_Combined_2, _473, 0.0).xyz * clamp(_490 * _489.z, 0.0, 1.0));
     }
     else
     {
         _505 = _485;
     }
-    vec3 _509 = _463.xyz + (_505 * 0.5);
+    vec3 _506 = _505 * 0.5;
+    vec3 _509 = _463.xyz + _506;
     vec4 _510 = vec4(_509.x, _509.y, _509.z, _463.w);
-    _28 _511 = _464;
-    _511._m0 = _510;
-    vec2 _520 = clamp(_82 + (vec3(-1.0, 1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
+    vec2 _520 = clamp(_82 + (vec2(-1.0, 1.0) * _7._m0.xy), _95, _96);
     vec3 _532 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _520, 0.0).w * _7._m1, 0.0, 1.0);
     vec4 _536 = textureLod(SPIRV_Cross_Combined_1, _520, 0.0);
+    float _537 = _536.y;
     vec3 _552;
-    if (_536.y > 0.0)
+    if (_537 > 0.0)
     {
-        _552 = _532 + (textureLod(SPIRV_Cross_Combined_2, _520, 0.0).xyz * clamp(_536.y * _536.z, 0.0, 1.0));
+        _552 = _532 + (textureLod(SPIRV_Cross_Combined_2, _520, 0.0).xyz * clamp(_537 * _536.z, 0.0, 1.0));
     }
     else
     {
         _552 = _532;
     }
-    vec3 _556 = _510.xyz + (_552 * 0.5);
+    vec3 _553 = _552 * 0.5;
+    vec3 _556 = _510.xyz + _553;
     vec4 _557 = vec4(_556.x, _556.y, _556.z, _510.w);
-    _28 _558 = _511;
-    _558._m0 = _557;
-    vec2 _567 = clamp(_82 + (vec3(0.0, 1.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw);
+    vec2 _567 = clamp(_82 + (vec2(0.0, 1.0) * _7._m0.xy), _95, _96);
     vec3 _579 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _567, 0.0).w * _7._m1, 0.0, 1.0);
     vec4 _583 = textureLod(SPIRV_Cross_Combined_1, _567, 0.0);
+    float _584 = _583.y;
     vec3 _599;
-    if (_583.y > 0.0)
+    if (_584 > 0.0)
     {
-        _599 = _579 + (textureLod(SPIRV_Cross_Combined_2, _567, 0.0).xyz * clamp(_583.y * _583.z, 0.0, 1.0));
+        _599 = _579 + (textureLod(SPIRV_Cross_Combined_2, _567, 0.0).xyz * clamp(_584 * _583.z, 0.0, 1.0));
     }
     else
     {
         _599 = _579;
     }
-    vec3 _603 = _557.xyz + (_599 * 0.75);
+    vec3 _600 = _599 * 0.75;
+    vec3 _603 = _557.xyz + _600;
     vec4 _604 = vec4(_603.x, _603.y, _603.z, _557.w);
-    _28 _605 = _558;
-    _605._m0 = _604;
-    vec2 _614 = clamp(_82 + (vec3(1.0, 1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
+    vec2 _614 = clamp(_82 + _7._m0.xy, _95, _96);
     vec3 _626 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _614, 0.0).w * _7._m1, 0.0, 1.0);
     vec4 _630 = textureLod(SPIRV_Cross_Combined_1, _614, 0.0);
+    float _631 = _630.y;
     vec3 _646;
-    if (_630.y > 0.0)
+    if (_631 > 0.0)
     {
-        _646 = _626 + (textureLod(SPIRV_Cross_Combined_2, _614, 0.0).xyz * clamp(_630.y * _630.z, 0.0, 1.0));
+        _646 = _626 + (textureLod(SPIRV_Cross_Combined_2, _614, 0.0).xyz * clamp(_631 * _630.z, 0.0, 1.0));
     }
     else
     {
         _646 = _626;
     }
-    vec3 _650 = _604.xyz + (_646 * 0.5);
+    vec3 _647 = _646 * 0.5;
+    vec3 _650 = _604.xyz + _647;
     vec4 _651 = vec4(_650.x, _650.y, _650.z, _604.w);
-    _28 _652 = _605;
-    _652._m0 = _651;
-    vec2 _661 = clamp(_82 + (vec3(0.0, 2.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
+    vec2 _661 = clamp(_82 + (vec2(0.0, 2.0) * _7._m0.xy), _95, _96);
     vec3 _673 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _661, 0.0).w * _7._m1, 0.0, 1.0);
     vec4 _677 = textureLod(SPIRV_Cross_Combined_1, _661, 0.0);
+    float _678 = _677.y;
     vec3 _693;
-    if (_677.y > 0.0)
+    if (_678 > 0.0)
     {
-        _693 = _673 + (textureLod(SPIRV_Cross_Combined_2, _661, 0.0).xyz * clamp(_677.y * _677.z, 0.0, 1.0));
+        _693 = _673 + (textureLod(SPIRV_Cross_Combined_2, _661, 0.0).xyz * clamp(_678 * _677.z, 0.0, 1.0));
     }
     else
     {
@@ -304,10 +304,8 @@ void main()
     }
     vec3 _697 = _651.xyz + (_693 * 0.5);
     vec4 _698 = vec4(_697.x, _697.y, _697.z, _651.w);
-    _28 _699 = _652;
-    _699._m0 = _698;
-    vec3 _702 = _698.xyz / vec3(((((((((((((0.0 + 0.5) + 0.5) + 0.75) + 0.5) + 0.5) + 0.75) + 1.0) + 0.75) + 0.5) + 0.5) + 0.75) + 0.5) + 0.5);
-    _28 _704 = _699;
+    vec3 _702 = _698.xyz * vec3(0.125);
+    _28 _704 = _74;
     _704._m0 = vec4(_702.x, _702.y, _702.z, _698.w);
     _28 _705 = _704;
     _705._m0.w = 1.0;
diff --git a/reference/opt/shaders/asm/geom/inout-split-access-chain-handle.asm.geom b/reference/opt/shaders/asm/geom/inout-split-access-chain-handle.asm.geom
new file mode 100644
index 0000000000..ca1381cff6
--- /dev/null
+++ b/reference/opt/shaders/asm/geom/inout-split-access-chain-handle.asm.geom
@@ -0,0 +1,9 @@
+#version 440
+layout(triangles) in;
+layout(max_vertices = 5, triangle_strip) out;
+
+void main()
+{
+    gl_Position = gl_in[0].gl_Position;
+}
+
diff --git a/reference/opt/shaders/asm/geom/split-access-chain-input.asm.geom b/reference/opt/shaders/asm/geom/split-access-chain-input.asm.geom
new file mode 100644
index 0000000000..511d87fcbe
--- /dev/null
+++ b/reference/opt/shaders/asm/geom/split-access-chain-input.asm.geom
@@ -0,0 +1,9 @@
+#version 440
+layout(triangles) in;
+layout(max_vertices = 3, triangle_strip) out;
+
+void main()
+{
+    gl_Position = gl_in[0].gl_Position;
+}
+
diff --git a/reference/opt/shaders/asm/tesc/tess-fixed-input-array-builtin-array.asm.tesc b/reference/opt/shaders/asm/tesc/tess-fixed-input-array-builtin-array.asm.tesc
deleted file mode 100644
index dc43d91a9b..0000000000
--- a/reference/opt/shaders/asm/tesc/tess-fixed-input-array-builtin-array.asm.tesc
+++ /dev/null
@@ -1,81 +0,0 @@
-#version 450
-layout(vertices = 3) out;
-
-struct VertexOutput
-{
-    vec4 pos;
-    vec2 uv;
-};
-
-struct HSOut
-{
-    vec4 pos;
-    vec2 uv;
-};
-
-struct HSConstantOut
-{
-    float EdgeTess[3];
-    float InsideTess;
-};
-
-struct VertexOutput_1
-{
-    vec2 uv;
-};
-
-struct HSOut_1
-{
-    vec2 uv;
-};
-
-layout(location = 0) in VertexOutput_1 p[];
-layout(location = 0) out HSOut_1 _entryPointOutput[3];
-
-void main()
-{
-    VertexOutput p_1[3];
-    p_1[0].pos = gl_in[0].gl_Position;
-    p_1[0].uv = p[0].uv;
-    p_1[1].pos = gl_in[1].gl_Position;
-    p_1[1].uv = p[1].uv;
-    p_1[2].pos = gl_in[2].gl_Position;
-    p_1[2].uv = p[2].uv;
-    VertexOutput param[3] = p_1;
-    HSOut _158;
-    HSOut _197 = _158;
-    _197.pos = param[gl_InvocationID].pos;
-    HSOut _199 = _197;
-    _199.uv = param[gl_InvocationID].uv;
-    _158 = _199;
-    gl_out[gl_InvocationID].gl_Position = param[gl_InvocationID].pos;
-    _entryPointOutput[gl_InvocationID].uv = param[gl_InvocationID].uv;
-    barrier();
-    if (int(gl_InvocationID) == 0)
-    {
-        VertexOutput param_1[3] = p_1;
-        vec2 _174 = vec2(1.0) + param_1[0].uv;
-        float _175 = _174.x;
-        HSConstantOut _169;
-        HSConstantOut _205 = _169;
-        _205.EdgeTess[0] = _175;
-        vec2 _180 = vec2(1.0) + param_1[0].uv;
-        float _181 = _180.x;
-        HSConstantOut _207 = _205;
-        _207.EdgeTess[1] = _181;
-        vec2 _186 = vec2(1.0) + param_1[0].uv;
-        float _187 = _186.x;
-        HSConstantOut _209 = _207;
-        _209.EdgeTess[2] = _187;
-        vec2 _192 = vec2(1.0) + param_1[0].uv;
-        float _193 = _192.x;
-        HSConstantOut _211 = _209;
-        _211.InsideTess = _193;
-        _169 = _211;
-        gl_TessLevelOuter[0] = _175;
-        gl_TessLevelOuter[1] = _181;
-        gl_TessLevelOuter[2] = _187;
-        gl_TessLevelInner[0] = _193;
-    }
-}
-
diff --git a/reference/shaders/asm/tesc/tess-fixed-input-array-builtin-array.asm.tesc b/reference/opt/shaders/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc
similarity index 100%
rename from reference/shaders/asm/tesc/tess-fixed-input-array-builtin-array.asm.tesc
rename to reference/opt/shaders/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc
diff --git a/reference/opt/shaders/asm/vert/empty-io.asm.vert b/reference/opt/shaders/asm/vert/empty-io.asm.vert
index 5286269337..3819a71dd2 100644
--- a/reference/opt/shaders/asm/vert/empty-io.asm.vert
+++ b/reference/opt/shaders/asm/vert/empty-io.asm.vert
@@ -1,13 +1,8 @@
 #version 450
 
-struct VSInput
-{
-    vec4 position;
-};
-
 struct VSOutput
 {
-    vec4 position;
+    int empty_struct_member;
 };
 
 layout(location = 0) in vec4 position;
diff --git a/reference/opt/shaders/asm/vert/global-builtin.sso.asm.vert b/reference/opt/shaders/asm/vert/global-builtin.sso.asm.vert
index 2fc44c526e..20cb3b1704 100644
--- a/reference/opt/shaders/asm/vert/global-builtin.sso.asm.vert
+++ b/reference/opt/shaders/asm/vert/global-builtin.sso.asm.vert
@@ -8,15 +8,9 @@ out gl_PerVertex
 struct VSOut
 {
     float a;
-    vec4 pos;
 };
 
-struct VSOut_1
-{
-    float a;
-};
-
-layout(location = 0) out VSOut_1 _entryPointOutput;
+layout(location = 0) out VSOut _entryPointOutput;
 
 void main()
 {
diff --git a/reference/opt/shaders/asm/vert/spec-constant-op-composite.asm.vk.vert b/reference/opt/shaders/asm/vert/spec-constant-op-composite.asm.vk.vert
new file mode 100644
index 0000000000..baadbf0216
--- /dev/null
+++ b/reference/opt/shaders/asm/vert/spec-constant-op-composite.asm.vk.vert
@@ -0,0 +1,16 @@
+#version 450
+
+layout(location = 0) flat out int _4;
+
+void main()
+{
+    vec4 _64 = vec4(0.0);
+    _64.y = float(((-10) + 2));
+    vec4 _68 = _64;
+    _68.z = float((100u % 5u));
+    vec4 _52 = _68 + vec4(ivec4(20, 30, 0, 0));
+    vec2 _56 = _52.xy + vec2(ivec2(ivec4(20, 30, 0, 0).y, ivec4(20, 30, 0, 0).x));
+    gl_Position = vec4(_56.x, _56.y, _52.z, _52.w);
+    _4 = ivec4(20, 30, 0, 0).y;
+}
+
diff --git a/reference/opt/shaders/asm/vert/spec-constant-op-composite.asm.vk.vert.vk b/reference/opt/shaders/asm/vert/spec-constant-op-composite.asm.vk.vert.vk
new file mode 100644
index 0000000000..69daed106a
--- /dev/null
+++ b/reference/opt/shaders/asm/vert/spec-constant-op-composite.asm.vk.vert.vk
@@ -0,0 +1,24 @@
+#version 450
+
+layout(constant_id = 201) const int _7 = -10;
+layout(constant_id = 202) const uint _8 = 100u;
+const int _20 = (_7 + 2);
+const uint _25 = (_8 % 5u);
+const ivec4 _30 = ivec4(20, 30, _20, _20);
+const ivec2 _32 = ivec2(_30.y, _30.x);
+const int _33 = _30.y;
+
+layout(location = 0) flat out int _4;
+
+void main()
+{
+    vec4 _64 = vec4(0.0);
+    _64.y = float(_20);
+    vec4 _68 = _64;
+    _68.z = float(_25);
+    vec4 _52 = _68 + vec4(_30);
+    vec2 _56 = _52.xy + vec2(_32);
+    gl_Position = vec4(_56.x, _56.y, _52.z, _52.w);
+    _4 = _33;
+}
+
diff --git a/reference/opt/shaders/comp/bake_gradient.comp b/reference/opt/shaders/comp/bake_gradient.comp
index 0af4833926..69634d5d84 100644
--- a/reference/opt/shaders/comp/bake_gradient.comp
+++ b/reference/opt/shaders/comp/bake_gradient.comp
@@ -15,9 +15,12 @@ layout(binding = 3, rgba16f) uniform writeonly mediump image2D iGradJacobian;
 void main()
 {
     vec4 _59 = (vec2(gl_GlobalInvocationID.xy) * _46.uInvSize.xy).xyxy + (_46.uInvSize * 0.5);
-    vec2 _157 = ((textureLodOffset(uDisplacement, _59.zw, 0.0, ivec2(1, 0)).xy - textureLodOffset(uDisplacement, _59.zw, 0.0, ivec2(-1, 0)).xy) * 0.60000002384185791015625) * _46.uScale.z;
-    vec2 _161 = ((textureLodOffset(uDisplacement, _59.zw, 0.0, ivec2(0, 1)).xy - textureLodOffset(uDisplacement, _59.zw, 0.0, ivec2(0, -1)).xy) * 0.60000002384185791015625) * _46.uScale.z;
-    imageStore(iHeightDisplacement, ivec2(gl_GlobalInvocationID.xy), vec4(textureLod(uHeight, _59.xy, 0.0).x, 0.0, 0.0, 0.0));
-    imageStore(iGradJacobian, ivec2(gl_GlobalInvocationID.xy), vec4((_46.uScale.xy * 0.5) * vec2(textureLodOffset(uHeight, _59.xy, 0.0, ivec2(1, 0)).x - textureLodOffset(uHeight, _59.xy, 0.0, ivec2(-1, 0)).x, textureLodOffset(uHeight, _59.xy, 0.0, ivec2(0, 1)).x - textureLodOffset(uHeight, _59.xy, 0.0, ivec2(0, -1)).x), ((1.0 + _157.x) * (1.0 + _161.y)) - (_157.y * _161.x), 0.0));
+    vec2 _67 = _59.xy;
+    vec2 _128 = _59.zw;
+    vec2 _157 = ((textureLodOffset(uDisplacement, _128, 0.0, ivec2(1, 0)).xy - textureLodOffset(uDisplacement, _128, 0.0, ivec2(-1, 0)).xy) * 0.60000002384185791015625) * _46.uScale.z;
+    vec2 _161 = ((textureLodOffset(uDisplacement, _128, 0.0, ivec2(0, 1)).xy - textureLodOffset(uDisplacement, _128, 0.0, ivec2(0, -1)).xy) * 0.60000002384185791015625) * _46.uScale.z;
+    ivec2 _172 = ivec2(gl_GlobalInvocationID.xy);
+    imageStore(iHeightDisplacement, _172, vec4(textureLod(uHeight, _67, 0.0).x, 0.0, 0.0, 0.0));
+    imageStore(iGradJacobian, _172, vec4((_46.uScale.xy * 0.5) * vec2(textureLodOffset(uHeight, _67, 0.0, ivec2(1, 0)).x - textureLodOffset(uHeight, _67, 0.0, ivec2(-1, 0)).x, textureLodOffset(uHeight, _67, 0.0, ivec2(0, 1)).x - textureLodOffset(uHeight, _67, 0.0, ivec2(0, -1)).x), ((1.0 + _157.x) * (1.0 + _161.y)) - (_157.y * _161.x), 0.0));
 }
 
diff --git a/reference/opt/shaders/comp/casts.comp b/reference/opt/shaders/comp/casts.comp
index 11ef36287b..43de6aa6a0 100644
--- a/reference/opt/shaders/comp/casts.comp
+++ b/reference/opt/shaders/comp/casts.comp
@@ -6,13 +6,13 @@ layout(binding = 1, std430) buffer SSBO1
     ivec4 outputs[];
 } _21;
 
-layout(binding = 0, std430) buffer SSBO0
+layout(binding = 0, std430) buffer _19_27
 {
-    ivec4 inputs[];
+    ivec4 outputs[];
 } _27;
 
 void main()
 {
-    _21.outputs[gl_GlobalInvocationID.x] = mix(ivec4(0), ivec4(1), notEqual((_27.inputs[gl_GlobalInvocationID.x] & ivec4(3)), ivec4(uvec4(0u))));
+    _21.outputs[gl_GlobalInvocationID.x] = mix(ivec4(0), ivec4(1), notEqual((_27.outputs[gl_GlobalInvocationID.x] & ivec4(3)), ivec4(uvec4(0u))));
 }
 
diff --git a/reference/opt/shaders/comp/cfg.comp b/reference/opt/shaders/comp/cfg.comp
index c2c7136bbd..45b219ec50 100644
--- a/reference/opt/shaders/comp/cfg.comp
+++ b/reference/opt/shaders/comp/cfg.comp
@@ -6,7 +6,7 @@ layout(binding = 0, std430) buffer SSBO
     float data;
 } _11;
 
-float _180;
+float _183;
 
 void main()
 {
@@ -31,26 +31,15 @@ void main()
             break;
         }
     }
-    switch (int(_11.data))
+    float _180;
+    _180 = _183;
+    for (int _179 = 0; _179 < 20; )
     {
-        case 0:
-        {
-            break;
-        }
-        case 1:
-        {
-            break;
-        }
+        _180 += 10.0;
+        _179++;
+        continue;
     }
-    float _181;
-    _181 = _180;
-    for (int _179 = 0; _179 < 20; _179++, _181 += 10.0)
-    {
-    }
-    _11.data = _181;
-    do
-    {
-    } while (_180 != 20.0);
     _11.data = _180;
+    _11.data = _183;
 }
 
diff --git a/reference/opt/shaders/comp/composite-construct.comp b/reference/opt/shaders/comp/composite-construct.comp
index 5371f7e528..3e44af9905 100644
--- a/reference/opt/shaders/comp/composite-construct.comp
+++ b/reference/opt/shaders/comp/composite-construct.comp
@@ -1,26 +1,18 @@
 #version 310 es
 layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
 
-struct Composite
-{
-    vec4 a[2];
-    vec4 b[2];
-};
-
 layout(binding = 0, std430) buffer SSBO0
 {
     vec4 as[];
 } _41;
 
-layout(binding = 1, std430) buffer SSBO1
+layout(binding = 1, std430) buffer _39_55
 {
-    vec4 bs[];
+    vec4 as[];
 } _55;
 
 void main()
 {
-    vec4 _60[2] = vec4[](_41.as[gl_GlobalInvocationID.x], _55.bs[gl_GlobalInvocationID.x]);
-    vec4 param[3][2] = vec4[][](_60, vec4[](vec4(10.0), vec4(30.0)), _60);
-    _41.as[gl_GlobalInvocationID.x] = ((param[0][0] + param[2][1]) + param[0][1]) + param[1][0];
+    _41.as[gl_GlobalInvocationID.x] = ((_41.as[gl_GlobalInvocationID.x] + _55.as[gl_GlobalInvocationID.x]) + _55.as[gl_GlobalInvocationID.x]) + vec4(10.0);
 }
 
diff --git a/reference/opt/shaders/comp/defer-parens.comp b/reference/opt/shaders/comp/defer-parens.comp
index 51fa7f0abf..c48fb9e080 100644
--- a/reference/opt/shaders/comp/defer-parens.comp
+++ b/reference/opt/shaders/comp/defer-parens.comp
@@ -10,10 +10,11 @@ layout(binding = 0, std430) buffer SSBO
 void main()
 {
     vec4 _17 = _13.data;
-    _13.data = vec4(_17.x, _17.yz + vec2(10.0), _17.w);
+    vec2 _28 = _17.yz + vec2(10.0);
+    _13.data = vec4(_17.x, _28, _17.w);
     _13.data = (_17 + _17) + _17;
-    _13.data = (_17.yz + vec2(10.0)).xxyy;
-    _13.data = vec4((_17.yz + vec2(10.0)).y);
+    _13.data = _28.xxyy;
+    _13.data = vec4(_28.y);
     _13.data = vec4((_17.zw + vec2(10.0))[_13.index]);
 }
 
diff --git a/reference/opt/shaders/comp/dowhile.comp b/reference/opt/shaders/comp/dowhile.comp
index 61a3735d13..c62a89e467 100644
--- a/reference/opt/shaders/comp/dowhile.comp
+++ b/reference/opt/shaders/comp/dowhile.comp
@@ -27,7 +27,6 @@ void main()
         if (i < 16)
         {
             _56 = _42;
-            continue;
         }
         else
         {
diff --git a/reference/opt/shaders/comp/generate_height.comp b/reference/opt/shaders/comp/generate_height.comp
index 1b5e0c3dc1..17d3073d2e 100644
--- a/reference/opt/shaders/comp/generate_height.comp
+++ b/reference/opt/shaders/comp/generate_height.comp
@@ -4,51 +4,52 @@ layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
 layout(binding = 0, std430) readonly buffer Distribution
 {
     vec2 distribution[];
-} _136;
+} _137;
 
 layout(binding = 2, std140) uniform UBO
 {
     vec4 uModTime;
-} _165;
+} _166;
 
 layout(binding = 1, std430) writeonly buffer HeightmapFFT
 {
     uint heights[];
-} _224;
+} _225;
 
 void main()
 {
-    uvec2 _263 = uvec2(64u, 1u) * gl_NumWorkGroups.xy;
-    uvec2 _268 = _263 - gl_GlobalInvocationID.xy;
-    bvec2 _270 = equal(gl_GlobalInvocationID.xy, uvec2(0u));
-    uint _470;
-    if (_270.x)
+    uvec2 _264 = uvec2(64u, 1u) * gl_NumWorkGroups.xy;
+    uvec2 _269 = _264 - gl_GlobalInvocationID.xy;
+    bvec2 _271 = equal(gl_GlobalInvocationID.xy, uvec2(0u));
+    uint _475;
+    if (_271.x)
     {
-        _470 = 0u;
+        _475 = 0u;
     }
     else
     {
-        _470 = _268.x;
+        _475 = _269.x;
     }
-    uint _471;
-    if (_270.y)
+    uint _476;
+    if (_271.y)
     {
-        _471 = 0u;
+        _476 = 0u;
     }
     else
     {
-        _471 = _268.y;
+        _476 = _269.y;
     }
-    vec2 _296 = vec2(gl_GlobalInvocationID.xy);
-    vec2 _298 = vec2(_263);
-    float _308 = sqrt(9.81000041961669921875 * length(_165.uModTime.xy * mix(_296, _296 - _298, greaterThan(_296, _298 * 0.5)))) * _165.uModTime.z;
-    float _310 = cos(_308);
-    float _312 = sin(_308);
-    vec2 _315 = vec2(_310, _312);
-    vec2 _394 = _315.yy * (_136.distribution[(gl_GlobalInvocationID.xy.y * _263.x) + gl_GlobalInvocationID.xy.x]).yx;
-    vec2 _320 = vec2(_310, _312);
-    vec2 _420 = _320.yy * (_136.distribution[(_471 * _263.x) + _470]).yx;
-    vec2 _428 = ((_136.distribution[(_471 * _263.x) + _470]) * _320.xx) + vec2(-_420.x, _420.y);
-    _224.heights[(gl_GlobalInvocationID.xy.y * _263.x) + gl_GlobalInvocationID.xy.x] = packHalf2x16((((_136.distribution[(gl_GlobalInvocationID.xy.y * _263.x) + gl_GlobalInvocationID.xy.x]) * _315.xx) + vec2(-_394.x, _394.y)) + vec2(_428.x, -_428.y));
+    uint _448 = _264.x;
+    uint _280 = (gl_GlobalInvocationID.y * _448) + gl_GlobalInvocationID.x;
+    vec2 _297 = vec2(gl_GlobalInvocationID.xy);
+    vec2 _299 = vec2(_264);
+    float _309 = sqrt(9.81000041961669921875 * length(_166.uModTime.xy * mix(_297, _297 - _299, greaterThan(_297, _299 * 0.5)))) * _166.uModTime.z;
+    vec2 _316 = vec2(cos(_309), sin(_309));
+    vec2 _387 = _316.xx;
+    vec2 _392 = _316.yy;
+    vec2 _395 = _392 * _137.distribution[_280].yx;
+    vec2 _421 = _392 * (_137.distribution[(_476 * _448) + _475]).yx;
+    vec2 _429 = ((_137.distribution[(_476 * _448) + _475]) * _387) + vec2(-_421.x, _421.y);
+    _225.heights[_280] = packHalf2x16(((_137.distribution[_280] * _387) + vec2(-_395.x, _395.y)) + vec2(_429.x, -_429.y));
 }
 
diff --git a/reference/opt/shaders/comp/image.comp b/reference/opt/shaders/comp/image.comp
index 8bd7dd06ab..cdb57142c3 100644
--- a/reference/opt/shaders/comp/image.comp
+++ b/reference/opt/shaders/comp/image.comp
@@ -6,6 +6,7 @@ layout(binding = 1, rgba8) uniform writeonly mediump image2D uImageOut;
 
 void main()
 {
-    imageStore(uImageOut, ivec2(gl_GlobalInvocationID.xy), imageLoad(uImageIn, ivec2(gl_GlobalInvocationID.xy) + imageSize(uImageIn)));
+    ivec2 _23 = ivec2(gl_GlobalInvocationID.xy);
+    imageStore(uImageOut, _23, imageLoad(uImageIn, _23 + imageSize(uImageIn)));
 }
 
diff --git a/reference/opt/shaders/comp/return.comp b/reference/opt/shaders/comp/return.comp
deleted file mode 100644
index ea41907a7b..0000000000
--- a/reference/opt/shaders/comp/return.comp
+++ /dev/null
@@ -1,31 +0,0 @@
-#version 310 es
-layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
-
-layout(binding = 1, std430) writeonly buffer SSBO2
-{
-    vec4 out_data[];
-} _27;
-
-int _69;
-
-void main()
-{
-    if (gl_GlobalInvocationID.x == 2u)
-    {
-        _27.out_data[gl_GlobalInvocationID.x] = vec4(20.0);
-    }
-    else
-    {
-        if (gl_GlobalInvocationID.x == 4u)
-        {
-            _27.out_data[gl_GlobalInvocationID.x] = vec4(10.0);
-            return;
-        }
-    }
-    for (int _68 = 0; _68 < 20; _68 = _69 + 1)
-    {
-        return;
-    }
-    _27.out_data[gl_GlobalInvocationID.x] = vec4(10.0);
-}
-
diff --git a/reference/opt/shaders/comp/rmw-matrix.comp b/reference/opt/shaders/comp/rmw-matrix.comp
new file mode 100644
index 0000000000..5c4ac94bc7
--- /dev/null
+++ b/reference/opt/shaders/comp/rmw-matrix.comp
@@ -0,0 +1,20 @@
+#version 310 es
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+layout(binding = 0, std430) buffer SSBO
+{
+    float a;
+    vec4 b;
+    mat4 c;
+    float a1;
+    vec4 b1;
+    mat4 c1;
+} _11;
+
+void main()
+{
+    _11.a *= _11.a1;
+    _11.b *= _11.b1;
+    _11.c = _11.c * _11.c1;
+}
+
diff --git a/reference/opt/shaders/comp/rmw-opt.comp b/reference/opt/shaders/comp/rmw-opt.comp
index 7d4d24b29f..342e6632db 100644
--- a/reference/opt/shaders/comp/rmw-opt.comp
+++ b/reference/opt/shaders/comp/rmw-opt.comp
@@ -18,7 +18,6 @@ void main()
     _9.a ^= 10;
     _9.a %= 40;
     _9.a |= 1;
-    bool _65 = false && true;
-    _9.a = int(_65 && (true || _65));
+    _9.a = 0;
 }
 
diff --git a/reference/opt/shaders/comp/shared.comp b/reference/opt/shaders/comp/shared.comp
index 66ec1c2cc7..f95cb2b8b9 100644
--- a/reference/opt/shaders/comp/shared.comp
+++ b/reference/opt/shaders/comp/shared.comp
@@ -18,6 +18,6 @@ void main()
     sShared[gl_LocalInvocationIndex] = _22.in_data[gl_GlobalInvocationID.x];
     memoryBarrierShared();
     barrier();
-    _44.out_data[gl_GlobalInvocationID.x] = sShared[(4u - gl_LocalInvocationIndex) - 1u];
+    _44.out_data[gl_GlobalInvocationID.x] = sShared[3u - gl_LocalInvocationIndex];
 }
 
diff --git a/reference/opt/shaders/comp/struct-packing.comp b/reference/opt/shaders/comp/struct-packing.comp
index 3c30aa6088..8a2ac76274 100644
--- a/reference/opt/shaders/comp/struct-packing.comp
+++ b/reference/opt/shaders/comp/struct-packing.comp
@@ -43,7 +43,26 @@ struct Content
     S4 m3s[8];
 };
 
-layout(binding = 1, std430) buffer SSBO1
+struct S0_1
+{
+    vec2 a[1];
+    float b;
+};
+
+struct Content_1
+{
+    S0_1 m0s[1];
+    S1 m1s[1];
+    S2 m2s[1];
+    S0_1 m0;
+    S1 m1;
+    S2 m2;
+    S3 m3;
+    float m4;
+    S4 m3s[8];
+};
+
+layout(binding = 1, std430) restrict buffer SSBO1
 {
     Content content;
     Content content1[2];
@@ -59,11 +78,11 @@ layout(binding = 1, std430) buffer SSBO1
     float array[];
 } ssbo_430;
 
-layout(binding = 0, std140) buffer SSBO0
+layout(binding = 0, std140) restrict buffer SSBO0
 {
-    Content content;
-    Content content1[2];
-    Content content2;
+    Content_1 content;
+    Content_1 content1[2];
+    Content_1 content2;
     mat2 m0;
     mat2 m1;
     mat2x3 m2[4];
diff --git a/reference/opt/shaders/comp/torture-loop.comp b/reference/opt/shaders/comp/torture-loop.comp
index 640142b30a..5943966c05 100644
--- a/reference/opt/shaders/comp/torture-loop.comp
+++ b/reference/opt/shaders/comp/torture-loop.comp
@@ -12,66 +12,29 @@ layout(binding = 1, std430) writeonly buffer SSBO2
     vec4 out_data[];
 } _89;
 
-uint _98;
-
 void main()
 {
-    vec4 _93;
-    int _94;
-    _93 = _24.in_data[gl_GlobalInvocationID.x];
-    _94 = 0;
-    int _40;
-    vec4 _46;
-    int _48;
-    for (;;)
+    vec4 _99;
+    _99 = _24.in_data[gl_GlobalInvocationID.x];
+    for (int _93 = 0; (_93 + 1) < 10; )
     {
-        _40 = _94 + 1;
-        if (_40 < 10)
+        _99 *= 2.0;
+        _93 += 2;
+        continue;
+    }
+    vec4 _98;
+    _98 = _99;
+    vec4 _103;
+    for (uint _94 = 0u; _94 < 16u; _98 = _103, _94++)
+    {
+        _103 = _98;
+        for (uint _100 = 0u; _100 < 30u; )
         {
-            _46 = _93 * 2.0;
-            _48 = _40 + 1;
-            _93 = _46;
-            _94 = _48;
+            _103 = _24.mvp * _103;
+            _100++;
             continue;
         }
-        else
-        {
-            break;
-        }
     }
-    vec4 _95;
-    int _96;
-    _95 = _93;
-    _96 = _40;
-    vec4 _100;
-    uint _101;
-    uint _99;
-    for (uint _97 = 0u; _97 < 16u; _95 = _100, _96++, _97++, _99 = _101)
-    {
-        _100 = _95;
-        _101 = 0u;
-        vec4 _71;
-        for (; _101 < 30u; _100 = _71, _101++)
-        {
-            _71 = _24.mvp * _100;
-        }
-    }
-    int _102;
-    _102 = _96;
-    int _83;
-    for (;;)
-    {
-        _83 = _102 + 1;
-        if (_83 > 10)
-        {
-            _102 = _83;
-            continue;
-        }
-        else
-        {
-            break;
-        }
-    }
-    _89.out_data[gl_GlobalInvocationID.x] = _95;
+    _89.out_data[gl_GlobalInvocationID.x] = _98;
 }
 
diff --git a/reference/opt/shaders/comp/type-alias.comp b/reference/opt/shaders/comp/type-alias.comp
index c0f57f4bda..a6e13156fc 100644
--- a/reference/opt/shaders/comp/type-alias.comp
+++ b/reference/opt/shaders/comp/type-alias.comp
@@ -6,19 +6,14 @@ struct S0
     vec4 a;
 };
 
-struct S1
-{
-    vec4 a;
-};
-
 layout(binding = 0, std430) buffer SSBO0
 {
     S0 s0s[];
 } _36;
 
-layout(binding = 1, std430) buffer SSBO1
+layout(binding = 1, std430) buffer _34_55
 {
-    S1 s1s[];
+    S0 s0s[];
 } _55;
 
 layout(binding = 2, std430) buffer SSBO2
@@ -28,6 +23,6 @@ layout(binding = 2, std430) buffer SSBO2
 
 void main()
 {
-    _66.outputs[gl_GlobalInvocationID.x] = _36.s0s[gl_GlobalInvocationID.x].a + _55.s1s[gl_GlobalInvocationID.x].a;
+    _66.outputs[gl_GlobalInvocationID.x] = _36.s0s[gl_GlobalInvocationID.x].a + _55.s0s[gl_GlobalInvocationID.x].a;
 }
 
diff --git a/reference/opt/shaders/comp/udiv.comp b/reference/opt/shaders/comp/udiv.comp
index 0c1f926ad0..80d1c7f532 100644
--- a/reference/opt/shaders/comp/udiv.comp
+++ b/reference/opt/shaders/comp/udiv.comp
@@ -6,13 +6,13 @@ layout(binding = 0, std430) buffer SSBO2
     uint outputs[];
 } _10;
 
-layout(binding = 0, std430) buffer SSBO
+layout(binding = 0, std430) buffer _8_23
 {
-    uint inputs[];
+    uint outputs[];
 } _23;
 
 void main()
 {
-    _10.outputs[gl_GlobalInvocationID.x] = _23.inputs[gl_GlobalInvocationID.x] / 29u;
+    _10.outputs[gl_GlobalInvocationID.x] = _23.outputs[gl_GlobalInvocationID.x] / 29u;
 }
 
diff --git a/reference/opt/shaders/desktop-only/frag/control-dependent-in-branch.desktop.frag b/reference/opt/shaders/desktop-only/frag/control-dependent-in-branch.desktop.frag
new file mode 100644
index 0000000000..29c59012cc
--- /dev/null
+++ b/reference/opt/shaders/desktop-only/frag/control-dependent-in-branch.desktop.frag
@@ -0,0 +1,37 @@
+#version 450
+
+layout(binding = 0) uniform sampler2D uSampler;
+
+layout(location = 0) out vec4 FragColor;
+layout(location = 0) in vec4 vInput;
+
+void main()
+{
+    FragColor = vInput;
+    vec4 _23 = texture(uSampler, vInput.xy);
+    vec4 _26 = dFdx(vInput);
+    vec4 _29 = dFdy(vInput);
+    vec4 _32 = fwidth(vInput);
+    vec4 _35 = dFdxCoarse(vInput);
+    vec4 _38 = dFdyCoarse(vInput);
+    vec4 _41 = fwidthCoarse(vInput);
+    vec4 _44 = dFdxFine(vInput);
+    vec4 _47 = dFdyFine(vInput);
+    vec4 _50 = fwidthFine(vInput);
+    vec2 _56 = textureQueryLod(uSampler, vInput.zw);
+    if (vInput.y > 10.0)
+    {
+        FragColor += _23;
+        FragColor += _26;
+        FragColor += _29;
+        FragColor += _32;
+        FragColor += _35;
+        FragColor += _38;
+        FragColor += _41;
+        FragColor += _44;
+        FragColor += _47;
+        FragColor += _50;
+        FragColor += _56.xyxy;
+    }
+}
+
diff --git a/reference/opt/shaders/desktop-only/frag/dual-source-blending.desktop.frag b/reference/opt/shaders/desktop-only/frag/dual-source-blending.desktop.frag
new file mode 100644
index 0000000000..3d946b04a5
--- /dev/null
+++ b/reference/opt/shaders/desktop-only/frag/dual-source-blending.desktop.frag
@@ -0,0 +1,11 @@
+#version 450
+
+layout(location = 0, index = 0) out vec4 FragColor0;
+layout(location = 0, index = 1) out vec4 FragColor1;
+
+void main()
+{
+    FragColor0 = vec4(1.0);
+    FragColor1 = vec4(2.0);
+}
+
diff --git a/reference/opt/shaders/desktop-only/frag/fp16.desktop.frag b/reference/opt/shaders/desktop-only/frag/fp16.desktop.frag
new file mode 100644
index 0000000000..1761424e11
--- /dev/null
+++ b/reference/opt/shaders/desktop-only/frag/fp16.desktop.frag
@@ -0,0 +1,11 @@
+#version 450
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(location = 3) in f16vec4 v4;
+
+void main()
+{
+    f16vec4 _505;
+    f16vec4 _577 = modf(v4, _505);
+}
+
diff --git a/reference/opt/shaders/desktop-only/frag/image-query.desktop.frag b/reference/opt/shaders/desktop-only/frag/image-query.desktop.frag
index fa1ac0abae..3eaaa8a445 100644
--- a/reference/opt/shaders/desktop-only/frag/image-query.desktop.frag
+++ b/reference/opt/shaders/desktop-only/frag/image-query.desktop.frag
@@ -9,6 +9,15 @@ layout(binding = 5) uniform samplerCubeArray uSamplerCubeArray;
 layout(binding = 6) uniform samplerBuffer uSamplerBuffer;
 layout(binding = 7) uniform sampler2DMS uSamplerMS;
 layout(binding = 8) uniform sampler2DMSArray uSamplerMSArray;
+layout(binding = 9, r32f) uniform readonly writeonly image1D uImage1D;
+layout(binding = 10, r32f) uniform readonly writeonly image2D uImage2D;
+layout(binding = 11, r32f) uniform readonly writeonly image2DArray uImage2DArray;
+layout(binding = 12, r32f) uniform readonly writeonly image3D uImage3D;
+layout(binding = 13, r32f) uniform readonly writeonly imageCube uImageCube;
+layout(binding = 14, r32f) uniform readonly writeonly imageCubeArray uImageCubeArray;
+layout(binding = 15, r32f) uniform readonly writeonly imageBuffer uImageBuffer;
+layout(binding = 16, r32f) uniform readonly writeonly image2DMS uImageMS;
+layout(binding = 17, r32f) uniform readonly writeonly image2DMSArray uImageMSArray;
 
 void main()
 {
diff --git a/reference/opt/shaders/desktop-only/geom/basic.desktop.sso.geom b/reference/opt/shaders/desktop-only/geom/basic.desktop.sso.geom
index f1afee69ec..8e51cfa36b 100644
--- a/reference/opt/shaders/desktop-only/geom/basic.desktop.sso.geom
+++ b/reference/opt/shaders/desktop-only/geom/basic.desktop.sso.geom
@@ -22,13 +22,14 @@ layout(location = 0) in VertexData
 void main()
 {
     gl_Position = gl_in[0].gl_Position;
-    vNormal = vin[0].normal + vec3(float(gl_InvocationID));
+    float _37 = float(gl_InvocationID);
+    vNormal = vin[0].normal + vec3(_37);
     EmitVertex();
     gl_Position = gl_in[1].gl_Position;
-    vNormal = vin[1].normal + vec3(4.0 * float(gl_InvocationID));
+    vNormal = vin[1].normal + vec3(4.0 * _37);
     EmitVertex();
     gl_Position = gl_in[2].gl_Position;
-    vNormal = vin[2].normal + vec3(2.0 * float(gl_InvocationID));
+    vNormal = vin[2].normal + vec3(2.0 * _37);
     EmitVertex();
     EndPrimitive();
 }
diff --git a/reference/opt/shaders/flatten/copy.flatten.vert b/reference/opt/shaders/flatten/copy.flatten.vert
index 59f0dc1b42..27ce450d62 100644
--- a/reference/opt/shaders/flatten/copy.flatten.vert
+++ b/reference/opt/shaders/flatten/copy.flatten.vert
@@ -16,10 +16,12 @@ void main()
 {
     gl_Position = mat4(UBO[0], UBO[1], UBO[2], UBO[3]) * aVertex;
     vColor = vec4(0.0);
-    for (int _103 = 0; _103 < 4; _103++)
+    for (int _96 = 0; _96 < 4; )
     {
-        vec3 _68 = aVertex.xyz - Light(UBO[_103 * 2 + 4].xyz, UBO[_103 * 2 + 4].w, UBO[_103 * 2 + 5]).Position;
-        vColor += (((UBO[_103 * 2 + 5]) * clamp(1.0 - (length(_68) / Light(UBO[_103 * 2 + 4].xyz, UBO[_103 * 2 + 4].w, UBO[_103 * 2 + 5]).Radius), 0.0, 1.0)) * dot(aNormal, normalize(_68)));
+        vec3 _68 = aVertex.xyz - Light(UBO[_96 * 2 + 4].xyz, UBO[_96 * 2 + 4].w, UBO[_96 * 2 + 5]).Position;
+        vColor += (((UBO[_96 * 2 + 5]) * clamp(1.0 - (length(_68) / Light(UBO[_96 * 2 + 4].xyz, UBO[_96 * 2 + 4].w, UBO[_96 * 2 + 5]).Radius), 0.0, 1.0)) * dot(aNormal, normalize(_68)));
+        _96++;
+        continue;
     }
 }
 
diff --git a/reference/opt/shaders/flatten/dynamic.flatten.vert b/reference/opt/shaders/flatten/dynamic.flatten.vert
index c08f7445be..8fc8ff6eed 100644
--- a/reference/opt/shaders/flatten/dynamic.flatten.vert
+++ b/reference/opt/shaders/flatten/dynamic.flatten.vert
@@ -16,10 +16,12 @@ void main()
 {
     gl_Position = mat4(UBO[0], UBO[1], UBO[2], UBO[3]) * aVertex;
     vColor = vec4(0.0);
-    for (int _82 = 0; _82 < 4; _82++)
+    for (int _82 = 0; _82 < 4; )
     {
         vec3 _54 = aVertex.xyz - (UBO[_82 * 2 + 4].xyz);
         vColor += (((UBO[_82 * 2 + 5]) * clamp(1.0 - (length(_54) / (UBO[_82 * 2 + 4].w)), 0.0, 1.0)) * dot(aNormal, normalize(_54)));
+        _82++;
+        continue;
     }
 }
 
diff --git a/reference/opt/shaders/flatten/multi-dimensional.desktop.flatten_dim.frag b/reference/opt/shaders/flatten/multi-dimensional.desktop.flatten_dim.frag
index 6ccede21a9..3657298f18 100644
--- a/reference/opt/shaders/flatten/multi-dimensional.desktop.flatten_dim.frag
+++ b/reference/opt/shaders/flatten/multi-dimensional.desktop.flatten_dim.frag
@@ -6,26 +6,22 @@ layout(location = 1) in vec2 vUV;
 layout(location = 0) out vec4 FragColor;
 layout(location = 0) flat in int vIndex;
 
-int _93;
-
 void main()
 {
+    int _92;
+    _92 = 0;
     vec4 values3[2 * 3 * 1];
-    int _96;
-    int _97;
-    int _94;
-    int _95;
-    for (int _92 = 0; _92 < 2; _92++, _94 = _96, _95 = _97)
+    for (; _92 < 2; _92++)
     {
-        _96 = 0;
-        _97 = _95;
-        int _98;
-        for (; _96 < 3; _96++, _97 = _98)
+        int _93;
+        _93 = 0;
+        for (; _93 < 3; _93++)
         {
-            _98 = 0;
-            for (; _98 < 1; _98++)
+            for (int _95 = 0; _95 < 1; )
             {
-                values3[_92 * 3 * 1 + _96 * 1 + _98] = texture(uTextures[_92 * 3 * 1 + _96 * 1 + _98], vUV);
+                values3[_92 * 3 * 1 + _93 * 1 + _95] = texture(uTextures[_92 * 3 * 1 + _93 * 1 + _95], vUV);
+                _95++;
+                continue;
             }
         }
     }
diff --git a/reference/opt/shaders/frag/array-lut-no-loop-variable.frag b/reference/opt/shaders/frag/array-lut-no-loop-variable.frag
new file mode 100644
index 0000000000..7bdfe543e4
--- /dev/null
+++ b/reference/opt/shaders/frag/array-lut-no-loop-variable.frag
@@ -0,0 +1,18 @@
+#version 310 es
+precision mediump float;
+precision highp int;
+
+layout(location = 0) out vec4 FragColor;
+
+void main()
+{
+    float lut[5] = float[](1.0, 2.0, 3.0, 4.0, 5.0);
+    for (int _46 = 0; _46 < 4; )
+    {
+        mediump int _33 = _46 + 1;
+        FragColor += vec4(lut[_33]);
+        _46 = _33;
+        continue;
+    }
+}
+
diff --git a/reference/opt/shaders/frag/constant-array.frag b/reference/opt/shaders/frag/constant-array.frag
index a6ffda0737..2af87ad80d 100644
--- a/reference/opt/shaders/frag/constant-array.frag
+++ b/reference/opt/shaders/frag/constant-array.frag
@@ -16,6 +16,6 @@ void main()
     highp vec4 indexable[3] = vec4[](vec4(1.0), vec4(2.0), vec4(3.0));
     highp vec4 indexable_1[2][2] = vec4[][](vec4[](vec4(1.0), vec4(2.0)), vec4[](vec4(8.0), vec4(10.0)));
     Foobar indexable_2[2] = Foobar[](Foobar(10.0, 40.0), Foobar(90.0, 70.0));
-    FragColor = ((indexable[index] + (indexable_1[index][index + 1])) + vec4(10.0 + 20.0)) + vec4(indexable_2[index].a + indexable_2[index].b);
+    FragColor = ((indexable[index] + (indexable_1[index][index + 1])) + vec4(30.0)) + vec4(indexable_2[index].a + indexable_2[index].b);
 }
 
diff --git a/reference/opt/shaders/frag/constant-composites.frag b/reference/opt/shaders/frag/constant-composites.frag
index ab0816c3d2..b105dbd26c 100644
--- a/reference/opt/shaders/frag/constant-composites.frag
+++ b/reference/opt/shaders/frag/constant-composites.frag
@@ -9,7 +9,7 @@ struct Foo
 };
 
 layout(location = 0) out vec4 FragColor;
-layout(location = 0) flat in mediump int _line;
+layout(location = 0) flat in mediump int line;
 float lut[4];
 Foo foos[2];
 
@@ -17,7 +17,7 @@ void main()
 {
     lut = float[](1.0, 4.0, 3.0, 2.0);
     foos = Foo[](Foo(10.0, 20.0), Foo(30.0, 40.0));
-    FragColor = vec4(lut[_line]);
-    FragColor += vec4(foos[_line].a * (foos[1 - _line].a));
+    FragColor = vec4(lut[line]);
+    FragColor += vec4(foos[line].a * (foos[1 - line].a));
 }
 
diff --git a/reference/opt/shaders/frag/eliminate-dead-variables.frag b/reference/opt/shaders/frag/eliminate-dead-variables.frag
deleted file mode 100644
index c97ae20f9a..0000000000
--- a/reference/opt/shaders/frag/eliminate-dead-variables.frag
+++ /dev/null
@@ -1,14 +0,0 @@
-#version 310 es
-precision mediump float;
-precision highp int;
-
-layout(binding = 0) uniform mediump sampler2D uSampler;
-
-layout(location = 0) out vec4 FragColor;
-layout(location = 0) in vec2 vTexCoord;
-
-void main()
-{
-    FragColor = texture(uSampler, vTexCoord);
-}
-
diff --git a/reference/opt/shaders/frag/false-loop-init.frag b/reference/opt/shaders/frag/false-loop-init.frag
index 1db46c1bd5..2d5902a399 100644
--- a/reference/opt/shaders/frag/false-loop-init.frag
+++ b/reference/opt/shaders/frag/false-loop-init.frag
@@ -5,24 +5,14 @@ precision highp int;
 layout(location = 0) out vec4 result;
 layout(location = 0) in vec4 accum;
 
-uint _49;
-
 void main()
 {
     result = vec4(0.0);
-    uint _51;
-    uint _50;
-    for (int _48 = 0; _48 < 4; _48 += int(_51), _50 = _51)
+    for (int _48 = 0; _48 < 4; )
     {
-        if (accum.y > 10.0)
-        {
-            _51 = 40u;
-        }
-        else
-        {
-            _51 = 30u;
-        }
         result += accum;
+        _48 += int((accum.y > 10.0) ? 40u : 30u);
+        continue;
     }
 }
 
diff --git a/reference/opt/shaders/frag/flush_params.frag b/reference/opt/shaders/frag/flush_params.frag
index 5f386dffbb..16b4994148 100644
--- a/reference/opt/shaders/frag/flush_params.frag
+++ b/reference/opt/shaders/frag/flush_params.frag
@@ -2,11 +2,6 @@
 precision mediump float;
 precision highp int;
 
-struct Structy
-{
-    vec4 c;
-};
-
 layout(location = 0) out vec4 FragColor;
 
 void main()
diff --git a/reference/opt/shaders/frag/for-loop-init.frag b/reference/opt/shaders/frag/for-loop-init.frag
index 626d7c8d5e..6d13815d78 100644
--- a/reference/opt/shaders/frag/for-loop-init.frag
+++ b/reference/opt/shaders/frag/for-loop-init.frag
@@ -4,48 +4,72 @@ precision highp int;
 
 layout(location = 0) out mediump int FragColor;
 
+ivec2 _184;
+int _199;
+
 void main()
 {
     FragColor = 16;
-    for (int _140 = 0; _140 < 25; _140++)
+    for (int _168 = 0; _168 < 25; )
     {
         FragColor += 10;
+        _168++;
+        continue;
     }
-    for (int _141 = 1; _141 < 30; _141++)
+    for (int _169 = 1; _169 < 30; )
     {
         FragColor += 11;
+        _169++;
+        continue;
     }
-    int _142;
-    _142 = 0;
-    for (; _142 < 20; _142++)
+    int _170;
+    _170 = 0;
+    for (; _170 < 20; )
     {
         FragColor += 12;
+        _170++;
+        continue;
     }
-    mediump int _62 = _142 + 3;
+    mediump int _62 = _170 + 3;
     FragColor += _62;
-    if (_62 == 40)
+    bool _68 = _62 == 40;
+    if (_68)
     {
-        for (int _143 = 0; _143 < 40; _143++)
+        for (int _171 = 0; _171 < 40; )
         {
             FragColor += 13;
+            _171++;
+            continue;
         }
-        return;
     }
     else
     {
         FragColor += _62;
     }
-    ivec2 _144;
-    _144 = ivec2(0);
-    ivec2 _139;
-    for (; _144.x < 10; _139 = _144, _139.x = _144.x + 4, _144 = _139)
+    bool _213 = _68 ? true : false;
+    if (!_213)
     {
-        FragColor += _144.y;
+        ivec2 _177;
+        _177 = mix(ivec2(0), mix(_184, _184, bvec2(_68)), bvec2(_213));
+        for (; _177.x < 10; )
+        {
+            FragColor += _177.y;
+            ivec2 _167 = _177;
+            _167.x = _177.x + 4;
+            _177 = _167;
+            continue;
+        }
     }
-    for (int _145 = _62; _145 < 40; _145++)
+    int _216 = _213 ? (_68 ? _199 : _199) : _62;
+    if (!_213)
     {
-        FragColor += _145;
+        for (int _191 = _216; _191 < 40; )
+        {
+            FragColor += _191;
+            _191++;
+            continue;
+        }
+        FragColor += _216;
     }
-    FragColor += _62;
 }
 
diff --git a/reference/opt/shaders/frag/gather-dref.frag b/reference/opt/shaders/frag/gather-dref.frag
new file mode 100644
index 0000000000..5416f79cb5
--- /dev/null
+++ b/reference/opt/shaders/frag/gather-dref.frag
@@ -0,0 +1,14 @@
+#version 310 es
+precision mediump float;
+precision highp int;
+
+layout(binding = 0) uniform mediump sampler2DShadow uT;
+
+layout(location = 0) out vec4 FragColor;
+layout(location = 0) in vec3 vUV;
+
+void main()
+{
+    FragColor = textureGather(uT, vUV.xy, vUV.z);
+}
+
diff --git a/reference/opt/shaders/frag/ground.frag b/reference/opt/shaders/frag/ground.frag
index aaca58c1cd..f59a402fe3 100644
--- a/reference/opt/shaders/frag/ground.frag
+++ b/reference/opt/shaders/frag/ground.frag
@@ -24,7 +24,7 @@ layout(location = 1) in vec3 EyeVec;
 void main()
 {
     vec3 _68 = normalize((texture(TexNormalmap, TexCoord).xyz * 2.0) - vec3(1.0));
-    float _113 = smoothstep(0.0, 0.1500000059604644775390625, (_101.g_CamPos.y + EyeVec.y) / 200.0);
+    float _113 = smoothstep(0.0, 0.1500000059604644775390625, (_101.g_CamPos.y + EyeVec.y) * 0.004999999888241291046142578125);
     float _125 = smoothstep(0.699999988079071044921875, 0.75, _68.y);
     vec3 _130 = mix(vec3(0.100000001490116119384765625), mix(vec3(0.100000001490116119384765625, 0.300000011920928955078125, 0.100000001490116119384765625), vec3(0.800000011920928955078125), vec3(_113)), vec3(_125));
     LightingOut = vec4(0.0);
diff --git a/reference/opt/shaders/frag/hoisted-temporary-use-continue-block-as-value.frag b/reference/opt/shaders/frag/hoisted-temporary-use-continue-block-as-value.frag
index 1d8e023582..74db2bb8cb 100644
--- a/reference/opt/shaders/frag/hoisted-temporary-use-continue-block-as-value.frag
+++ b/reference/opt/shaders/frag/hoisted-temporary-use-continue-block-as-value.frag
@@ -9,29 +9,20 @@ layout(location = 1) flat in mediump int vB;
 void main()
 {
     FragColor = vec4(0.0);
-    mediump int _49;
-    int _60;
-    for (int _57 = 0, _58 = 0; _58 < vA; _57 = _60, _58 += _49)
+    int _58;
+    for (int _57 = 0, _60 = 0; _57 < vA; FragColor += vec4(1.0), _60 = _58, _57 += (_58 + 10))
     {
-        if ((vA + _58) == 20)
+        if ((vA + _57) == 20)
         {
-            _60 = 50;
+            _58 = 50;
+            continue;
         }
         else
         {
-            int _59;
-            if ((vB + _58) == 40)
-            {
-                _59 = 60;
-            }
-            else
-            {
-                _59 = _57;
-            }
-            _60 = _59;
+            _58 = ((vB + _57) == 40) ? 60 : _60;
+            continue;
         }
-        _49 = _60 + 10;
-        FragColor += vec4(1.0);
+        continue;
     }
 }
 
diff --git a/reference/opt/shaders/frag/mix.frag b/reference/opt/shaders/frag/mix.frag
index f1494e0775..f791d45ce3 100644
--- a/reference/opt/shaders/frag/mix.frag
+++ b/reference/opt/shaders/frag/mix.frag
@@ -10,9 +10,9 @@ layout(location = 3) in float vIn3;
 
 void main()
 {
-    FragColor = mix(vIn0, vIn1, bvec4(false, true, false, false));
-    FragColor = vec4(true ? vIn3 : vIn2);
-    FragColor = mix(vIn1, vIn0, bvec4(true));
-    FragColor = vec4(true ? vIn2 : vIn3);
+    FragColor = vec4(vIn0.x, vIn1.y, vIn0.z, vIn0.w);
+    FragColor = vec4(vIn3);
+    FragColor = vIn0.xyzw;
+    FragColor = vec4(vIn2);
 }
 
diff --git a/reference/opt/shaders/frag/partial-write-preserve.frag b/reference/opt/shaders/frag/partial-write-preserve.frag
index 527b661bcc..d6c1fe8b44 100644
--- a/reference/opt/shaders/frag/partial-write-preserve.frag
+++ b/reference/opt/shaders/frag/partial-write-preserve.frag
@@ -2,12 +2,6 @@
 precision mediump float;
 precision highp int;
 
-struct B
-{
-    float a;
-    float b;
-};
-
 void main()
 {
 }
diff --git a/reference/opt/shaders/frag/sampler-ms-query.frag b/reference/opt/shaders/frag/sampler-ms-query.frag
deleted file mode 100644
index 4c30ed1529..0000000000
--- a/reference/opt/shaders/frag/sampler-ms-query.frag
+++ /dev/null
@@ -1,14 +0,0 @@
-#version 450
-
-layout(binding = 0) uniform sampler2DMS uSampler;
-layout(binding = 1) uniform sampler2DMSArray uSamplerArray;
-layout(binding = 2, rgba8) uniform readonly writeonly image2DMS uImage;
-layout(binding = 3, rgba8) uniform readonly writeonly image2DMSArray uImageArray;
-
-layout(location = 0) out vec4 FragColor;
-
-void main()
-{
-    FragColor = vec4(float(((textureSamples(uSampler) + textureSamples(uSamplerArray)) + imageSamples(uImage)) + imageSamples(uImageArray)));
-}
-
diff --git a/reference/opt/shaders/frag/swizzle.frag b/reference/opt/shaders/frag/swizzle.frag
index e619be2f48..a229e5b0d5 100644
--- a/reference/opt/shaders/frag/swizzle.frag
+++ b/reference/opt/shaders/frag/swizzle.frag
@@ -2,7 +2,7 @@
 precision mediump float;
 precision highp int;
 
-layout(location = 0) uniform mediump sampler2D samp;
+layout(binding = 0) uniform mediump sampler2D samp;
 
 layout(location = 0) out vec4 FragColor;
 layout(location = 2) in vec2 vUV;
diff --git a/reference/opt/shaders/frag/temporary.frag b/reference/opt/shaders/frag/temporary.frag
deleted file mode 100644
index ec9d3e4958..0000000000
--- a/reference/opt/shaders/frag/temporary.frag
+++ /dev/null
@@ -1,14 +0,0 @@
-#version 310 es
-precision mediump float;
-precision highp int;
-
-uniform mediump sampler2D uTex;
-
-layout(location = 0) in vec2 vTex;
-layout(location = 0) out vec4 FragColor;
-
-void main()
-{
-    FragColor = vec4(vTex.xxy, 1.0) + vec4(texture(uTex, vTex).xyz, 1.0);
-}
-
diff --git a/reference/opt/shaders/frag/ubo_layout.frag b/reference/opt/shaders/frag/ubo_layout.frag
index bc0b01c065..4b66e1396a 100644
--- a/reference/opt/shaders/frag/ubo_layout.frag
+++ b/reference/opt/shaders/frag/ubo_layout.frag
@@ -7,6 +7,11 @@ struct Str
     mat4 foo;
 };
 
+struct Str_1
+{
+    mat4 foo;
+};
+
 layout(binding = 0, std140) uniform UBO1
 {
     layout(row_major) Str foo;
@@ -14,7 +19,7 @@ layout(binding = 0, std140) uniform UBO1
 
 layout(binding = 1, std140) uniform UBO2
 {
-    Str foo;
+    Str_1 foo;
 } ubo0;
 
 layout(location = 0) out vec4 FragColor;
diff --git a/reference/opt/shaders/frag/unary-enclose.frag b/reference/opt/shaders/frag/unary-enclose.frag
index 118787bdf9..e7b0bf534c 100644
--- a/reference/opt/shaders/frag/unary-enclose.frag
+++ b/reference/opt/shaders/frag/unary-enclose.frag
@@ -7,6 +7,6 @@ layout(location = 0) in vec4 vIn;
 
 void main()
 {
-    FragColor = -(-vIn);
+    FragColor = vIn;
 }
 
diff --git a/reference/opt/shaders/geom/basic.geom b/reference/opt/shaders/geom/basic.geom
index 296ce5792c..f91136f609 100644
--- a/reference/opt/shaders/geom/basic.geom
+++ b/reference/opt/shaders/geom/basic.geom
@@ -13,13 +13,14 @@ layout(location = 0) in VertexData
 void main()
 {
     gl_Position = gl_in[0].gl_Position;
-    vNormal = vin[0].normal + vec3(float(gl_InvocationID));
+    float _37 = float(gl_InvocationID);
+    vNormal = vin[0].normal + vec3(_37);
     EmitVertex();
     gl_Position = gl_in[1].gl_Position;
-    vNormal = vin[1].normal + vec3(4.0 * float(gl_InvocationID));
+    vNormal = vin[1].normal + vec3(4.0 * _37);
     EmitVertex();
     gl_Position = gl_in[2].gl_Position;
-    vNormal = vin[2].normal + vec3(2.0 * float(gl_InvocationID));
+    vNormal = vin[2].normal + vec3(2.0 * _37);
     EmitVertex();
     EndPrimitive();
 }
diff --git a/reference/opt/shaders/legacy/vert/struct-varying.legacy.vert b/reference/opt/shaders/legacy/vert/struct-varying.legacy.vert
index 8520e2d562..01a3d73535 100644
--- a/reference/opt/shaders/legacy/vert/struct-varying.legacy.vert
+++ b/reference/opt/shaders/legacy/vert/struct-varying.legacy.vert
@@ -21,9 +21,8 @@ void main()
         vout_a = vout.a;
         vout_b = vout.b;
     }
-    Output _22 = Output(vout_a, vout_b);
-    vout_a = _22.a;
-    vout_b = _22.b;
+    vout_a = Output(vout_a, vout_b).a;
+    vout_b = Output(vout_a, vout_b).b;
     vout_a.x = 1.0;
     vout_b.y = 1.0;
 }
diff --git a/reference/opt/shaders/tesc/water_tess.tesc b/reference/opt/shaders/tesc/water_tess.tesc
index 0320fff2ca..69307d3c9e 100644
--- a/reference/opt/shaders/tesc/water_tess.tesc
+++ b/reference/opt/shaders/tesc/water_tess.tesc
@@ -22,13 +22,13 @@ void main()
     vec2 _440 = ((vPatchPosBase[0] + _41.uPatchSize) + vec2(10.0)) * _41.uScale.xy;
     vec3 _445 = vec3(_430.x, -10.0, _430.y);
     vec3 _450 = vec3(_440.x, 10.0, _440.y);
-    vec3 _454 = (_445 + _450) * 0.5;
-    float _459 = 0.5 * length(_450 - _445);
-    bool _515 = any(lessThanEqual(vec3(dot(_41.uFrustum[0], vec4(_454, 1.0)), dot(_41.uFrustum[1], vec4(_454, 1.0)), dot(_41.uFrustum[2], vec4(_454, 1.0))), vec3(-_459)));
+    vec4 _466 = vec4((_445 + _450) * 0.5, 1.0);
+    vec3 _513 = vec3(length(_450 - _445) * (-0.5));
+    bool _515 = any(lessThanEqual(vec3(dot(_41.uFrustum[0], _466), dot(_41.uFrustum[1], _466), dot(_41.uFrustum[2], _466)), _513));
     bool _525;
     if (!_515)
     {
-        _525 = any(lessThanEqual(vec3(dot(_41.uFrustum[3], vec4(_454, 1.0)), dot(_41.uFrustum[4], vec4(_454, 1.0)), dot(_41.uFrustum[5], vec4(_454, 1.0))), vec3(-_459)));
+        _525 = any(lessThanEqual(vec3(dot(_41.uFrustum[3], _466), dot(_41.uFrustum[4], _466), dot(_41.uFrustum[5], _466)), _513));
     }
     else
     {
diff --git a/reference/opt/shaders/tese/water_tess.tese b/reference/opt/shaders/tese/water_tess.tese
index 6efa9f0a69..a2aa104476 100644
--- a/reference/opt/shaders/tese/water_tess.tese
+++ b/reference/opt/shaders/tese/water_tess.tese
@@ -22,14 +22,13 @@ layout(location = 0) out vec3 vWorld;
 void main()
 {
     vec2 _201 = vOutPatchPosBase + (gl_TessCoord.xy * _31.uPatchSize);
-    vec2 _214 = mix(vPatchLods.yx, vPatchLods.zw, vec2(gl_TessCoord.xy.x));
-    float _221 = mix(_214.x, _214.y, gl_TessCoord.xy.y);
+    vec2 _214 = mix(vPatchLods.yx, vPatchLods.zw, vec2(gl_TessCoord.x));
+    float _221 = mix(_214.x, _214.y, gl_TessCoord.y);
     mediump float _223 = floor(_221);
-    mediump float _226 = _221 - _223;
     vec2 _125 = _201 * _31.uInvHeightmapSize;
     vec2 _141 = _31.uInvHeightmapSize * exp2(_223);
     vGradNormalTex = vec4(_125 + (_31.uInvHeightmapSize * 0.5), _125 * _31.uScale.zw);
-    mediump vec3 _253 = mix(textureLod(uHeightmapDisplacement, _125 + (_141 * 0.5), _223).xyz, textureLod(uHeightmapDisplacement, _125 + (_141 * 1.0), _223 + 1.0).xyz, vec3(_226));
+    mediump vec3 _253 = mix(textureLod(uHeightmapDisplacement, _125 + (_141 * 0.5), _223).xyz, textureLod(uHeightmapDisplacement, _125 + (_141 * 1.0), _223 + 1.0).xyz, vec3(_221 - _223));
     vec2 _171 = (_201 * _31.uScale.xy) + _253.yz;
     vWorld = vec3(_171.x, _253.x, _171.y);
     gl_Position = _31.uMVP * vec4(vWorld, 1.0);
diff --git a/reference/opt/shaders/vert/ocean.vert b/reference/opt/shaders/vert/ocean.vert
index d37a0a8a4c..f1b12fa046 100644
--- a/reference/opt/shaders/vert/ocean.vert
+++ b/reference/opt/shaders/vert/ocean.vert
@@ -50,7 +50,7 @@ layout(location = 0) in vec4 Position;
 layout(location = 0) out vec3 EyeVec;
 layout(location = 1) out vec4 TexCoord;
 
-uvec4 _483;
+uvec4 _484;
 
 void main()
 {
@@ -59,8 +59,9 @@ void main()
     uint _357 = uint(_352);
     uvec4 _359 = uvec4(Position);
     uvec2 _366 = (uvec2(1u) << uvec2(_357, _357 + 1u)) - uvec2(1u);
+    bool _369 = _359.x < 32u;
     uint _482;
-    if (_359.x < 32u)
+    if (_369)
     {
         _482 = _366.x;
     }
@@ -68,41 +69,42 @@ void main()
     {
         _482 = 0u;
     }
-    uvec4 _445 = _483;
+    uvec4 _445 = _484;
     _445.x = _482;
-    uint _484;
-    if (_359.y < 32u)
-    {
-        _484 = _366.x;
-    }
-    else
-    {
-        _484 = 0u;
-    }
-    uvec4 _451 = _445;
-    _451.y = _484;
+    bool _379 = _359.y < 32u;
     uint _485;
-    if (_359.x < 32u)
+    if (_379)
     {
-        _485 = _366.y;
+        _485 = _366.x;
     }
     else
     {
         _485 = 0u;
     }
-    uvec4 _457 = _451;
-    _457.z = _485;
-    uint _486;
-    if (_359.y < 32u)
+    uvec4 _451 = _445;
+    _451.y = _485;
+    uint _487;
+    if (_369)
     {
-        _486 = _366.y;
+        _487 = _366.y;
     }
     else
     {
-        _486 = 0u;
+        _487 = 0u;
+    }
+    uvec4 _457 = _451;
+    _457.z = _487;
+    uint _489;
+    if (_379)
+    {
+        _489 = _366.y;
+    }
+    else
+    {
+        _489 = 0u;
     }
     uvec4 _463 = _457;
-    _463.w = _486;
+    _463.w = _489;
     vec4 _415 = vec4((_359.xyxy + _463) & (~_366).xxyy);
     vec2 _197 = ((_53.Patches[(gl_InstanceID + SPIRV_Cross_BaseInstance)].Position.xz * _180.InvOceanSize_PatchScale.zw) + mix(_415.xy, _415.zw, vec2(_350 - _352))) * _180.InvOceanSize_PatchScale.xy;
     vec2 _204 = _197 * _180.NormalTexCoordScale.zw;
diff --git a/reference/opt/shaders/vert/read-from-row-major-array.vert b/reference/opt/shaders/vert/read-from-row-major-array.vert
new file mode 100644
index 0000000000..25fc9495d2
--- /dev/null
+++ b/reference/opt/shaders/vert/read-from-row-major-array.vert
@@ -0,0 +1,16 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform Block
+{
+    layout(row_major) mat2x3 var[3][4];
+} _104;
+
+layout(location = 0) in vec4 a_position;
+layout(location = 0) out mediump float v_vtxResult;
+
+void main()
+{
+    gl_Position = a_position;
+    v_vtxResult = ((float(abs(_104.var[0][0][0].x - 2.0) < 0.0500000007450580596923828125) * float(abs(_104.var[0][0][0].y - 6.0) < 0.0500000007450580596923828125)) * float(abs(_104.var[0][0][0].z - (-6.0)) < 0.0500000007450580596923828125)) * ((float(abs(_104.var[0][0][1].x) < 0.0500000007450580596923828125) * float(abs(_104.var[0][0][1].y - 5.0) < 0.0500000007450580596923828125)) * float(abs(_104.var[0][0][1].z - 5.0) < 0.0500000007450580596923828125));
+}
+
diff --git a/reference/opt/shaders/vulkan/comp/subgroups.nocompat.invalid.vk.comp.vk b/reference/opt/shaders/vulkan/comp/subgroups.nocompat.invalid.vk.comp.vk
new file mode 100644
index 0000000000..6d288574f7
--- /dev/null
+++ b/reference/opt/shaders/vulkan/comp/subgroups.nocompat.invalid.vk.comp.vk
@@ -0,0 +1,110 @@
+#version 450
+#extension GL_KHR_shader_subgroup_basic : require
+#extension GL_KHR_shader_subgroup_ballot : require
+#extension GL_KHR_shader_subgroup_shuffle : require
+#extension GL_KHR_shader_subgroup_shuffle_relative : require
+#extension GL_KHR_shader_subgroup_vote : require
+#extension GL_KHR_shader_subgroup_arithmetic : require
+#extension GL_KHR_shader_subgroup_clustered : require
+#extension GL_KHR_shader_subgroup_quad : require
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+layout(set = 0, binding = 0, std430) buffer SSBO
+{
+    float FragColor;
+} _9;
+
+void main()
+{
+    _9.FragColor = float(gl_NumSubgroups);
+    _9.FragColor = float(gl_SubgroupID);
+    _9.FragColor = float(gl_SubgroupSize);
+    _9.FragColor = float(gl_SubgroupInvocationID);
+    subgroupMemoryBarrier();
+    subgroupBarrier();
+    subgroupMemoryBarrier();
+    subgroupMemoryBarrierBuffer();
+    subgroupMemoryBarrierShared();
+    subgroupMemoryBarrierImage();
+    bool elected = subgroupElect();
+    _9.FragColor = vec4(gl_SubgroupEqMask).x;
+    _9.FragColor = vec4(gl_SubgroupGeMask).x;
+    _9.FragColor = vec4(gl_SubgroupGtMask).x;
+    _9.FragColor = vec4(gl_SubgroupLeMask).x;
+    _9.FragColor = vec4(gl_SubgroupLtMask).x;
+    vec4 broadcasted = subgroupBroadcast(vec4(10.0), 8u);
+    vec3 first = subgroupBroadcastFirst(vec3(20.0));
+    uvec4 ballot_value = subgroupBallot(true);
+    bool inverse_ballot_value = subgroupInverseBallot(ballot_value);
+    bool bit_extracted = subgroupBallotBitExtract(uvec4(10u), 8u);
+    uint bit_count = subgroupBallotBitCount(ballot_value);
+    uint inclusive_bit_count = subgroupBallotInclusiveBitCount(ballot_value);
+    uint exclusive_bit_count = subgroupBallotExclusiveBitCount(ballot_value);
+    uint lsb = subgroupBallotFindLSB(ballot_value);
+    uint msb = subgroupBallotFindMSB(ballot_value);
+    uint shuffled = subgroupShuffle(10u, 8u);
+    uint shuffled_xor = subgroupShuffleXor(30u, 8u);
+    uint shuffled_up = subgroupShuffleUp(20u, 4u);
+    uint shuffled_down = subgroupShuffleDown(20u, 4u);
+    bool has_all = subgroupAll(true);
+    bool has_any = subgroupAny(true);
+    bool has_equal = subgroupAllEqual(true);
+    vec4 added = subgroupAdd(vec4(20.0));
+    ivec4 iadded = subgroupAdd(ivec4(20));
+    vec4 multiplied = subgroupMul(vec4(20.0));
+    ivec4 imultiplied = subgroupMul(ivec4(20));
+    vec4 lo = subgroupMin(vec4(20.0));
+    vec4 hi = subgroupMax(vec4(20.0));
+    ivec4 slo = subgroupMin(ivec4(20));
+    ivec4 shi = subgroupMax(ivec4(20));
+    uvec4 ulo = subgroupMin(uvec4(20u));
+    uvec4 uhi = subgroupMax(uvec4(20u));
+    uvec4 anded = subgroupAnd(ballot_value);
+    uvec4 ored = subgroupOr(ballot_value);
+    uvec4 xored = subgroupXor(ballot_value);
+    added = subgroupInclusiveAdd(added);
+    iadded = subgroupInclusiveAdd(iadded);
+    multiplied = subgroupInclusiveMul(multiplied);
+    imultiplied = subgroupInclusiveMul(imultiplied);
+    lo = subgroupInclusiveMin(lo);
+    hi = subgroupInclusiveMax(hi);
+    slo = subgroupInclusiveMin(slo);
+    shi = subgroupInclusiveMax(shi);
+    ulo = subgroupInclusiveMin(ulo);
+    uhi = subgroupInclusiveMax(uhi);
+    anded = subgroupInclusiveAnd(anded);
+    ored = subgroupInclusiveOr(ored);
+    xored = subgroupInclusiveXor(ored);
+    added = subgroupExclusiveAdd(lo);
+    added = subgroupExclusiveAdd(multiplied);
+    multiplied = subgroupExclusiveMul(multiplied);
+    iadded = subgroupExclusiveAdd(imultiplied);
+    imultiplied = subgroupExclusiveMul(imultiplied);
+    lo = subgroupExclusiveMin(lo);
+    hi = subgroupExclusiveMax(hi);
+    ulo = subgroupExclusiveMin(ulo);
+    uhi = subgroupExclusiveMax(uhi);
+    slo = subgroupExclusiveMin(slo);
+    shi = subgroupExclusiveMax(shi);
+    anded = subgroupExclusiveAnd(anded);
+    ored = subgroupExclusiveOr(ored);
+    xored = subgroupExclusiveXor(ored);
+    added = subgroupClusteredAdd(added, 4u);
+    multiplied = subgroupClusteredMul(multiplied, 4u);
+    iadded = subgroupClusteredAdd(iadded, 4u);
+    imultiplied = subgroupClusteredMul(imultiplied, 4u);
+    lo = subgroupClusteredMin(lo, 4u);
+    hi = subgroupClusteredMax(hi, 4u);
+    ulo = subgroupClusteredMin(ulo, 4u);
+    uhi = subgroupClusteredMax(uhi, 4u);
+    slo = subgroupClusteredMin(slo, 4u);
+    shi = subgroupClusteredMax(shi, 4u);
+    anded = subgroupClusteredAnd(anded, 4u);
+    ored = subgroupClusteredOr(ored, 4u);
+    xored = subgroupClusteredXor(xored, 4u);
+    vec4 swap_horiz = subgroupQuadSwapHorizontal(vec4(20.0));
+    vec4 swap_vertical = subgroupQuadSwapVertical(vec4(20.0));
+    vec4 swap_diagonal = subgroupQuadSwapDiagonal(vec4(20.0));
+    vec4 quad_broadcast = subgroupQuadBroadcast(vec4(20.0), 3u);
+}
+
diff --git a/reference/opt/shaders/vulkan/frag/separate-combined-fake-overload.vk.frag b/reference/opt/shaders/vulkan/frag/separate-combined-fake-overload.vk.frag
new file mode 100644
index 0000000000..c17c8e6d67
--- /dev/null
+++ b/reference/opt/shaders/vulkan/frag/separate-combined-fake-overload.vk.frag
@@ -0,0 +1,12 @@
+#version 450
+
+layout(binding = 0) uniform sampler2D uSamp;
+uniform sampler2D SPIRV_Cross_CombineduTuS;
+
+layout(location = 0) out vec4 FragColor;
+
+void main()
+{
+    FragColor = texture(uSamp, vec2(0.5)) + texture(SPIRV_Cross_CombineduTuS, vec2(0.5));
+}
+
diff --git a/reference/opt/shaders/vulkan/frag/separate-combined-fake-overload.vk.frag.vk b/reference/opt/shaders/vulkan/frag/separate-combined-fake-overload.vk.frag.vk
new file mode 100644
index 0000000000..5a5ec20298
--- /dev/null
+++ b/reference/opt/shaders/vulkan/frag/separate-combined-fake-overload.vk.frag.vk
@@ -0,0 +1,13 @@
+#version 450
+
+layout(set = 0, binding = 0) uniform sampler2D uSamp;
+layout(set = 0, binding = 1) uniform texture2D uT;
+layout(set = 0, binding = 2) uniform sampler uS;
+
+layout(location = 0) out vec4 FragColor;
+
+void main()
+{
+    FragColor = texture(uSamp, vec2(0.5)) + texture(sampler2D(uT, uS), vec2(0.5));
+}
+
diff --git a/reference/opt/shaders/vulkan/frag/separate-sampler-texture-array.vk.frag b/reference/opt/shaders/vulkan/frag/separate-sampler-texture-array.vk.frag
new file mode 100644
index 0000000000..df2994efb9
--- /dev/null
+++ b/reference/opt/shaders/vulkan/frag/separate-sampler-texture-array.vk.frag
@@ -0,0 +1,19 @@
+#version 310 es
+precision mediump float;
+precision highp int;
+
+uniform mediump sampler2D SPIRV_Cross_CombineduTextureuSampler[4];
+uniform mediump sampler2DArray SPIRV_Cross_CombineduTextureArrayuSampler[4];
+uniform mediump samplerCube SPIRV_Cross_CombineduTextureCubeuSampler[4];
+uniform mediump sampler3D SPIRV_Cross_CombineduTexture3DuSampler[4];
+
+layout(location = 0) in vec2 vTex;
+layout(location = 1) in vec3 vTex3;
+layout(location = 0) out vec4 FragColor;
+
+void main()
+{
+    vec2 _95 = (vTex + (vec2(1.0) / vec2(textureSize(SPIRV_Cross_CombineduTextureuSampler[1], 0)))) + (vec2(1.0) / vec2(textureSize(SPIRV_Cross_CombineduTextureuSampler[2], 1)));
+    FragColor = ((((texture(SPIRV_Cross_CombineduTextureuSampler[2], _95) + texture(SPIRV_Cross_CombineduTextureuSampler[1], _95)) + texture(SPIRV_Cross_CombineduTextureuSampler[1], _95)) + texture(SPIRV_Cross_CombineduTextureArrayuSampler[3], vTex3)) + texture(SPIRV_Cross_CombineduTextureCubeuSampler[1], vTex3)) + texture(SPIRV_Cross_CombineduTexture3DuSampler[2], vTex3);
+}
+
diff --git a/reference/opt/shaders/vulkan/frag/separate-sampler-texture-array.vk.frag.vk b/reference/opt/shaders/vulkan/frag/separate-sampler-texture-array.vk.frag.vk
new file mode 100644
index 0000000000..d275a0f408
--- /dev/null
+++ b/reference/opt/shaders/vulkan/frag/separate-sampler-texture-array.vk.frag.vk
@@ -0,0 +1,20 @@
+#version 310 es
+precision mediump float;
+precision highp int;
+
+layout(set = 0, binding = 1) uniform mediump texture2D uTexture[4];
+layout(set = 0, binding = 0) uniform mediump sampler uSampler;
+layout(set = 0, binding = 4) uniform mediump texture2DArray uTextureArray[4];
+layout(set = 0, binding = 3) uniform mediump textureCube uTextureCube[4];
+layout(set = 0, binding = 2) uniform mediump texture3D uTexture3D[4];
+
+layout(location = 0) in vec2 vTex;
+layout(location = 1) in vec3 vTex3;
+layout(location = 0) out vec4 FragColor;
+
+void main()
+{
+    vec2 _95 = (vTex + (vec2(1.0) / vec2(textureSize(sampler2D(uTexture[1], uSampler), 0)))) + (vec2(1.0) / vec2(textureSize(sampler2D(uTexture[2], uSampler), 1)));
+    FragColor = ((((texture(sampler2D(uTexture[2], uSampler), _95) + texture(sampler2D(uTexture[1], uSampler), _95)) + texture(sampler2D(uTexture[1], uSampler), _95)) + texture(sampler2DArray(uTextureArray[3], uSampler), vTex3)) + texture(samplerCube(uTextureCube[1], uSampler), vTex3)) + texture(sampler3D(uTexture3D[2], uSampler), vTex3);
+}
+
diff --git a/reference/opt/shaders/vulkan/frag/separate-sampler-texture.vk.frag b/reference/opt/shaders/vulkan/frag/separate-sampler-texture.vk.frag
index a52d5bc77c..aad1e43662 100644
--- a/reference/opt/shaders/vulkan/frag/separate-sampler-texture.vk.frag
+++ b/reference/opt/shaders/vulkan/frag/separate-sampler-texture.vk.frag
@@ -13,8 +13,7 @@ layout(location = 0) out vec4 FragColor;
 
 void main()
 {
-    vec2 _54 = vec2(1.0) / vec2(textureSize(SPIRV_Cross_CombineduTextureuSampler, 0));
-    vec2 _64 = vec2(1.0) / vec2(textureSize(SPIRV_Cross_CombineduTextureuSampler, 1));
-    FragColor = (((texture(SPIRV_Cross_CombineduTextureuSampler, (vTex + _54) + _64) + texture(SPIRV_Cross_CombineduTextureuSampler, (vTex + _54) + _64)) + texture(SPIRV_Cross_CombineduTextureArrayuSampler, vTex3)) + texture(SPIRV_Cross_CombineduTextureCubeuSampler, vTex3)) + texture(SPIRV_Cross_CombineduTexture3DuSampler, vTex3);
+    vec2 _73 = (vTex + (vec2(1.0) / vec2(textureSize(SPIRV_Cross_CombineduTextureuSampler, 0)))) + (vec2(1.0) / vec2(textureSize(SPIRV_Cross_CombineduTextureuSampler, 1)));
+    FragColor = (((texture(SPIRV_Cross_CombineduTextureuSampler, _73) + texture(SPIRV_Cross_CombineduTextureuSampler, _73)) + texture(SPIRV_Cross_CombineduTextureArrayuSampler, vTex3)) + texture(SPIRV_Cross_CombineduTextureCubeuSampler, vTex3)) + texture(SPIRV_Cross_CombineduTexture3DuSampler, vTex3);
 }
 
diff --git a/reference/opt/shaders/vulkan/frag/separate-sampler-texture.vk.frag.vk b/reference/opt/shaders/vulkan/frag/separate-sampler-texture.vk.frag.vk
index 105ca76e44..b79374aba9 100644
--- a/reference/opt/shaders/vulkan/frag/separate-sampler-texture.vk.frag.vk
+++ b/reference/opt/shaders/vulkan/frag/separate-sampler-texture.vk.frag.vk
@@ -14,8 +14,7 @@ layout(location = 0) out vec4 FragColor;
 
 void main()
 {
-    vec2 _54 = vec2(1.0) / vec2(textureSize(sampler2D(uTexture, uSampler), 0));
-    vec2 _64 = vec2(1.0) / vec2(textureSize(sampler2D(uTexture, uSampler), 1));
-    FragColor = (((texture(sampler2D(uTexture, uSampler), (vTex + _54) + _64) + texture(sampler2D(uTexture, uSampler), (vTex + _54) + _64)) + texture(sampler2DArray(uTextureArray, uSampler), vTex3)) + texture(samplerCube(uTextureCube, uSampler), vTex3)) + texture(sampler3D(uTexture3D, uSampler), vTex3);
+    vec2 _73 = (vTex + (vec2(1.0) / vec2(textureSize(sampler2D(uTexture, uSampler), 0)))) + (vec2(1.0) / vec2(textureSize(sampler2D(uTexture, uSampler), 1)));
+    FragColor = (((texture(sampler2D(uTexture, uSampler), _73) + texture(sampler2D(uTexture, uSampler), _73)) + texture(sampler2DArray(uTextureArray, uSampler), vTex3)) + texture(samplerCube(uTextureCube, uSampler), vTex3)) + texture(sampler3D(uTexture3D, uSampler), vTex3);
 }
 
diff --git a/reference/opt/shaders/vulkan/frag/spec-constant.vk.frag b/reference/opt/shaders/vulkan/frag/spec-constant.vk.frag
deleted file mode 100644
index 4f9b6f515f..0000000000
--- a/reference/opt/shaders/vulkan/frag/spec-constant.vk.frag
+++ /dev/null
@@ -1,20 +0,0 @@
-#version 310 es
-precision mediump float;
-precision highp int;
-
-struct Foo
-{
-    float elems[(4 + 2)];
-};
-
-layout(location = 0) out vec4 FragColor;
-
-float _146[(3 + 2)];
-
-void main()
-{
-    float vec0[(3 + 3)][8];
-    Foo foo;
-    FragColor = ((vec4(1.0 + 2.0) + vec4(vec0[0][0])) + vec4(_146[0])) + vec4(foo.elems[3]);
-}
-
diff --git a/reference/opt/shaders/vulkan/frag/spec-constant.vk.frag.vk b/reference/opt/shaders/vulkan/frag/spec-constant.vk.frag.vk
deleted file mode 100644
index 0b74896aef..0000000000
--- a/reference/opt/shaders/vulkan/frag/spec-constant.vk.frag.vk
+++ /dev/null
@@ -1,25 +0,0 @@
-#version 310 es
-precision mediump float;
-precision highp int;
-
-layout(constant_id = 1) const float a = 1.0;
-layout(constant_id = 2) const float b = 2.0;
-layout(constant_id = 3) const int c = 3;
-layout(constant_id = 4) const int d = 4;
-
-struct Foo
-{
-    float elems[(d + 2)];
-};
-
-layout(location = 0) out vec4 FragColor;
-
-float _146[(c + 2)];
-
-void main()
-{
-    float vec0[(c + 3)][8];
-    Foo foo;
-    FragColor = ((vec4(a + b) + vec4(vec0[0][0])) + vec4(_146[0])) + vec4(foo.elems[c]);
-}
-
diff --git a/reference/opt/shaders/vulkan/vert/multiview.nocompat.vk.vert b/reference/opt/shaders/vulkan/vert/multiview.nocompat.vk.vert
deleted file mode 100644
index 533738efc3..0000000000
--- a/reference/opt/shaders/vulkan/vert/multiview.nocompat.vk.vert
+++ /dev/null
@@ -1,15 +0,0 @@
-#version 310 es
-#extension GL_OVR_multiview2 : require
-
-layout(binding = 0, std140) uniform MVPs
-{
-    mat4 MVP[2];
-} _19;
-
-layout(location = 0) in vec4 Position;
-
-void main()
-{
-    gl_Position = _19.MVP[gl_ViewID_OVR] * Position;
-}
-
diff --git a/reference/opt/shaders-hlsl/asm/comp/specialization-constant-workgroup.nofxc.asm.comp b/reference/shaders-hlsl-no-opt/asm/comp/specialization-constant-workgroup.nofxc.asm.comp
similarity index 100%
rename from reference/opt/shaders-hlsl/asm/comp/specialization-constant-workgroup.nofxc.asm.comp
rename to reference/shaders-hlsl-no-opt/asm/comp/specialization-constant-workgroup.nofxc.asm.comp
diff --git a/reference/shaders-hlsl-no-opt/asm/vert/empty-struct-composite.asm.vert b/reference/shaders-hlsl-no-opt/asm/vert/empty-struct-composite.asm.vert
new file mode 100644
index 0000000000..ba1f576748
--- /dev/null
+++ b/reference/shaders-hlsl-no-opt/asm/vert/empty-struct-composite.asm.vert
@@ -0,0 +1,15 @@
+struct Test
+{
+    int empty_struct_member;
+};
+
+void vert_main()
+{
+    Test _14 = { 0 };
+    Test t = _14;
+}
+
+void main()
+{
+    vert_main();
+}
diff --git a/reference/opt/shaders-hlsl/comp/bitfield.noopt.comp b/reference/shaders-hlsl-no-opt/comp/bitfield.comp
similarity index 100%
rename from reference/opt/shaders-hlsl/comp/bitfield.noopt.comp
rename to reference/shaders-hlsl-no-opt/comp/bitfield.comp
diff --git a/reference/shaders-hlsl-no-opt/frag/spec-constant.frag b/reference/shaders-hlsl-no-opt/frag/spec-constant.frag
new file mode 100644
index 0000000000..c95ea97671
--- /dev/null
+++ b/reference/shaders-hlsl-no-opt/frag/spec-constant.frag
@@ -0,0 +1,118 @@
+static const float a = 1.0f;
+static const float b = 2.0f;
+static const int c = 3;
+static const uint _18 = (uint(c) + 0u);
+static const int _21 = (-c);
+static const int _23 = (~c);
+static const int d = 4;
+static const int _26 = (c + d);
+static const int _28 = (c - d);
+static const int _30 = (c * d);
+static const int _32 = (c / d);
+static const uint e = 5u;
+static const uint f = 6u;
+static const uint _36 = (e / f);
+static const int _38 = (c % d);
+static const uint _40 = (e % f);
+static const int _42 = (c >> d);
+static const uint _44 = (e >> f);
+static const int _46 = (c << d);
+static const int _48 = (c | d);
+static const int _50 = (c ^ d);
+static const int _52 = (c & d);
+static const bool g = false;
+static const bool h = true;
+static const bool _58 = (g || h);
+static const bool _60 = (g && h);
+static const bool _62 = (!g);
+static const bool _64 = (g == h);
+static const bool _66 = (g != h);
+static const bool _68 = (c == d);
+static const bool _70 = (c != d);
+static const bool _72 = (c < d);
+static const bool _74 = (e < f);
+static const bool _76 = (c > d);
+static const bool _78 = (e > f);
+static const bool _80 = (c <= d);
+static const bool _82 = (e <= f);
+static const bool _84 = (c >= d);
+static const bool _86 = (e >= f);
+static const int _92 = int(e + 0u);
+static const bool _94 = (c != int(0u));
+static const bool _96 = (e != 0u);
+static const int _100 = int(g);
+static const uint _103 = uint(g);
+static const int _111 = (c + 3);
+static const int _118 = (c + 2);
+static const int _124 = (d + 2);
+
+struct Foo
+{
+    float elems[_124];
+};
+
+static float4 FragColor;
+
+struct SPIRV_Cross_Output
+{
+    float4 FragColor : SV_Target0;
+};
+
+void frag_main()
+{
+    float t0 = a;
+    float t1 = b;
+    uint c0 = _18;
+    int c1 = _21;
+    int c2 = _23;
+    int c3 = _26;
+    int c4 = _28;
+    int c5 = _30;
+    int c6 = _32;
+    uint c7 = _36;
+    int c8 = _38;
+    uint c9 = _40;
+    int c10 = _42;
+    uint c11 = _44;
+    int c12 = _46;
+    int c13 = _48;
+    int c14 = _50;
+    int c15 = _52;
+    bool c16 = _58;
+    bool c17 = _60;
+    bool c18 = _62;
+    bool c19 = _64;
+    bool c20 = _66;
+    bool c21 = _68;
+    bool c22 = _70;
+    bool c23 = _72;
+    bool c24 = _74;
+    bool c25 = _76;
+    bool c26 = _78;
+    bool c27 = _80;
+    bool c28 = _82;
+    bool c29 = _84;
+    bool c30 = _86;
+    int c31 = c8 + c3;
+    int c32 = _92;
+    bool c33 = _94;
+    bool c34 = _96;
+    int c35 = _100;
+    uint c36 = _103;
+    float c37 = float(g);
+    float vec0[_111][8];
+    vec0[0][0] = 10.0f;
+    float vec1[_118];
+    vec1[0] = 20.0f;
+    Foo foo;
+    foo.elems[c] = 10.0f;
+    FragColor = (((t0 + t1).xxxx + vec0[0][0].xxxx) + vec1[0].xxxx) + foo.elems[c].xxxx;
+}
+
+SPIRV_Cross_Output main()
+{
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.FragColor = FragColor;
+    return stage_output;
+}
diff --git a/reference/shaders-hlsl/asm/comp/specialization-constant-workgroup.nofxc.asm.comp b/reference/shaders-hlsl/asm/comp/specialization-constant-workgroup.nofxc.asm.comp
deleted file mode 100644
index 8243347bf6..0000000000
--- a/reference/shaders-hlsl/asm/comp/specialization-constant-workgroup.nofxc.asm.comp
+++ /dev/null
@@ -1,16 +0,0 @@
-static const uint _5 = 9u;
-static const uint _6 = 4u;
-static const uint3 gl_WorkGroupSize = uint3(_5, 20u, _6);
-
-RWByteAddressBuffer _4 : register(u0);
-
-void comp_main()
-{
-    _4.Store(0, asuint(asfloat(_4.Load(0)) + 1.0f));
-}
-
-[numthreads(9, 20, 4)]
-void main()
-{
-    comp_main();
-}
diff --git a/reference/opt/shaders-hlsl/asm/comp/storage-buffer-basic.nofxc.asm.comp b/reference/shaders-hlsl/asm/comp/storage-buffer-basic.invalid.nofxc.asm.comp
similarity index 93%
rename from reference/opt/shaders-hlsl/asm/comp/storage-buffer-basic.nofxc.asm.comp
rename to reference/shaders-hlsl/asm/comp/storage-buffer-basic.invalid.nofxc.asm.comp
index 1bdb27d7fa..1887eaa88f 100644
--- a/reference/opt/shaders-hlsl/asm/comp/storage-buffer-basic.nofxc.asm.comp
+++ b/reference/shaders-hlsl/asm/comp/storage-buffer-basic.invalid.nofxc.asm.comp
@@ -11,6 +11,8 @@ struct SPIRV_Cross_Input
     uint3 gl_WorkGroupID : SV_GroupID;
 };
 
+static uint3 _22 = gl_WorkGroupSize;
+
 void comp_main()
 {
     _8.Store(gl_WorkGroupID.x * 4 + 0, asuint(asfloat(_9.Load(gl_WorkGroupID.x * 4 + 0)) + asfloat(_8.Load(gl_WorkGroupID.x * 4 + 0))));
diff --git a/reference/shaders-hlsl/asm/frag/combined-sampler-reuse.asm.frag b/reference/shaders-hlsl/asm/frag/combined-sampler-reuse.asm.frag
new file mode 100644
index 0000000000..3951fd511a
--- /dev/null
+++ b/reference/shaders-hlsl/asm/frag/combined-sampler-reuse.asm.frag
@@ -0,0 +1,30 @@
+Texture2D<float4> uTex : register(t1);
+SamplerState uSampler : register(s0);
+
+static float4 FragColor;
+static float2 vUV;
+
+struct SPIRV_Cross_Input
+{
+    float2 vUV : TEXCOORD0;
+};
+
+struct SPIRV_Cross_Output
+{
+    float4 FragColor : SV_Target0;
+};
+
+void frag_main()
+{
+    FragColor = uTex.Sample(uSampler, vUV);
+    FragColor += uTex.Sample(uSampler, vUV, int2(1, 1));
+}
+
+SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
+{
+    vUV = stage_input.vUV;
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.FragColor = FragColor;
+    return stage_output;
+}
diff --git a/reference/shaders-hlsl/asm/frag/empty-struct.asm.frag b/reference/shaders-hlsl/asm/frag/empty-struct.asm.frag
new file mode 100644
index 0000000000..38d12cd630
--- /dev/null
+++ b/reference/shaders-hlsl/asm/frag/empty-struct.asm.frag
@@ -0,0 +1,27 @@
+struct EmptyStructTest
+{
+    int empty_struct_member;
+};
+
+float GetValue(EmptyStructTest self)
+{
+    return 0.0f;
+}
+
+float GetValue_1(EmptyStructTest self)
+{
+    return 0.0f;
+}
+
+void frag_main()
+{
+    EmptyStructTest _23 = { 0 };
+    EmptyStructTest emptyStruct;
+    float value = GetValue(emptyStruct);
+    value = GetValue_1(_23);
+}
+
+void main()
+{
+    frag_main();
+}
diff --git a/reference/shaders-hlsl/asm/frag/image-extract-reuse.asm.frag b/reference/shaders-hlsl/asm/frag/image-extract-reuse.asm.frag
new file mode 100644
index 0000000000..ed53720d94
--- /dev/null
+++ b/reference/shaders-hlsl/asm/frag/image-extract-reuse.asm.frag
@@ -0,0 +1,31 @@
+Texture2D<float4> uTexture : register(t0);
+SamplerState _uTexture_sampler : register(s0);
+
+static int2 Size;
+
+struct SPIRV_Cross_Output
+{
+    int2 Size : SV_Target0;
+};
+
+uint2 SPIRV_Cross_textureSize(Texture2D<float4> Tex, uint Level, out uint Param)
+{
+    uint2 ret;
+    Tex.GetDimensions(Level, ret.x, ret.y, Param);
+    return ret;
+}
+
+void frag_main()
+{
+    uint _19_dummy_parameter;
+    uint _20_dummy_parameter;
+    Size = int2(SPIRV_Cross_textureSize(uTexture, uint(0), _19_dummy_parameter)) + int2(SPIRV_Cross_textureSize(uTexture, uint(1), _20_dummy_parameter));
+}
+
+SPIRV_Cross_Output main()
+{
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.Size = Size;
+    return stage_output;
+}
diff --git a/reference/shaders-hlsl/asm/frag/implicit-read-dep-phi.asm.frag b/reference/shaders-hlsl/asm/frag/implicit-read-dep-phi.asm.frag
new file mode 100644
index 0000000000..f668d63440
--- /dev/null
+++ b/reference/shaders-hlsl/asm/frag/implicit-read-dep-phi.asm.frag
@@ -0,0 +1,56 @@
+Texture2D<float4> uImage : register(t0);
+SamplerState _uImage_sampler : register(s0);
+
+static float4 v0;
+static float4 FragColor;
+
+struct SPIRV_Cross_Input
+{
+    float4 v0 : TEXCOORD0;
+};
+
+struct SPIRV_Cross_Output
+{
+    float4 FragColor : SV_Target0;
+};
+
+void frag_main()
+{
+    int i = 0;
+    float phi;
+    float4 _36;
+    phi = 1.0f;
+    _36 = float4(1.0f, 2.0f, 1.0f, 2.0f);
+    for (;;)
+    {
+        FragColor = _36;
+        if (i < 4)
+        {
+            if (v0[i] > 0.0f)
+            {
+                float2 _48 = phi.xx;
+                i++;
+                phi += 2.0f;
+                _36 = uImage.SampleLevel(_uImage_sampler, _48, 0.0f);
+                continue;
+            }
+            else
+            {
+                break;
+            }
+        }
+        else
+        {
+            break;
+        }
+    }
+}
+
+SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
+{
+    v0 = stage_input.v0;
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.FragColor = FragColor;
+    return stage_output;
+}
diff --git a/reference/shaders-hlsl/asm/frag/srem.asm.frag b/reference/shaders-hlsl/asm/frag/srem.asm.frag
new file mode 100644
index 0000000000..db5e717457
--- /dev/null
+++ b/reference/shaders-hlsl/asm/frag/srem.asm.frag
@@ -0,0 +1,29 @@
+static float4 FragColor;
+static int4 vA;
+static int4 vB;
+
+struct SPIRV_Cross_Input
+{
+    nointerpolation int4 vA : TEXCOORD0;
+    nointerpolation int4 vB : TEXCOORD1;
+};
+
+struct SPIRV_Cross_Output
+{
+    float4 FragColor : SV_Target0;
+};
+
+void frag_main()
+{
+    FragColor = float4(vA - vB * (vA / vB));
+}
+
+SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
+{
+    vA = stage_input.vA;
+    vB = stage_input.vB;
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.FragColor = FragColor;
+    return stage_output;
+}
diff --git a/reference/shaders-hlsl/asm/frag/texel-fetch-no-lod.asm.frag b/reference/shaders-hlsl/asm/frag/texel-fetch-no-lod.asm.frag
new file mode 100644
index 0000000000..695d5fe9df
--- /dev/null
+++ b/reference/shaders-hlsl/asm/frag/texel-fetch-no-lod.asm.frag
@@ -0,0 +1,29 @@
+Texture2D<float4> uTexture : register(t0);
+SamplerState _uTexture_sampler : register(s0);
+
+static float4 gl_FragCoord;
+static float4 FragColor;
+
+struct SPIRV_Cross_Input
+{
+    float4 gl_FragCoord : SV_Position;
+};
+
+struct SPIRV_Cross_Output
+{
+    float4 FragColor : SV_Target0;
+};
+
+void frag_main()
+{
+    FragColor = uTexture.Load(int3(int2(gl_FragCoord.xy), 0));
+}
+
+SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
+{
+    gl_FragCoord = stage_input.gl_FragCoord;
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.FragColor = FragColor;
+    return stage_output;
+}
diff --git a/reference/shaders-hlsl/asm/vert/empty-struct-composite.asm.vert b/reference/shaders-hlsl/asm/vert/empty-struct-composite.asm.vert
deleted file mode 100644
index 103ff46a3f..0000000000
--- a/reference/shaders-hlsl/asm/vert/empty-struct-composite.asm.vert
+++ /dev/null
@@ -1,8 +0,0 @@
-void vert_main()
-{
-}
-
-void main()
-{
-    vert_main();
-}
diff --git a/reference/shaders-hlsl/asm/vert/spec-constant-op-composite.asm.vert b/reference/shaders-hlsl/asm/vert/spec-constant-op-composite.asm.vert
new file mode 100644
index 0000000000..ac446417b3
--- /dev/null
+++ b/reference/shaders-hlsl/asm/vert/spec-constant-op-composite.asm.vert
@@ -0,0 +1,38 @@
+static const int _7 = -10;
+static const uint _8 = 100u;
+static const float _9 = 3.141590118408203125f;
+static const int _20 = (_7 + 2);
+static const uint _25 = (_8 % 5u);
+static const int4 _30 = int4(20, 30, _20, _20);
+static const int2 _32 = int2(_30.y, _30.x);
+static const int _33 = _30.y;
+
+static float4 gl_Position;
+static int _4;
+
+struct SPIRV_Cross_Output
+{
+    nointerpolation int _4 : TEXCOORD0;
+    float4 gl_Position : SV_Position;
+};
+
+void vert_main()
+{
+    float4 pos = 0.0f.xxxx;
+    pos.y += float(_20);
+    pos.z += float(_25);
+    pos += float4(_30);
+    float2 _56 = pos.xy + float2(_32);
+    pos = float4(_56.x, _56.y, pos.z, pos.w);
+    gl_Position = pos;
+    _4 = _33;
+}
+
+SPIRV_Cross_Output main()
+{
+    vert_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.gl_Position = gl_Position;
+    stage_output._4 = _4;
+    return stage_output;
+}
diff --git a/reference/shaders-hlsl/comp/bitfield.noopt.comp b/reference/shaders-hlsl/comp/bitfield.noopt.comp
deleted file mode 100644
index 6839d9569e..0000000000
--- a/reference/shaders-hlsl/comp/bitfield.noopt.comp
+++ /dev/null
@@ -1,113 +0,0 @@
-uint SPIRV_Cross_bitfieldInsert(uint Base, uint Insert, uint Offset, uint Count)
-{
-    uint Mask = Count == 32 ? 0xffffffff : (((1u << Count) - 1) << (Offset & 31));
-    return (Base & ~Mask) | ((Insert << Offset) & Mask);
-}
-
-uint2 SPIRV_Cross_bitfieldInsert(uint2 Base, uint2 Insert, uint Offset, uint Count)
-{
-    uint Mask = Count == 32 ? 0xffffffff : (((1u << Count) - 1) << (Offset & 31));
-    return (Base & ~Mask) | ((Insert << Offset) & Mask);
-}
-
-uint3 SPIRV_Cross_bitfieldInsert(uint3 Base, uint3 Insert, uint Offset, uint Count)
-{
-    uint Mask = Count == 32 ? 0xffffffff : (((1u << Count) - 1) << (Offset & 31));
-    return (Base & ~Mask) | ((Insert << Offset) & Mask);
-}
-
-uint4 SPIRV_Cross_bitfieldInsert(uint4 Base, uint4 Insert, uint Offset, uint Count)
-{
-    uint Mask = Count == 32 ? 0xffffffff : (((1u << Count) - 1) << (Offset & 31));
-    return (Base & ~Mask) | ((Insert << Offset) & Mask);
-}
-
-uint SPIRV_Cross_bitfieldUExtract(uint Base, uint Offset, uint Count)
-{
-    uint Mask = Count == 32 ? 0xffffffff : ((1 << Count) - 1);
-    return (Base >> Offset) & Mask;
-}
-
-uint2 SPIRV_Cross_bitfieldUExtract(uint2 Base, uint Offset, uint Count)
-{
-    uint Mask = Count == 32 ? 0xffffffff : ((1 << Count) - 1);
-    return (Base >> Offset) & Mask;
-}
-
-uint3 SPIRV_Cross_bitfieldUExtract(uint3 Base, uint Offset, uint Count)
-{
-    uint Mask = Count == 32 ? 0xffffffff : ((1 << Count) - 1);
-    return (Base >> Offset) & Mask;
-}
-
-uint4 SPIRV_Cross_bitfieldUExtract(uint4 Base, uint Offset, uint Count)
-{
-    uint Mask = Count == 32 ? 0xffffffff : ((1 << Count) - 1);
-    return (Base >> Offset) & Mask;
-}
-
-int SPIRV_Cross_bitfieldSExtract(int Base, int Offset, int Count)
-{
-    int Mask = Count == 32 ? -1 : ((1 << Count) - 1);
-    int Masked = (Base >> Offset) & Mask;
-    int ExtendShift = (32 - Count) & 31;
-    return (Masked << ExtendShift) >> ExtendShift;
-}
-
-int2 SPIRV_Cross_bitfieldSExtract(int2 Base, int Offset, int Count)
-{
-    int Mask = Count == 32 ? -1 : ((1 << Count) - 1);
-    int2 Masked = (Base >> Offset) & Mask;
-    int ExtendShift = (32 - Count) & 31;
-    return (Masked << ExtendShift) >> ExtendShift;
-}
-
-int3 SPIRV_Cross_bitfieldSExtract(int3 Base, int Offset, int Count)
-{
-    int Mask = Count == 32 ? -1 : ((1 << Count) - 1);
-    int3 Masked = (Base >> Offset) & Mask;
-    int ExtendShift = (32 - Count) & 31;
-    return (Masked << ExtendShift) >> ExtendShift;
-}
-
-int4 SPIRV_Cross_bitfieldSExtract(int4 Base, int Offset, int Count)
-{
-    int Mask = Count == 32 ? -1 : ((1 << Count) - 1);
-    int4 Masked = (Base >> Offset) & Mask;
-    int ExtendShift = (32 - Count) & 31;
-    return (Masked << ExtendShift) >> ExtendShift;
-}
-
-void comp_main()
-{
-    int signed_value = 0;
-    uint unsigned_value = 0u;
-    int3 signed_values = int3(0, 0, 0);
-    uint3 unsigned_values = uint3(0u, 0u, 0u);
-    int s = SPIRV_Cross_bitfieldSExtract(signed_value, 5, 20);
-    uint u = SPIRV_Cross_bitfieldUExtract(unsigned_value, 6, 21);
-    s = int(SPIRV_Cross_bitfieldInsert(s, 40, 5, 4));
-    u = SPIRV_Cross_bitfieldInsert(u, 60u, 5, 4);
-    u = reversebits(u);
-    s = reversebits(s);
-    int v0 = countbits(u);
-    int v1 = countbits(s);
-    int v2 = firstbithigh(u);
-    int v3 = firstbitlow(s);
-    int3 s_1 = SPIRV_Cross_bitfieldSExtract(signed_values, 5, 20);
-    uint3 u_1 = SPIRV_Cross_bitfieldUExtract(unsigned_values, 6, 21);
-    s_1 = int3(SPIRV_Cross_bitfieldInsert(s_1, int3(40, 40, 40), 5, 4));
-    u_1 = SPIRV_Cross_bitfieldInsert(u_1, uint3(60u, 60u, 60u), 5, 4);
-    u_1 = reversebits(u_1);
-    s_1 = reversebits(s_1);
-    int3 v0_1 = countbits(u_1);
-    int3 v1_1 = countbits(s_1);
-    int3 v2_1 = firstbithigh(u_1);
-    int3 v3_1 = firstbitlow(s_1);
-}
-
-[numthreads(1, 1, 1)]
-void main()
-{
-    comp_main();
-}
diff --git a/reference/shaders-hlsl/comp/rmw-matrix.comp b/reference/shaders-hlsl/comp/rmw-matrix.comp
new file mode 100644
index 0000000000..ed66669358
--- /dev/null
+++ b/reference/shaders-hlsl/comp/rmw-matrix.comp
@@ -0,0 +1,20 @@
+RWByteAddressBuffer _11 : register(u0);
+
+void comp_main()
+{
+    _11.Store(0, asuint(asfloat(_11.Load(0)) * asfloat(_11.Load(96))));
+    _11.Store4(16, asuint(asfloat(_11.Load4(16)) * asfloat(_11.Load4(112))));
+    float4x4 _35 = asfloat(uint4x4(_11.Load4(128), _11.Load4(144), _11.Load4(160), _11.Load4(176)));
+    float4x4 _37 = asfloat(uint4x4(_11.Load4(32), _11.Load4(48), _11.Load4(64), _11.Load4(80)));
+    float4x4 _38 = mul(_35, _37);
+    _11.Store4(32, asuint(_38[0]));
+    _11.Store4(48, asuint(_38[1]));
+    _11.Store4(64, asuint(_38[2]));
+    _11.Store4(80, asuint(_38[3]));
+}
+
+[numthreads(1, 1, 1)]
+void main()
+{
+    comp_main();
+}
diff --git a/reference/shaders-hlsl/comp/subgroups.invalid.nofxc.sm60.comp b/reference/shaders-hlsl/comp/subgroups.invalid.nofxc.sm60.comp
new file mode 100644
index 0000000000..b87574f1a7
--- /dev/null
+++ b/reference/shaders-hlsl/comp/subgroups.invalid.nofxc.sm60.comp
@@ -0,0 +1,93 @@
+RWByteAddressBuffer _9 : register(u0, space0);
+
+static uint4 gl_SubgroupEqMask;
+static uint4 gl_SubgroupGeMask;
+static uint4 gl_SubgroupGtMask;
+static uint4 gl_SubgroupLeMask;
+static uint4 gl_SubgroupLtMask;
+void comp_main()
+{
+    _9.Store(0, asuint(float(WaveGetLaneCount())));
+    _9.Store(0, asuint(float(WaveGetLaneIndex())));
+    bool elected = WaveIsFirstLane();
+    _9.Store(0, asuint(float4(gl_SubgroupEqMask).x));
+    _9.Store(0, asuint(float4(gl_SubgroupGeMask).x));
+    _9.Store(0, asuint(float4(gl_SubgroupGtMask).x));
+    _9.Store(0, asuint(float4(gl_SubgroupLeMask).x));
+    _9.Store(0, asuint(float4(gl_SubgroupLtMask).x));
+    float4 broadcasted = WaveReadLaneAt(10.0f.xxxx, 8u);
+    float3 first = WaveReadLaneFirst(20.0f.xxx);
+    uint4 ballot_value = WaveActiveBallot(true);
+    uint bit_count = countbits(ballot_value.x) + countbits(ballot_value.y) + countbits(ballot_value.z) + countbits(ballot_value.w);
+    bool has_all = WaveActiveAllTrue(true);
+    bool has_any = WaveActiveAnyTrue(true);
+    bool has_equal = WaveActiveAllEqualBool(true);
+    float4 added = WaveActiveSum(20.0f.xxxx);
+    int4 iadded = WaveActiveSum(int4(20, 20, 20, 20));
+    float4 multiplied = WaveActiveProduct(20.0f.xxxx);
+    int4 imultiplied = WaveActiveProduct(int4(20, 20, 20, 20));
+    float4 lo = WaveActiveMin(20.0f.xxxx);
+    float4 hi = WaveActiveMax(20.0f.xxxx);
+    int4 slo = WaveActiveMin(int4(20, 20, 20, 20));
+    int4 shi = WaveActiveMax(int4(20, 20, 20, 20));
+    uint4 ulo = WaveActiveMin(uint4(20u, 20u, 20u, 20u));
+    uint4 uhi = WaveActiveMax(uint4(20u, 20u, 20u, 20u));
+    uint4 anded = WaveActiveBitAnd(ballot_value);
+    uint4 ored = WaveActiveBitOr(ballot_value);
+    uint4 xored = WaveActiveBitXor(ballot_value);
+    added = WavePrefixSum(added) + added;
+    iadded = WavePrefixSum(iadded) + iadded;
+    multiplied = WavePrefixProduct(multiplied) * multiplied;
+    imultiplied = WavePrefixProduct(imultiplied) * imultiplied;
+    added = WavePrefixSum(multiplied);
+    multiplied = WavePrefixProduct(multiplied);
+    iadded = WavePrefixSum(imultiplied);
+    imultiplied = WavePrefixProduct(imultiplied);
+    float4 swap_horiz = QuadReadAcrossX(20.0f.xxxx);
+    float4 swap_vertical = QuadReadAcrossY(20.0f.xxxx);
+    float4 swap_diagonal = QuadReadAcrossDiagonal(20.0f.xxxx);
+    float4 quad_broadcast = QuadReadLaneAt(20.0f.xxxx, 3u);
+}
+
+[numthreads(1, 1, 1)]
+void main()
+{
+    gl_SubgroupEqMask = 1u << (WaveGetLaneIndex() - uint4(0, 32, 64, 96));
+    if (WaveGetLaneIndex() >= 32) gl_SubgroupEqMask.x = 0;
+    if (WaveGetLaneIndex() >= 64 || WaveGetLaneIndex() < 32) gl_SubgroupEqMask.y = 0;
+    if (WaveGetLaneIndex() >= 96 || WaveGetLaneIndex() < 64) gl_SubgroupEqMask.z = 0;
+    if (WaveGetLaneIndex() < 96) gl_SubgroupEqMask.w = 0;
+    gl_SubgroupGeMask = ~((1u << (WaveGetLaneIndex() - uint4(0, 32, 64, 96))) - 1u);
+    if (WaveGetLaneIndex() >= 32) gl_SubgroupGeMask.x = 0u;
+    if (WaveGetLaneIndex() >= 64) gl_SubgroupGeMask.y = 0u;
+    if (WaveGetLaneIndex() >= 96) gl_SubgroupGeMask.z = 0u;
+    if (WaveGetLaneIndex() < 32) gl_SubgroupGeMask.y = ~0u;
+    if (WaveGetLaneIndex() < 64) gl_SubgroupGeMask.z = ~0u;
+    if (WaveGetLaneIndex() < 96) gl_SubgroupGeMask.w = ~0u;
+    uint gt_lane_index = WaveGetLaneIndex() + 1;
+    gl_SubgroupGtMask = ~((1u << (gt_lane_index - uint4(0, 32, 64, 96))) - 1u);
+    if (gt_lane_index >= 32) gl_SubgroupGtMask.x = 0u;
+    if (gt_lane_index >= 64) gl_SubgroupGtMask.y = 0u;
+    if (gt_lane_index >= 96) gl_SubgroupGtMask.z = 0u;
+    if (gt_lane_index >= 128) gl_SubgroupGtMask.w = 0u;
+    if (gt_lane_index < 32) gl_SubgroupGtMask.y = ~0u;
+    if (gt_lane_index < 64) gl_SubgroupGtMask.z = ~0u;
+    if (gt_lane_index < 96) gl_SubgroupGtMask.w = ~0u;
+    uint le_lane_index = WaveGetLaneIndex() + 1;
+    gl_SubgroupLeMask = (1u << (le_lane_index - uint4(0, 32, 64, 96))) - 1u;
+    if (le_lane_index >= 32) gl_SubgroupLeMask.x = ~0u;
+    if (le_lane_index >= 64) gl_SubgroupLeMask.y = ~0u;
+    if (le_lane_index >= 96) gl_SubgroupLeMask.z = ~0u;
+    if (le_lane_index >= 128) gl_SubgroupLeMask.w = ~0u;
+    if (le_lane_index < 32) gl_SubgroupLeMask.y = 0u;
+    if (le_lane_index < 64) gl_SubgroupLeMask.z = 0u;
+    if (le_lane_index < 96) gl_SubgroupLeMask.w = 0u;
+    gl_SubgroupLtMask = (1u << (WaveGetLaneIndex() - uint4(0, 32, 64, 96))) - 1u;
+    if (WaveGetLaneIndex() >= 32) gl_SubgroupLtMask.x = ~0u;
+    if (WaveGetLaneIndex() >= 64) gl_SubgroupLtMask.y = ~0u;
+    if (WaveGetLaneIndex() >= 96) gl_SubgroupLtMask.z = ~0u;
+    if (WaveGetLaneIndex() < 32) gl_SubgroupLtMask.y = 0u;
+    if (WaveGetLaneIndex() < 64) gl_SubgroupLtMask.z = 0u;
+    if (WaveGetLaneIndex() < 96) gl_SubgroupLtMask.w = 0u;
+    comp_main();
+}
diff --git a/reference/shaders-hlsl/frag/array-lut-no-loop-variable.frag b/reference/shaders-hlsl/frag/array-lut-no-loop-variable.frag
new file mode 100644
index 0000000000..04d4d7fa75
--- /dev/null
+++ b/reference/shaders-hlsl/frag/array-lut-no-loop-variable.frag
@@ -0,0 +1,31 @@
+static const float _17[5] = { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f };
+
+static float4 FragColor;
+static float4 v0;
+
+struct SPIRV_Cross_Input
+{
+    float4 v0 : TEXCOORD0;
+};
+
+struct SPIRV_Cross_Output
+{
+    float4 FragColor : SV_Target0;
+};
+
+void frag_main()
+{
+    float lut[5] = _17;
+    for (int i = 0; i < 4; i++, FragColor += lut[i].xxxx)
+    {
+    }
+}
+
+SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
+{
+    v0 = stage_input.v0;
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.FragColor = FragColor;
+    return stage_output;
+}
diff --git a/reference/shaders-hlsl/frag/control-dependent-in-branch.desktop.frag b/reference/shaders-hlsl/frag/control-dependent-in-branch.desktop.frag
new file mode 100644
index 0000000000..322102ce86
--- /dev/null
+++ b/reference/shaders-hlsl/frag/control-dependent-in-branch.desktop.frag
@@ -0,0 +1,55 @@
+Texture2D<float4> uSampler : register(t0);
+SamplerState _uSampler_sampler : register(s0);
+
+static float4 FragColor;
+static float4 vInput;
+
+struct SPIRV_Cross_Input
+{
+    float4 vInput : TEXCOORD0;
+};
+
+struct SPIRV_Cross_Output
+{
+    float4 FragColor : SV_Target0;
+};
+
+void frag_main()
+{
+    FragColor = vInput;
+    float4 t = uSampler.Sample(_uSampler_sampler, vInput.xy);
+    float4 d0 = ddx(vInput);
+    float4 d1 = ddy(vInput);
+    float4 d2 = fwidth(vInput);
+    float4 d3 = ddx_coarse(vInput);
+    float4 d4 = ddy_coarse(vInput);
+    float4 d5 = fwidth(vInput);
+    float4 d6 = ddx_fine(vInput);
+    float4 d7 = ddy_fine(vInput);
+    float4 d8 = fwidth(vInput);
+    float _56_tmp = uSampler.CalculateLevelOfDetail(_uSampler_sampler, vInput.zw);
+    float2 lod = float2(_56_tmp, _56_tmp);
+    if (vInput.y > 10.0f)
+    {
+        FragColor += t;
+        FragColor += d0;
+        FragColor += d1;
+        FragColor += d2;
+        FragColor += d3;
+        FragColor += d4;
+        FragColor += d5;
+        FragColor += d6;
+        FragColor += d7;
+        FragColor += d8;
+        FragColor += lod.xyxy;
+    }
+}
+
+SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
+{
+    vInput = stage_input.vInput;
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.FragColor = FragColor;
+    return stage_output;
+}
diff --git a/reference/shaders-hlsl/frag/fp16.desktop.frag b/reference/shaders-hlsl/frag/fp16.desktop.frag
new file mode 100644
index 0000000000..e10d6724e0
--- /dev/null
+++ b/reference/shaders-hlsl/frag/fp16.desktop.frag
@@ -0,0 +1,179 @@
+static min16float4 v4;
+static min16float3 v3;
+static min16float v1;
+static min16float2 v2;
+static float o1;
+static float2 o2;
+static float3 o3;
+static float4 o4;
+
+struct SPIRV_Cross_Input
+{
+    min16float v1 : TEXCOORD0;
+    min16float2 v2 : TEXCOORD1;
+    min16float3 v3 : TEXCOORD2;
+    min16float4 v4 : TEXCOORD3;
+};
+
+struct SPIRV_Cross_Output
+{
+    float o1 : SV_Target0;
+    float2 o2 : SV_Target1;
+    float3 o3 : SV_Target2;
+    float4 o4 : SV_Target3;
+};
+
+float mod(float x, float y)
+{
+    return x - y * floor(x / y);
+}
+
+float2 mod(float2 x, float2 y)
+{
+    return x - y * floor(x / y);
+}
+
+float3 mod(float3 x, float3 y)
+{
+    return x - y * floor(x / y);
+}
+
+float4 mod(float4 x, float4 y)
+{
+    return x - y * floor(x / y);
+}
+
+uint SPIRV_Cross_packFloat2x16(min16float2 value)
+{
+    uint2 Packed = f32tof16(value);
+    return Packed.x | (Packed.y << 16);
+}
+
+min16float2 SPIRV_Cross_unpackFloat2x16(uint value)
+{
+    return min16float2(f16tof32(uint2(value & 0xffff, value >> 16)));
+}
+
+void test_constants()
+{
+    min16float a = min16float(1.0);
+    min16float b = min16float(1.5);
+    min16float c = min16float(-1.5);
+    min16float d = min16float(0.0 / 0.0);
+    min16float e = min16float(1.0 / 0.0);
+    min16float f = min16float(-1.0 / 0.0);
+    min16float g = min16float(1014.0);
+    min16float h = min16float(9.5367431640625e-07);
+}
+
+min16float test_result()
+{
+    return min16float(1.0);
+}
+
+void test_conversions()
+{
+    min16float one = test_result();
+    int a = int(one);
+    uint b = uint(one);
+    bool c = one != min16float(0.0);
+    float d = float(one);
+    double e = double(one);
+    min16float a2 = min16float(a);
+    min16float b2 = min16float(b);
+    min16float c2 = min16float(c);
+    min16float d2 = min16float(d);
+    min16float e2 = min16float(e);
+}
+
+void test_builtins()
+{
+    min16float4 res = radians(v4);
+    res = degrees(v4);
+    res = sin(v4);
+    res = cos(v4);
+    res = tan(v4);
+    res = asin(v4);
+    res = atan2(v4, v3.xyzz);
+    res = atan(v4);
+    res = sinh(v4);
+    res = cosh(v4);
+    res = tanh(v4);
+    res = pow(v4, v4);
+    res = exp(v4);
+    res = log(v4);
+    res = exp2(v4);
+    res = log2(v4);
+    res = sqrt(v4);
+    res = rsqrt(v4);
+    res = abs(v4);
+    res = sign(v4);
+    res = floor(v4);
+    res = trunc(v4);
+    res = round(v4);
+    res = ceil(v4);
+    res = frac(v4);
+    res = mod(v4, v4);
+    min16float4 tmp;
+    min16float4 _144 = modf(v4, tmp);
+    res = _144;
+    res = min(v4, v4);
+    res = max(v4, v4);
+    res = clamp(v4, v4, v4);
+    res = lerp(v4, v4, v4);
+    bool4 _164 = bool4(v4.x < v4.x, v4.y < v4.y, v4.z < v4.z, v4.w < v4.w);
+    res = min16float4(_164.x ? v4.x : v4.x, _164.y ? v4.y : v4.y, _164.z ? v4.z : v4.z, _164.w ? v4.w : v4.w);
+    res = step(v4, v4);
+    res = smoothstep(v4, v4, v4);
+    bool4 btmp = isnan(v4);
+    btmp = isinf(v4);
+    res = mad(v4, v4, v4);
+    uint pack0 = SPIRV_Cross_packFloat2x16(v4.xy);
+    uint pack1 = SPIRV_Cross_packFloat2x16(v4.zw);
+    res = min16float4(SPIRV_Cross_unpackFloat2x16(pack0), SPIRV_Cross_unpackFloat2x16(pack1));
+    min16float t0 = length(v4);
+    t0 = distance(v4, v4);
+    t0 = dot(v4, v4);
+    min16float3 res3 = cross(v3, v3);
+    res = normalize(v4);
+    res = faceforward(v4, v4, v4);
+    res = reflect(v4, v4);
+    res = refract(v4, v4, v1);
+    btmp = bool4(v4.x < v4.x, v4.y < v4.y, v4.z < v4.z, v4.w < v4.w);
+    btmp = bool4(v4.x <= v4.x, v4.y <= v4.y, v4.z <= v4.z, v4.w <= v4.w);
+    btmp = bool4(v4.x > v4.x, v4.y > v4.y, v4.z > v4.z, v4.w > v4.w);
+    btmp = bool4(v4.x >= v4.x, v4.y >= v4.y, v4.z >= v4.z, v4.w >= v4.w);
+    btmp = bool4(v4.x == v4.x, v4.y == v4.y, v4.z == v4.z, v4.w == v4.w);
+    btmp = bool4(v4.x != v4.x, v4.y != v4.y, v4.z != v4.z, v4.w != v4.w);
+    res = ddx(v4);
+    res = ddy(v4);
+    res = ddx_fine(v4);
+    res = ddy_fine(v4);
+    res = ddx_coarse(v4);
+    res = ddy_coarse(v4);
+    res = fwidth(v4);
+    res = fwidth(v4);
+    res = fwidth(v4);
+}
+
+void frag_main()
+{
+    test_constants();
+    test_conversions();
+    test_builtins();
+}
+
+SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
+{
+    v4 = stage_input.v4;
+    v3 = stage_input.v3;
+    v1 = stage_input.v1;
+    v2 = stage_input.v2;
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.o1 = o1;
+    stage_output.o2 = o2;
+    stage_output.o3 = o3;
+    stage_output.o4 = o4;
+    return stage_output;
+}
diff --git a/reference/shaders-hlsl/frag/row-major-layout-in-struct.frag b/reference/shaders-hlsl/frag/row-major-layout-in-struct.frag
new file mode 100644
index 0000000000..f97e2174ec
--- /dev/null
+++ b/reference/shaders-hlsl/frag/row-major-layout-in-struct.frag
@@ -0,0 +1,46 @@
+struct NonFoo
+{
+    float4x4 v;
+    float4x4 w;
+};
+
+struct Foo
+{
+    row_major float4x4 v;
+    row_major float4x4 w;
+};
+
+cbuffer _17 : register(b0)
+{
+    Foo _17_foo : packoffset(c0);
+};
+
+static float4 FragColor;
+static float4 vUV;
+
+struct SPIRV_Cross_Input
+{
+    float4 vUV : TEXCOORD0;
+};
+
+struct SPIRV_Cross_Output
+{
+    float4 FragColor : SV_Target0;
+};
+
+void frag_main()
+{
+    NonFoo f;
+    f.v = _17_foo.v;
+    f.w = _17_foo.w;
+    FragColor = mul(mul(vUV, f.w), f.v);
+}
+
+SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
+{
+    vUV = stage_input.vUV;
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.FragColor = FragColor;
+    return stage_output;
+}
diff --git a/reference/shaders-hlsl/frag/sampler-image-arrays.frag b/reference/shaders-hlsl/frag/sampler-image-arrays.frag
new file mode 100644
index 0000000000..856f04cf47
--- /dev/null
+++ b/reference/shaders-hlsl/frag/sampler-image-arrays.frag
@@ -0,0 +1,54 @@
+Texture2D<float4> uSampler[4] : register(t0);
+SamplerState _uSampler_sampler[4] : register(s0);
+Texture2D<float4> uTextures[4] : register(t8);
+SamplerState uSamplers[4] : register(s4);
+
+static int vIndex;
+static float2 vTex;
+static float4 FragColor;
+
+struct SPIRV_Cross_Input
+{
+    nointerpolation float2 vTex : TEXCOORD0;
+    nointerpolation int vIndex : TEXCOORD1;
+};
+
+struct SPIRV_Cross_Output
+{
+    float4 FragColor : SV_Target0;
+};
+
+float4 sample_from_global()
+{
+    return uSampler[vIndex].Sample(_uSampler_sampler[vIndex], vTex + 0.100000001490116119384765625f.xx);
+}
+
+float4 sample_from_argument(Texture2D<float4> samplers[4], SamplerState _samplers_sampler[4])
+{
+    return samplers[vIndex].Sample(_samplers_sampler[vIndex], vTex + 0.20000000298023223876953125f.xx);
+}
+
+float4 sample_single_from_argument(Texture2D<float4> samp, SamplerState _samp_sampler)
+{
+    return samp.Sample(_samp_sampler, vTex + 0.300000011920928955078125f.xx);
+}
+
+void frag_main()
+{
+    FragColor = 0.0f.xxxx;
+    FragColor += uTextures[2].Sample(uSamplers[1], vTex);
+    FragColor += uSampler[vIndex].Sample(_uSampler_sampler[vIndex], vTex);
+    FragColor += sample_from_global();
+    FragColor += sample_from_argument(uSampler, _uSampler_sampler);
+    FragColor += sample_single_from_argument(uSampler[3], _uSampler_sampler[3]);
+}
+
+SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
+{
+    vIndex = stage_input.vIndex;
+    vTex = stage_input.vTex;
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.FragColor = FragColor;
+    return stage_output;
+}
diff --git a/reference/shaders-hlsl/frag/spec-constant.frag b/reference/shaders-hlsl/frag/spec-constant.frag
deleted file mode 100644
index 70b029dc68..0000000000
--- a/reference/shaders-hlsl/frag/spec-constant.frag
+++ /dev/null
@@ -1,79 +0,0 @@
-static const float a = 1.0f;
-static const float b = 2.0f;
-static const int c = 3;
-static const int d = 4;
-static const uint e = 5u;
-static const uint f = 6u;
-static const bool g = false;
-static const bool h = true;
-
-struct Foo
-{
-    float elems[(d + 2)];
-};
-
-static float4 FragColor;
-
-struct SPIRV_Cross_Output
-{
-    float4 FragColor : SV_Target0;
-};
-
-void frag_main()
-{
-    float t0 = a;
-    float t1 = b;
-    uint c0 = (uint(c) + 0u);
-    int c1 = (-c);
-    int c2 = (~c);
-    int c3 = (c + d);
-    int c4 = (c - d);
-    int c5 = (c * d);
-    int c6 = (c / d);
-    uint c7 = (e / f);
-    int c8 = (c % d);
-    uint c9 = (e % f);
-    int c10 = (c >> d);
-    uint c11 = (e >> f);
-    int c12 = (c << d);
-    int c13 = (c | d);
-    int c14 = (c ^ d);
-    int c15 = (c & d);
-    bool c16 = (g || h);
-    bool c17 = (g && h);
-    bool c18 = (!g);
-    bool c19 = (g == h);
-    bool c20 = (g != h);
-    bool c21 = (c == d);
-    bool c22 = (c != d);
-    bool c23 = (c < d);
-    bool c24 = (e < f);
-    bool c25 = (c > d);
-    bool c26 = (e > f);
-    bool c27 = (c <= d);
-    bool c28 = (e <= f);
-    bool c29 = (c >= d);
-    bool c30 = (e >= f);
-    int c31 = c8 + c3;
-    int c32 = int(e + 0u);
-    bool c33 = (c != int(0u));
-    bool c34 = (e != 0u);
-    int c35 = int(g);
-    uint c36 = uint(g);
-    float c37 = float(g);
-    float vec0[(c + 3)][8];
-    vec0[0][0] = 10.0f;
-    float vec1[(c + 2)];
-    vec1[0] = 20.0f;
-    Foo foo;
-    foo.elems[c] = 10.0f;
-    FragColor = (((t0 + t1).xxxx + vec0[0][0].xxxx) + vec1[0].xxxx) + foo.elems[c].xxxx;
-}
-
-SPIRV_Cross_Output main()
-{
-    frag_main();
-    SPIRV_Cross_Output stage_output;
-    stage_output.FragColor = FragColor;
-    return stage_output;
-}
diff --git a/reference/shaders-hlsl/frag/tex-sampling-ms.frag b/reference/shaders-hlsl/frag/tex-sampling-ms.frag
new file mode 100644
index 0000000000..1435315383
--- /dev/null
+++ b/reference/shaders-hlsl/frag/tex-sampling-ms.frag
@@ -0,0 +1,32 @@
+Texture2DMS<float4> uTex : register(t0);
+SamplerState _uTex_sampler : register(s0);
+
+static float4 gl_FragCoord;
+static float4 FragColor;
+
+struct SPIRV_Cross_Input
+{
+    float4 gl_FragCoord : SV_Position;
+};
+
+struct SPIRV_Cross_Output
+{
+    float4 FragColor : SV_Target0;
+};
+
+void frag_main()
+{
+    FragColor = uTex.Load(int2(gl_FragCoord.xy), 0);
+    FragColor += uTex.Load(int2(gl_FragCoord.xy), 1);
+    FragColor += uTex.Load(int2(gl_FragCoord.xy), 2);
+    FragColor += uTex.Load(int2(gl_FragCoord.xy), 3);
+}
+
+SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
+{
+    gl_FragCoord = stage_input.gl_FragCoord;
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.FragColor = FragColor;
+    return stage_output;
+}
diff --git a/reference/shaders-hlsl/frag/tex-sampling.frag b/reference/shaders-hlsl/frag/tex-sampling.frag
index 7e10bfdd24..7fa60957dc 100644
--- a/reference/shaders-hlsl/frag/tex-sampling.frag
+++ b/reference/shaders-hlsl/frag/tex-sampling.frag
@@ -1,27 +1,27 @@
-Texture1D<float4> tex1d;
-SamplerState _tex1d_sampler;
-Texture2D<float4> tex2d;
-SamplerState _tex2d_sampler;
-Texture3D<float4> tex3d;
-SamplerState _tex3d_sampler;
-TextureCube<float4> texCube;
-SamplerState _texCube_sampler;
-Texture1D<float4> tex1dShadow;
-SamplerComparisonState _tex1dShadow_sampler;
-Texture2D<float4> tex2dShadow;
-SamplerComparisonState _tex2dShadow_sampler;
-TextureCube<float4> texCubeShadow;
-SamplerComparisonState _texCubeShadow_sampler;
-Texture1DArray<float4> tex1dArray;
-SamplerState _tex1dArray_sampler;
-Texture2DArray<float4> tex2dArray;
-SamplerState _tex2dArray_sampler;
-TextureCubeArray<float4> texCubeArray;
-SamplerState _texCubeArray_sampler;
-Texture2D<float4> separateTex2d;
-SamplerState samplerNonDepth;
-Texture2D<float4> separateTex2dDepth;
-SamplerComparisonState samplerDepth;
+Texture1D<float4> tex1d : register(t0);
+SamplerState _tex1d_sampler : register(s0);
+Texture2D<float4> tex2d : register(t1);
+SamplerState _tex2d_sampler : register(s1);
+Texture3D<float4> tex3d : register(t2);
+SamplerState _tex3d_sampler : register(s2);
+TextureCube<float4> texCube : register(t3);
+SamplerState _texCube_sampler : register(s3);
+Texture1D<float4> tex1dShadow : register(t4);
+SamplerComparisonState _tex1dShadow_sampler : register(s4);
+Texture2D<float4> tex2dShadow : register(t5);
+SamplerComparisonState _tex2dShadow_sampler : register(s5);
+TextureCube<float4> texCubeShadow : register(t6);
+SamplerComparisonState _texCubeShadow_sampler : register(s6);
+Texture1DArray<float4> tex1dArray : register(t7);
+SamplerState _tex1dArray_sampler : register(s7);
+Texture2DArray<float4> tex2dArray : register(t8);
+SamplerState _tex2dArray_sampler : register(s8);
+TextureCubeArray<float4> texCubeArray : register(t9);
+SamplerState _texCubeArray_sampler : register(s9);
+Texture2D<float4> separateTex2d : register(t12);
+SamplerState samplerNonDepth : register(s11);
+Texture2D<float4> separateTex2dDepth : register(t13);
+SamplerComparisonState samplerDepth : register(s10);
 
 static float texCoord1d;
 static float2 texCoord2d;
diff --git a/reference/shaders-hlsl/vert/read-from-row-major-array.vert b/reference/shaders-hlsl/vert/read-from-row-major-array.vert
new file mode 100644
index 0000000000..79758a4697
--- /dev/null
+++ b/reference/shaders-hlsl/vert/read-from-row-major-array.vert
@@ -0,0 +1,64 @@
+cbuffer _104 : register(b0)
+{
+    column_major float2x3 _104_var[3][4] : packoffset(c0);
+};
+
+static float4 gl_Position;
+static float4 a_position;
+static float v_vtxResult;
+
+struct SPIRV_Cross_Input
+{
+    float4 a_position : TEXCOORD0;
+};
+
+struct SPIRV_Cross_Output
+{
+    float v_vtxResult : TEXCOORD0;
+    float4 gl_Position : SV_Position;
+};
+
+float compare_float(float a, float b)
+{
+    return float(abs(a - b) < 0.0500000007450580596923828125f);
+}
+
+float compare_vec3(float3 a, float3 b)
+{
+    float param = a.x;
+    float param_1 = b.x;
+    float param_2 = a.y;
+    float param_3 = b.y;
+    float param_4 = a.z;
+    float param_5 = b.z;
+    return (compare_float(param, param_1) * compare_float(param_2, param_3)) * compare_float(param_4, param_5);
+}
+
+float compare_mat2x3(float2x3 a, float2x3 b)
+{
+    float3 param = a[0];
+    float3 param_1 = b[0];
+    float3 param_2 = a[1];
+    float3 param_3 = b[1];
+    return compare_vec3(param, param_1) * compare_vec3(param_2, param_3);
+}
+
+void vert_main()
+{
+    gl_Position = a_position;
+    float result = 1.0f;
+    float2x3 param = _104_var[0][0];
+    float2x3 param_1 = float2x3(float3(2.0f, 6.0f, -6.0f), float3(0.0f, 5.0f, 5.0f));
+    result *= compare_mat2x3(param, param_1);
+    v_vtxResult = result;
+}
+
+SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
+{
+    a_position = stage_input.a_position;
+    vert_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.gl_Position = gl_Position;
+    stage_output.v_vtxResult = v_vtxResult;
+    return stage_output;
+}
diff --git a/reference/opt/shaders-msl/asm/frag/inliner-dominator-inside-loop.asm.frag b/reference/shaders-msl-no-opt/asm/frag/inliner-dominator-inside-loop.asm.frag
similarity index 93%
rename from reference/opt/shaders-msl/asm/frag/inliner-dominator-inside-loop.asm.frag
rename to reference/shaders-msl-no-opt/asm/frag/inliner-dominator-inside-loop.asm.frag
index 2ac8cbe015..ce44c564fd 100644
--- a/reference/opt/shaders-msl/asm/frag/inliner-dominator-inside-loop.asm.frag
+++ b/reference/shaders-msl-no-opt/asm/frag/inliner-dominator-inside-loop.asm.frag
@@ -83,25 +83,25 @@ constant float4 _192 = {};
 constant float4 _219 = {};
 constant float4 _297 = {};
 
-struct main0_in
-{
-    float IN_studIndex [[user(locn8)]];
-    float4 IN_PosLightSpace_Reflectance [[user(locn7)]];
-    float3 IN_Tangent [[user(locn6)]];
-    float4 IN_Normal_SpecPower [[user(locn5)]];
-    float4 IN_View_Depth [[user(locn4)]];
-    float4 IN_LightPosition_Fog [[user(locn3)]];
-    float4 IN_Color [[user(locn2)]];
-    float4 IN_UvStuds_EdgeDistance2 [[user(locn1)]];
-    float4 IN_Uv_EdgeDistance1 [[user(locn0)]];
-};
-
 struct main0_out
 {
     float4 _entryPointOutput [[color(0)]];
 };
 
-fragment main0_out main0(main0_in in [[stage_in]], constant CB0& _19 [[buffer(0)]], texture3d<float> LightMapTexture [[texture(0)]], texture2d<float> ShadowMapTexture [[texture(1)]], texturecube<float> EnvironmentMapTexture [[texture(2)]], texture2d<float> DiffuseMapTexture [[texture(3)]], texture2d<float> NormalMapTexture [[texture(4)]], texture2d<float> NormalDetailMapTexture [[texture(5)]], texture2d<float> StudsMapTexture [[texture(6)]], texture2d<float> SpecularMapTexture [[texture(7)]], sampler LightMapSampler [[sampler(0)]], sampler ShadowMapSampler [[sampler(1)]], sampler EnvironmentMapSampler [[sampler(2)]], sampler DiffuseMapSampler [[sampler(3)]], sampler NormalMapSampler [[sampler(4)]], sampler NormalDetailMapSampler [[sampler(5)]], sampler StudsMapSampler [[sampler(6)]], sampler SpecularMapSampler [[sampler(7)]], float4 gl_FragCoord [[position]])
+struct main0_in
+{
+    float4 IN_Uv_EdgeDistance1 [[user(locn0)]];
+    float4 IN_UvStuds_EdgeDistance2 [[user(locn1)]];
+    float4 IN_Color [[user(locn2)]];
+    float4 IN_LightPosition_Fog [[user(locn3)]];
+    float4 IN_View_Depth [[user(locn4)]];
+    float4 IN_Normal_SpecPower [[user(locn5)]];
+    float3 IN_Tangent [[user(locn6)]];
+    float4 IN_PosLightSpace_Reflectance [[user(locn7)]];
+    float IN_studIndex [[user(locn8)]];
+};
+
+fragment main0_out main0(main0_in in [[stage_in]], constant CB0& _19 [[buffer(0)]], texture2d<float> StudsMapTexture [[texture(0)]], texture2d<float> ShadowMapTexture [[texture(1)]], texturecube<float> EnvironmentMapTexture [[texture(2)]], texture2d<float> DiffuseMapTexture [[texture(3)]], texture2d<float> NormalMapTexture [[texture(4)]], texture2d<float> SpecularMapTexture [[texture(5)]], texture3d<float> LightMapTexture [[texture(6)]], texture2d<float> NormalDetailMapTexture [[texture(8)]], sampler StudsMapSampler [[sampler(0)]], sampler ShadowMapSampler [[sampler(1)]], sampler EnvironmentMapSampler [[sampler(2)]], sampler DiffuseMapSampler [[sampler(3)]], sampler NormalMapSampler [[sampler(4)]], sampler SpecularMapSampler [[sampler(5)]], sampler LightMapSampler [[sampler(6)]], sampler NormalDetailMapSampler [[sampler(8)]], float4 gl_FragCoord [[position]])
 {
     main0_out out = {};
     VertexOutput _128 = _121;
@@ -154,6 +154,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant CB0& _19 [[buffer(0)
         _193 = _192;
         break;
     } while (false);
+    float4 _194 = _193 * 1.0;
     float4 _220;
     do
     {
@@ -179,7 +180,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant CB0& _19 [[buffer(0)
     float3 _253 = float3(_252.x, _252.y, _232.z);
     float2 _255 = _253.xy * _165;
     float3 _256 = float3(_255.x, _255.y, _253.z);
-    float3 _271 = ((in.IN_Color.xyz * (_193 * 1.0).xyz) * (1.0 + (_256.x * 0.300000011920928955078125))) * (StudsMapTexture.sample(StudsMapSampler, _156.UvStuds).x * 2.0);
+    float3 _271 = ((in.IN_Color.xyz * _194.xyz) * (1.0 + (_256.x * 0.300000011920928955078125))) * (StudsMapTexture.sample(StudsMapSampler, _156.UvStuds).x * 2.0);
     float4 _298;
     do
     {
diff --git a/reference/shaders-msl-no-opt/asm/vert/empty-struct-composite.asm.vert b/reference/shaders-msl-no-opt/asm/vert/empty-struct-composite.asm.vert
new file mode 100644
index 0000000000..e9cd6a540a
--- /dev/null
+++ b/reference/shaders-msl-no-opt/asm/vert/empty-struct-composite.asm.vert
@@ -0,0 +1,16 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct Test
+{
+    int empty_struct_member;
+};
+
+vertex void main0()
+{
+    Test _14 = Test{ 0 };
+    Test t = _14;
+}
+
diff --git a/reference/opt/shaders-msl/comp/bitfield.noopt.comp b/reference/shaders-msl-no-opt/comp/bitfield.comp
similarity index 100%
rename from reference/opt/shaders-msl/comp/bitfield.noopt.comp
rename to reference/shaders-msl-no-opt/comp/bitfield.comp
diff --git a/reference/opt/shaders-msl/comp/loop.noopt.comp b/reference/shaders-msl-no-opt/comp/loop.comp
similarity index 91%
rename from reference/opt/shaders-msl/comp/loop.noopt.comp
rename to reference/shaders-msl-no-opt/comp/loop.comp
index 00ed570b31..d7677fb436 100644
--- a/reference/opt/shaders-msl/comp/loop.noopt.comp
+++ b/reference/shaders-msl-no-opt/comp/loop.comp
@@ -14,7 +14,7 @@ struct SSBO2
     float4 out_data[1];
 };
 
-kernel void main0(device SSBO& _24 [[buffer(0)]], device SSBO2& _177 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
+kernel void main0(const device SSBO& _24 [[buffer(0)]], device SSBO2& _177 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
 {
     uint ident = gl_GlobalInvocationID.x;
     float4 idat = _24.in_data[ident];
diff --git a/reference/shaders-msl/comp/return.comp b/reference/shaders-msl-no-opt/comp/return.comp
similarity index 90%
rename from reference/shaders-msl/comp/return.comp
rename to reference/shaders-msl-no-opt/comp/return.comp
index 71fcfbe391..4015ddb37b 100644
--- a/reference/shaders-msl/comp/return.comp
+++ b/reference/shaders-msl-no-opt/comp/return.comp
@@ -8,7 +8,7 @@ struct SSBO2
     float4 out_data[1];
 };
 
-kernel void main0(device SSBO2& _27 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
+kernel void main0(device SSBO2& _27 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
 {
     uint ident = gl_GlobalInvocationID.x;
     if (ident == 2u)
diff --git a/reference/opt/shaders-msl/frag/in_block_assign.noopt.frag b/reference/shaders-msl-no-opt/frag/in_block_assign.frag
similarity index 100%
rename from reference/opt/shaders-msl/frag/in_block_assign.noopt.frag
rename to reference/shaders-msl-no-opt/frag/in_block_assign.frag
index d06863d99c..3449dcc077 100644
--- a/reference/opt/shaders-msl/frag/in_block_assign.noopt.frag
+++ b/reference/shaders-msl-no-opt/frag/in_block_assign.frag
@@ -8,16 +8,16 @@ struct VOUT
     float4 a;
 };
 
-struct main0_in
-{
-    float4 VOUT_a [[user(locn0)]];
-};
-
 struct main0_out
 {
     float4 FragColor [[color(0)]];
 };
 
+struct main0_in
+{
+    float4 VOUT_a [[user(locn0)]];
+};
+
 fragment main0_out main0(main0_in in [[stage_in]])
 {
     main0_out out = {};
diff --git a/reference/shaders-msl-no-opt/vert/functions_nested.vert b/reference/shaders-msl-no-opt/vert/functions_nested.vert
index aa66304682..43a508613b 100644
--- a/reference/shaders-msl-no-opt/vert/functions_nested.vert
+++ b/reference/shaders-msl-no-opt/vert/functions_nested.vert
@@ -114,16 +114,16 @@ float4 fetch_attr(thread const attr_desc& desc, thread const int& vertex_id, thr
             }
         }
     }
-    float4 _209;
+    float4 _210;
     if (reverse_order)
     {
-        _209 = result.wzyx;
+        _210 = result.wzyx;
     }
     else
     {
-        _209 = result;
+        _210 = result;
     }
-    return _209;
+    return _210;
 }
 
 float4 read_location(thread const int& location, constant VertexBuffer& v_227, thread uint& gl_VertexIndex, thread texture2d<uint> buff_in_2, thread texture2d<uint> buff_in_1)
@@ -167,7 +167,7 @@ void vs_adjust(thread float4& dst_reg0, thread float4& dst_reg1, thread float4&
     dst_reg0.x = float4(dot(float4(tmp0.xyz, 1.0), v_309.vc[0])).x;
 }
 
-vertex main0_out main0(constant VertexBuffer& v_227 [[buffer(0)]], constant VertexConstantsBuffer& v_309 [[buffer(1)]], texture2d<uint> buff_in_2 [[texture(0)]], texture2d<uint> buff_in_1 [[texture(1)]], uint gl_VertexIndex [[vertex_id]])
+vertex main0_out main0(constant VertexBuffer& v_227 [[buffer(0)]], constant VertexConstantsBuffer& v_309 [[buffer(1)]], texture2d<uint> buff_in_1 [[texture(3)]], texture2d<uint> buff_in_2 [[texture(4)]], uint gl_VertexIndex [[vertex_id]])
 {
     main0_out out = {};
     float4 dst_reg0 = float4(0.0, 0.0, 0.0, 1.0);
diff --git a/reference/shaders-msl/asm/comp/bitcast_sar.asm.comp b/reference/shaders-msl/asm/comp/bitcast_sar.asm.comp
index 20d6fe9e9d..4176830588 100644
--- a/reference/shaders-msl/asm/comp/bitcast_sar.asm.comp
+++ b/reference/shaders-msl/asm/comp/bitcast_sar.asm.comp
@@ -17,13 +17,15 @@ struct _4
 
 kernel void main0(device _3& _5 [[buffer(0)]], device _4& _6 [[buffer(1)]])
 {
-    _6._m0 = uint4(int4(_5._m1) >> _5._m0);
-    _6._m0 = uint4(_5._m0 >> int4(_5._m1));
-    _6._m0 = uint4(int4(_5._m1) >> int4(_5._m1));
-    _6._m0 = uint4(_5._m0 >> _5._m0);
-    _6._m1 = int4(_5._m1) >> int4(_5._m1);
-    _6._m1 = _5._m0 >> _5._m0;
-    _6._m1 = int4(_5._m1) >> _5._m0;
-    _6._m1 = _5._m0 >> int4(_5._m1);
+    int4 _22 = _5._m0;
+    uint4 _23 = _5._m1;
+    _6._m0 = uint4(int4(_23) >> _22);
+    _6._m0 = uint4(_22 >> int4(_23));
+    _6._m0 = uint4(int4(_23) >> int4(_23));
+    _6._m0 = uint4(_22 >> _22);
+    _6._m1 = int4(_23) >> int4(_23);
+    _6._m1 = _22 >> _22;
+    _6._m1 = int4(_23) >> _22;
+    _6._m1 = _22 >> int4(_23);
 }
 
diff --git a/reference/shaders-msl/asm/comp/bitcast_sdiv.asm.comp b/reference/shaders-msl/asm/comp/bitcast_sdiv.asm.comp
index f18b318bbb..6b80dff310 100644
--- a/reference/shaders-msl/asm/comp/bitcast_sdiv.asm.comp
+++ b/reference/shaders-msl/asm/comp/bitcast_sdiv.asm.comp
@@ -17,13 +17,15 @@ struct _4
 
 kernel void main0(device _3& _5 [[buffer(0)]], device _4& _6 [[buffer(1)]])
 {
-    _6._m0 = uint4(int4(_5._m1) / _5._m0);
-    _6._m0 = uint4(_5._m0 / int4(_5._m1));
-    _6._m0 = uint4(int4(_5._m1) / int4(_5._m1));
-    _6._m0 = uint4(_5._m0 / _5._m0);
-    _6._m1 = int4(_5._m1) / int4(_5._m1);
-    _6._m1 = _5._m0 / _5._m0;
-    _6._m1 = int4(_5._m1) / _5._m0;
-    _6._m1 = _5._m0 / int4(_5._m1);
+    int4 _22 = _5._m0;
+    uint4 _23 = _5._m1;
+    _6._m0 = uint4(int4(_23) / _22);
+    _6._m0 = uint4(_22 / int4(_23));
+    _6._m0 = uint4(int4(_23) / int4(_23));
+    _6._m0 = uint4(_22 / _22);
+    _6._m1 = int4(_23) / int4(_23);
+    _6._m1 = _22 / _22;
+    _6._m1 = int4(_23) / _22;
+    _6._m1 = _22 / int4(_23);
 }
 
diff --git a/reference/shaders-msl/asm/comp/bitcast_slr.asm.comp b/reference/shaders-msl/asm/comp/bitcast_slr.asm.comp
index 9fd60bef26..1dfca39181 100644
--- a/reference/shaders-msl/asm/comp/bitcast_slr.asm.comp
+++ b/reference/shaders-msl/asm/comp/bitcast_slr.asm.comp
@@ -17,13 +17,15 @@ struct _4
 
 kernel void main0(device _3& _5 [[buffer(0)]], device _4& _6 [[buffer(1)]])
 {
-    _6._m0 = _5._m1 >> uint4(_5._m0);
-    _6._m0 = uint4(_5._m0) >> _5._m1;
-    _6._m0 = _5._m1 >> _5._m1;
-    _6._m0 = uint4(_5._m0) >> uint4(_5._m0);
-    _6._m1 = int4(_5._m1 >> _5._m1);
-    _6._m1 = int4(uint4(_5._m0) >> uint4(_5._m0));
-    _6._m1 = int4(_5._m1 >> uint4(_5._m0));
-    _6._m1 = int4(uint4(_5._m0) >> _5._m1);
+    int4 _22 = _5._m0;
+    uint4 _23 = _5._m1;
+    _6._m0 = _23 >> uint4(_22);
+    _6._m0 = uint4(_22) >> _23;
+    _6._m0 = _23 >> _23;
+    _6._m0 = uint4(_22) >> uint4(_22);
+    _6._m1 = int4(_23 >> _23);
+    _6._m1 = int4(uint4(_22) >> uint4(_22));
+    _6._m1 = int4(_23 >> uint4(_22));
+    _6._m1 = int4(uint4(_22) >> _23);
 }
 
diff --git a/reference/opt/shaders-msl/asm/comp/storage-buffer-basic.asm.comp b/reference/shaders-msl/asm/comp/storage-buffer-basic.invalid.asm.comp
similarity index 94%
rename from reference/opt/shaders-msl/asm/comp/storage-buffer-basic.asm.comp
rename to reference/shaders-msl/asm/comp/storage-buffer-basic.invalid.asm.comp
index 3df6161fc2..2c9b038b20 100644
--- a/reference/opt/shaders-msl/asm/comp/storage-buffer-basic.asm.comp
+++ b/reference/shaders-msl/asm/comp/storage-buffer-basic.invalid.asm.comp
@@ -17,5 +17,6 @@ struct _6
 kernel void main0(device _6& _8 [[buffer(0)]], device _6& _9 [[buffer(1)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
 {
     _8._m0[gl_WorkGroupID.x] = _9._m0[gl_WorkGroupID.x] + _8._m0[gl_WorkGroupID.x];
+    uint3 _23 = gl_WorkGroupSize;
 }
 
diff --git a/reference/shaders-msl/asm/frag/combined-sampler-reuse.asm.frag b/reference/shaders-msl/asm/frag/combined-sampler-reuse.asm.frag
new file mode 100644
index 0000000000..e420153bf1
--- /dev/null
+++ b/reference/shaders-msl/asm/frag/combined-sampler-reuse.asm.frag
@@ -0,0 +1,23 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float4 FragColor [[color(0)]];
+};
+
+struct main0_in
+{
+    float2 vUV [[user(locn0)]];
+};
+
+fragment main0_out main0(main0_in in [[stage_in]], texture2d<float> uTex [[texture(1)]], sampler uSampler [[sampler(0)]])
+{
+    main0_out out = {};
+    out.FragColor = uTex.sample(uSampler, in.vUV);
+    out.FragColor += uTex.sample(uSampler, in.vUV, int2(1));
+    return out;
+}
+
diff --git a/reference/shaders-msl/asm/frag/empty-struct.asm.frag b/reference/shaders-msl/asm/frag/empty-struct.asm.frag
new file mode 100644
index 0000000000..366ab16577
--- /dev/null
+++ b/reference/shaders-msl/asm/frag/empty-struct.asm.frag
@@ -0,0 +1,30 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct EmptyStructTest
+{
+    int empty_struct_member;
+};
+
+float GetValue(thread const EmptyStructTest& self)
+{
+    return 0.0;
+}
+
+float GetValue_1(thread const EmptyStructTest& self)
+{
+    return 0.0;
+}
+
+fragment void main0()
+{
+    EmptyStructTest _23 = EmptyStructTest{ 0 };
+    EmptyStructTest emptyStruct;
+    float value = GetValue(emptyStruct);
+    value = GetValue_1(_23);
+}
+
diff --git a/reference/shaders-msl/asm/frag/frem.asm.frag b/reference/shaders-msl/asm/frag/frem.asm.frag
index f7c1f2ce88..ebc73d52df 100644
--- a/reference/shaders-msl/asm/frag/frem.asm.frag
+++ b/reference/shaders-msl/asm/frag/frem.asm.frag
@@ -3,17 +3,17 @@
 
 using namespace metal;
 
-struct main0_in
-{
-    float4 vB [[user(locn1)]];
-    float4 vA [[user(locn0)]];
-};
-
 struct main0_out
 {
     float4 FragColor [[color(0)]];
 };
 
+struct main0_in
+{
+    float4 vA [[user(locn0)]];
+    float4 vB [[user(locn1)]];
+};
+
 fragment main0_out main0(main0_in in [[stage_in]])
 {
     main0_out out = {};
diff --git a/reference/shaders-msl/asm/frag/image-extract-reuse.asm.frag b/reference/shaders-msl/asm/frag/image-extract-reuse.asm.frag
new file mode 100644
index 0000000000..0d691b306d
--- /dev/null
+++ b/reference/shaders-msl/asm/frag/image-extract-reuse.asm.frag
@@ -0,0 +1,17 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    int2 Size [[color(0)]];
+};
+
+fragment main0_out main0(texture2d<float> uTexture [[texture(0)]], sampler uTextureSmplr [[sampler(0)]])
+{
+    main0_out out = {};
+    out.Size = int2(uTexture.get_width(), uTexture.get_height()) + int2(uTexture.get_width(1), uTexture.get_height(1));
+    return out;
+}
+
diff --git a/reference/shaders-msl/asm/frag/implicit-read-dep-phi.asm.frag b/reference/shaders-msl/asm/frag/implicit-read-dep-phi.asm.frag
new file mode 100644
index 0000000000..830df0c7e9
--- /dev/null
+++ b/reference/shaders-msl/asm/frag/implicit-read-dep-phi.asm.frag
@@ -0,0 +1,49 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float4 FragColor [[color(0)]];
+};
+
+struct main0_in
+{
+    float4 v0 [[user(locn0)]];
+};
+
+fragment main0_out main0(main0_in in [[stage_in]], texture2d<float> uImage [[texture(0)]], sampler uImageSmplr [[sampler(0)]])
+{
+    main0_out out = {};
+    int i = 0;
+    float phi;
+    float4 _36;
+    phi = 1.0;
+    _36 = float4(1.0, 2.0, 1.0, 2.0);
+    for (;;)
+    {
+        out.FragColor = _36;
+        if (i < 4)
+        {
+            if (in.v0[i] > 0.0)
+            {
+                float2 _48 = float2(phi);
+                i++;
+                phi += 2.0;
+                _36 = uImage.sample(uImageSmplr, _48, level(0.0));
+                continue;
+            }
+            else
+            {
+                break;
+            }
+        }
+        else
+        {
+            break;
+        }
+    }
+    return out;
+}
+
diff --git a/reference/shaders-msl/asm/frag/inliner-dominator-inside-loop.asm.frag b/reference/shaders-msl/asm/frag/inliner-dominator-inside-loop.asm.frag
deleted file mode 100644
index 2ac8cbe015..0000000000
--- a/reference/shaders-msl/asm/frag/inliner-dominator-inside-loop.asm.frag
+++ /dev/null
@@ -1,235 +0,0 @@
-#include <metal_stdlib>
-#include <simd/simd.h>
-
-using namespace metal;
-
-struct VertexOutput
-{
-    float4 HPosition;
-    float4 Uv_EdgeDistance1;
-    float4 UvStuds_EdgeDistance2;
-    float4 Color;
-    float4 LightPosition_Fog;
-    float4 View_Depth;
-    float4 Normal_SpecPower;
-    float3 Tangent;
-    float4 PosLightSpace_Reflectance;
-    float studIndex;
-};
-
-struct Surface
-{
-    float3 albedo;
-    float3 normal;
-    float specular;
-    float gloss;
-    float reflectance;
-    float opacity;
-};
-
-struct SurfaceInput
-{
-    float4 Color;
-    float2 Uv;
-    float2 UvStuds;
-};
-
-struct Globals
-{
-    float4x4 ViewProjection;
-    float4 ViewRight;
-    float4 ViewUp;
-    float4 ViewDir;
-    float3 CameraPosition;
-    float3 AmbientColor;
-    float3 Lamp0Color;
-    float3 Lamp0Dir;
-    float3 Lamp1Color;
-    float4 FogParams;
-    float3 FogColor;
-    float4 LightBorder;
-    float4 LightConfig0;
-    float4 LightConfig1;
-    float4 LightConfig2;
-    float4 LightConfig3;
-    float4 RefractionBias_FadeDistance_GlowFactor;
-    float4 OutlineBrightness_ShadowInfo;
-    float4 ShadowMatrix0;
-    float4 ShadowMatrix1;
-    float4 ShadowMatrix2;
-};
-
-struct Params
-{
-    float4 LqmatFarTilingFactor;
-};
-
-struct CB0
-{
-    Globals CB0;
-};
-
-struct CB2
-{
-    Params CB2;
-};
-
-constant VertexOutput _121 = {};
-constant SurfaceInput _122 = {};
-constant float2 _123 = {};
-constant float4 _124 = {};
-constant Surface _125 = {};
-constant float4 _192 = {};
-constant float4 _219 = {};
-constant float4 _297 = {};
-
-struct main0_in
-{
-    float IN_studIndex [[user(locn8)]];
-    float4 IN_PosLightSpace_Reflectance [[user(locn7)]];
-    float3 IN_Tangent [[user(locn6)]];
-    float4 IN_Normal_SpecPower [[user(locn5)]];
-    float4 IN_View_Depth [[user(locn4)]];
-    float4 IN_LightPosition_Fog [[user(locn3)]];
-    float4 IN_Color [[user(locn2)]];
-    float4 IN_UvStuds_EdgeDistance2 [[user(locn1)]];
-    float4 IN_Uv_EdgeDistance1 [[user(locn0)]];
-};
-
-struct main0_out
-{
-    float4 _entryPointOutput [[color(0)]];
-};
-
-fragment main0_out main0(main0_in in [[stage_in]], constant CB0& _19 [[buffer(0)]], texture3d<float> LightMapTexture [[texture(0)]], texture2d<float> ShadowMapTexture [[texture(1)]], texturecube<float> EnvironmentMapTexture [[texture(2)]], texture2d<float> DiffuseMapTexture [[texture(3)]], texture2d<float> NormalMapTexture [[texture(4)]], texture2d<float> NormalDetailMapTexture [[texture(5)]], texture2d<float> StudsMapTexture [[texture(6)]], texture2d<float> SpecularMapTexture [[texture(7)]], sampler LightMapSampler [[sampler(0)]], sampler ShadowMapSampler [[sampler(1)]], sampler EnvironmentMapSampler [[sampler(2)]], sampler DiffuseMapSampler [[sampler(3)]], sampler NormalMapSampler [[sampler(4)]], sampler NormalDetailMapSampler [[sampler(5)]], sampler StudsMapSampler [[sampler(6)]], sampler SpecularMapSampler [[sampler(7)]], float4 gl_FragCoord [[position]])
-{
-    main0_out out = {};
-    VertexOutput _128 = _121;
-    _128.HPosition = gl_FragCoord;
-    VertexOutput _130 = _128;
-    _130.Uv_EdgeDistance1 = in.IN_Uv_EdgeDistance1;
-    VertexOutput _132 = _130;
-    _132.UvStuds_EdgeDistance2 = in.IN_UvStuds_EdgeDistance2;
-    VertexOutput _134 = _132;
-    _134.Color = in.IN_Color;
-    VertexOutput _136 = _134;
-    _136.LightPosition_Fog = in.IN_LightPosition_Fog;
-    VertexOutput _138 = _136;
-    _138.View_Depth = in.IN_View_Depth;
-    VertexOutput _140 = _138;
-    _140.Normal_SpecPower = in.IN_Normal_SpecPower;
-    VertexOutput _142 = _140;
-    _142.Tangent = in.IN_Tangent;
-    VertexOutput _144 = _142;
-    _144.PosLightSpace_Reflectance = in.IN_PosLightSpace_Reflectance;
-    VertexOutput _146 = _144;
-    _146.studIndex = in.IN_studIndex;
-    SurfaceInput _147 = _122;
-    _147.Color = in.IN_Color;
-    SurfaceInput _149 = _147;
-    _149.Uv = in.IN_Uv_EdgeDistance1.xy;
-    SurfaceInput _151 = _149;
-    _151.UvStuds = in.IN_UvStuds_EdgeDistance2.xy;
-    SurfaceInput _156 = _151;
-    _156.UvStuds.y = (fract(_151.UvStuds.y) + in.IN_studIndex) * 0.25;
-    float _163 = _146.View_Depth.w * _19.CB0.RefractionBias_FadeDistance_GlowFactor.y;
-    float _165 = clamp(1.0 - _163, 0.0, 1.0);
-    float2 _166 = in.IN_Uv_EdgeDistance1.xy * 1.0;
-    bool _173;
-    float4 _193;
-    do
-    {
-        _173 = 0.0 == 0.0;
-        if (_173)
-        {
-            _193 = DiffuseMapTexture.sample(DiffuseMapSampler, _166);
-            break;
-        }
-        else
-        {
-            float _180 = 1.0 / (1.0 - 0.0);
-            _193 = mix(DiffuseMapTexture.sample(DiffuseMapSampler, (_166 * 0.25)), DiffuseMapTexture.sample(DiffuseMapSampler, _166), float4(clamp((clamp(1.0 - (_146.View_Depth.w * 0.00333332992158830165863037109375), 0.0, 1.0) * _180) - (0.0 * _180), 0.0, 1.0)));
-            break;
-        }
-        _193 = _192;
-        break;
-    } while (false);
-    float4 _220;
-    do
-    {
-        if (_173)
-        {
-            _220 = NormalMapTexture.sample(NormalMapSampler, _166);
-            break;
-        }
-        else
-        {
-            float _207 = 1.0 / (1.0 - 0.0);
-            _220 = mix(NormalMapTexture.sample(NormalMapSampler, (_166 * 0.25)), NormalMapTexture.sample(NormalMapSampler, _166), float4(clamp((_165 * _207) - (0.0 * _207), 0.0, 1.0)));
-            break;
-        }
-        _220 = _219;
-        break;
-    } while (false);
-    float2 _223 = float2(1.0);
-    float2 _224 = (_220.wy * 2.0) - _223;
-    float3 _232 = float3(_224, sqrt(clamp(1.0 + dot(-_224, _224), 0.0, 1.0)));
-    float2 _240 = (NormalDetailMapTexture.sample(NormalDetailMapSampler, (_166 * 0.0)).wy * 2.0) - _223;
-    float2 _252 = _232.xy + (float3(_240, sqrt(clamp(1.0 + dot(-_240, _240), 0.0, 1.0))).xy * 0.0);
-    float3 _253 = float3(_252.x, _252.y, _232.z);
-    float2 _255 = _253.xy * _165;
-    float3 _256 = float3(_255.x, _255.y, _253.z);
-    float3 _271 = ((in.IN_Color.xyz * (_193 * 1.0).xyz) * (1.0 + (_256.x * 0.300000011920928955078125))) * (StudsMapTexture.sample(StudsMapSampler, _156.UvStuds).x * 2.0);
-    float4 _298;
-    do
-    {
-        if (0.75 == 0.0)
-        {
-            _298 = SpecularMapTexture.sample(SpecularMapSampler, _166);
-            break;
-        }
-        else
-        {
-            float _285 = 1.0 / (1.0 - 0.75);
-            _298 = mix(SpecularMapTexture.sample(SpecularMapSampler, (_166 * 0.25)), SpecularMapTexture.sample(SpecularMapSampler, _166), float4(clamp((_165 * _285) - (0.75 * _285), 0.0, 1.0)));
-            break;
-        }
-        _298 = _297;
-        break;
-    } while (false);
-    float2 _303 = mix(float2(0.800000011920928955078125, 120.0), (_298.xy * float2(2.0, 256.0)) + float2(0.0, 0.00999999977648258209228515625), float2(_165));
-    Surface _304 = _125;
-    _304.albedo = _271;
-    Surface _305 = _304;
-    _305.normal = _256;
-    float _306 = _303.x;
-    Surface _307 = _305;
-    _307.specular = _306;
-    float _308 = _303.y;
-    Surface _309 = _307;
-    _309.gloss = _308;
-    float _312 = (_298.xy.y * _165) * 0.0;
-    Surface _313 = _309;
-    _313.reflectance = _312;
-    float4 _318 = float4(_271, _146.Color.w);
-    float3 _329 = normalize(((in.IN_Tangent * _313.normal.x) + (cross(in.IN_Normal_SpecPower.xyz, in.IN_Tangent) * _313.normal.y)) + (in.IN_Normal_SpecPower.xyz * _313.normal.z));
-    float3 _332 = -_19.CB0.Lamp0Dir;
-    float _333 = dot(_329, _332);
-    float _357 = clamp(dot(step(_19.CB0.LightConfig3.xyz, abs(in.IN_LightPosition_Fog.xyz - _19.CB0.LightConfig2.xyz)), float3(1.0)), 0.0, 1.0);
-    float4 _368 = mix(LightMapTexture.sample(LightMapSampler, (in.IN_LightPosition_Fog.xyz.yzx - (in.IN_LightPosition_Fog.xyz.yzx * _357))), _19.CB0.LightBorder, float4(_357));
-    float2 _376 = ShadowMapTexture.sample(ShadowMapSampler, in.IN_PosLightSpace_Reflectance.xyz.xy).xy;
-    float _392 = (1.0 - (((step(_376.x, in.IN_PosLightSpace_Reflectance.xyz.z) * clamp(9.0 - (20.0 * abs(in.IN_PosLightSpace_Reflectance.xyz.z - 0.5)), 0.0, 1.0)) * _376.y) * _19.CB0.OutlineBrightness_ShadowInfo.w)) * _368.w;
-    float3 _403 = mix(_318.xyz, EnvironmentMapTexture.sample(EnvironmentMapSampler, reflect(-in.IN_View_Depth.xyz, _329)).xyz, float3(_312));
-    float4 _404 = float4(_403.x, _403.y, _403.z, _318.w);
-    float3 _422 = (((_19.CB0.AmbientColor + (((_19.CB0.Lamp0Color * clamp(_333, 0.0, 1.0)) + (_19.CB0.Lamp1Color * max(-_333, 0.0))) * _392)) + _368.xyz) * _404.xyz) + (_19.CB0.Lamp0Color * (((step(0.0, _333) * _306) * _392) * pow(clamp(dot(_329, normalize(_332 + normalize(in.IN_View_Depth.xyz))), 0.0, 1.0), _308)));
-    float4 _425 = float4(_422.x, _422.y, _422.z, _124.w);
-    _425.w = _404.w;
-    float2 _435 = min(in.IN_Uv_EdgeDistance1.wz, in.IN_UvStuds_EdgeDistance2.wz);
-    float _439 = min(_435.x, _435.y) / _163;
-    float3 _445 = _425.xyz * clamp((clamp((_163 * _19.CB0.OutlineBrightness_ShadowInfo.x) + _19.CB0.OutlineBrightness_ShadowInfo.y, 0.0, 1.0) * (1.5 - _439)) + _439, 0.0, 1.0);
-    float4 _446 = float4(_445.x, _445.y, _445.z, _425.w);
-    float3 _453 = mix(_19.CB0.FogColor, _446.xyz, float3(clamp(_146.LightPosition_Fog.w, 0.0, 1.0)));
-    out._entryPointOutput = float4(_453.x, _453.y, _453.z, _446.w);
-    return out;
-}
-
diff --git a/reference/shaders-msl/asm/frag/srem.asm.frag b/reference/shaders-msl/asm/frag/srem.asm.frag
new file mode 100644
index 0000000000..f0cdd574de
--- /dev/null
+++ b/reference/shaders-msl/asm/frag/srem.asm.frag
@@ -0,0 +1,23 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float4 FragColor [[color(0)]];
+};
+
+struct main0_in
+{
+    int4 vA [[user(locn0)]];
+    int4 vB [[user(locn1)]];
+};
+
+fragment main0_out main0(main0_in in [[stage_in]])
+{
+    main0_out out = {};
+    out.FragColor = float4(in.vA - in.vB * (in.vA / in.vB));
+    return out;
+}
+
diff --git a/reference/shaders-msl/asm/frag/texel-fetch-no-lod.asm.frag b/reference/shaders-msl/asm/frag/texel-fetch-no-lod.asm.frag
new file mode 100644
index 0000000000..dd308c32ad
--- /dev/null
+++ b/reference/shaders-msl/asm/frag/texel-fetch-no-lod.asm.frag
@@ -0,0 +1,17 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float4 FragColor [[color(0)]];
+};
+
+fragment main0_out main0(texture2d<float> uTexture [[texture(0)]], sampler uTextureSmplr [[sampler(0)]], float4 gl_FragCoord [[position]])
+{
+    main0_out out = {};
+    out.FragColor = uTexture.read(uint2(int2(gl_FragCoord.xy)), 0);
+    return out;
+}
+
diff --git a/reference/shaders-msl/asm/frag/unreachable.asm.frag b/reference/shaders-msl/asm/frag/unreachable.asm.frag
index 7a98487221..7ae4aa5e60 100644
--- a/reference/shaders-msl/asm/frag/unreachable.asm.frag
+++ b/reference/shaders-msl/asm/frag/unreachable.asm.frag
@@ -5,16 +5,16 @@ using namespace metal;
 
 constant float4 _21 = {};
 
-struct main0_in
-{
-    int counter [[user(locn0)]];
-};
-
 struct main0_out
 {
     float4 FragColor [[color(0)]];
 };
 
+struct main0_in
+{
+    int counter [[user(locn0)]];
+};
+
 fragment main0_out main0(main0_in in [[stage_in]])
 {
     main0_out out = {};
diff --git a/reference/shaders-msl/asm/frag/vector-shuffle-oom.asm.frag b/reference/shaders-msl/asm/frag/vector-shuffle-oom.asm.frag
index 97daea5d90..9f9b827ce3 100644
--- a/reference/shaders-msl/asm/frag/vector-shuffle-oom.asm.frag
+++ b/reference/shaders-msl/asm/frag/vector-shuffle-oom.asm.frag
@@ -95,7 +95,7 @@ struct main0_out
     float4 m_5 [[color(0)]];
 };
 
-fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buffer(1)]], constant _18& _19 [[buffer(2)]], texture2d<float> _8 [[texture(0)]], texture2d<float> _12 [[texture(1)]], texture2d<float> _14 [[texture(2)]], sampler _9 [[sampler(0)]], sampler _13 [[sampler(1)]], sampler _15 [[sampler(2)]], float4 gl_FragCoord [[position]])
+fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _18& _19 [[buffer(1)]], constant _10& _11 [[buffer(2)]], texture2d<float> _14 [[texture(4)]], texture2d<float> _12 [[texture(13)]], texture2d<float> _8 [[texture(14)]], sampler _15 [[sampler(3)]], sampler _13 [[sampler(5)]], sampler _9 [[sampler(6)]], float4 gl_FragCoord [[position]])
 {
     main0_out out = {};
     _28 _77 = _74;
@@ -103,7 +103,7 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff
     float2 _82 = gl_FragCoord.xy * _19._m23.xy;
     float4 _88 = _7._m2 * _7._m0.xyxy;
     float2 _97 = clamp(_82 + (float3(0.0, -2.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
-    float3 _109 = _11._m5 * clamp(_8.sample(_9, _97, level(0.0)).w * _7._m1, 0.0, 1.0);
+    float3 _109 = float3(_11._m5) * clamp(_8.sample(_9, _97, level(0.0)).w * _7._m1, 0.0, 1.0);
     float4 _113 = _12.sample(_13, _97, level(0.0));
     float3 _129;
     if (_113.y > 0.0)
@@ -114,12 +114,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff
     {
         _129 = _109;
     }
-    float3 _133 = float4(0.0).xyz + (_129 * 0.5);
+    float3 _130 = _129 * 0.5;
+    float3 _133 = float4(0.0).xyz + _130;
     float4 _134 = float4(_133.x, _133.y, _133.z, float4(0.0).w);
     _28 _135 = _77;
     _135._m0 = _134;
     float2 _144 = clamp(_82 + (float3(-1.0, -1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
-    float3 _156 = _11._m5 * clamp(_8.sample(_9, _144, level(0.0)).w * _7._m1, 0.0, 1.0);
+    float3 _156 = float3(_11._m5) * clamp(_8.sample(_9, _144, level(0.0)).w * _7._m1, 0.0, 1.0);
     float4 _160 = _12.sample(_13, _144, level(0.0));
     float3 _176;
     if (_160.y > 0.0)
@@ -130,12 +131,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff
     {
         _176 = _156;
     }
-    float3 _180 = _134.xyz + (_176 * 0.5);
+    float3 _177 = _176 * 0.5;
+    float3 _180 = _134.xyz + _177;
     float4 _181 = float4(_180.x, _180.y, _180.z, _134.w);
     _28 _182 = _135;
     _182._m0 = _181;
     float2 _191 = clamp(_82 + (float3(0.0, -1.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw);
-    float3 _203 = _11._m5 * clamp(_8.sample(_9, _191, level(0.0)).w * _7._m1, 0.0, 1.0);
+    float3 _203 = float3(_11._m5) * clamp(_8.sample(_9, _191, level(0.0)).w * _7._m1, 0.0, 1.0);
     float4 _207 = _12.sample(_13, _191, level(0.0));
     float3 _223;
     if (_207.y > 0.0)
@@ -146,12 +148,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff
     {
         _223 = _203;
     }
-    float3 _227 = _181.xyz + (_223 * 0.75);
+    float3 _224 = _223 * 0.75;
+    float3 _227 = _181.xyz + _224;
     float4 _228 = float4(_227.x, _227.y, _227.z, _181.w);
     _28 _229 = _182;
     _229._m0 = _228;
     float2 _238 = clamp(_82 + (float3(1.0, -1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
-    float3 _250 = _11._m5 * clamp(_8.sample(_9, _238, level(0.0)).w * _7._m1, 0.0, 1.0);
+    float3 _250 = float3(_11._m5) * clamp(_8.sample(_9, _238, level(0.0)).w * _7._m1, 0.0, 1.0);
     float4 _254 = _12.sample(_13, _238, level(0.0));
     float3 _270;
     if (_254.y > 0.0)
@@ -162,12 +165,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff
     {
         _270 = _250;
     }
-    float3 _274 = _228.xyz + (_270 * 0.5);
+    float3 _271 = _270 * 0.5;
+    float3 _274 = _228.xyz + _271;
     float4 _275 = float4(_274.x, _274.y, _274.z, _228.w);
     _28 _276 = _229;
     _276._m0 = _275;
     float2 _285 = clamp(_82 + (float3(-2.0, 0.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
-    float3 _297 = _11._m5 * clamp(_8.sample(_9, _285, level(0.0)).w * _7._m1, 0.0, 1.0);
+    float3 _297 = float3(_11._m5) * clamp(_8.sample(_9, _285, level(0.0)).w * _7._m1, 0.0, 1.0);
     float4 _301 = _12.sample(_13, _285, level(0.0));
     float3 _317;
     if (_301.y > 0.0)
@@ -178,12 +182,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff
     {
         _317 = _297;
     }
-    float3 _321 = _275.xyz + (_317 * 0.5);
+    float3 _318 = _317 * 0.5;
+    float3 _321 = _275.xyz + _318;
     float4 _322 = float4(_321.x, _321.y, _321.z, _275.w);
     _28 _323 = _276;
     _323._m0 = _322;
     float2 _332 = clamp(_82 + (float3(-1.0, 0.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw);
-    float3 _344 = _11._m5 * clamp(_8.sample(_9, _332, level(0.0)).w * _7._m1, 0.0, 1.0);
+    float3 _344 = float3(_11._m5) * clamp(_8.sample(_9, _332, level(0.0)).w * _7._m1, 0.0, 1.0);
     float4 _348 = _12.sample(_13, _332, level(0.0));
     float3 _364;
     if (_348.y > 0.0)
@@ -194,12 +199,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff
     {
         _364 = _344;
     }
-    float3 _368 = _322.xyz + (_364 * 0.75);
+    float3 _365 = _364 * 0.75;
+    float3 _368 = _322.xyz + _365;
     float4 _369 = float4(_368.x, _368.y, _368.z, _322.w);
     _28 _370 = _323;
     _370._m0 = _369;
     float2 _379 = clamp(_82 + (float3(0.0, 0.0, 1.0).xy * _7._m0.xy), _88.xy, _88.zw);
-    float3 _391 = _11._m5 * clamp(_8.sample(_9, _379, level(0.0)).w * _7._m1, 0.0, 1.0);
+    float3 _391 = float3(_11._m5) * clamp(_8.sample(_9, _379, level(0.0)).w * _7._m1, 0.0, 1.0);
     float4 _395 = _12.sample(_13, _379, level(0.0));
     float3 _411;
     if (_395.y > 0.0)
@@ -210,12 +216,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff
     {
         _411 = _391;
     }
-    float3 _415 = _369.xyz + (_411 * 1.0);
+    float3 _412 = _411 * 1.0;
+    float3 _415 = _369.xyz + _412;
     float4 _416 = float4(_415.x, _415.y, _415.z, _369.w);
     _28 _417 = _370;
     _417._m0 = _416;
     float2 _426 = clamp(_82 + (float3(1.0, 0.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw);
-    float3 _438 = _11._m5 * clamp(_8.sample(_9, _426, level(0.0)).w * _7._m1, 0.0, 1.0);
+    float3 _438 = float3(_11._m5) * clamp(_8.sample(_9, _426, level(0.0)).w * _7._m1, 0.0, 1.0);
     float4 _442 = _12.sample(_13, _426, level(0.0));
     float3 _458;
     if (_442.y > 0.0)
@@ -226,12 +233,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff
     {
         _458 = _438;
     }
-    float3 _462 = _416.xyz + (_458 * 0.75);
+    float3 _459 = _458 * 0.75;
+    float3 _462 = _416.xyz + _459;
     float4 _463 = float4(_462.x, _462.y, _462.z, _416.w);
     _28 _464 = _417;
     _464._m0 = _463;
     float2 _473 = clamp(_82 + (float3(2.0, 0.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
-    float3 _485 = _11._m5 * clamp(_8.sample(_9, _473, level(0.0)).w * _7._m1, 0.0, 1.0);
+    float3 _485 = float3(_11._m5) * clamp(_8.sample(_9, _473, level(0.0)).w * _7._m1, 0.0, 1.0);
     float4 _489 = _12.sample(_13, _473, level(0.0));
     float3 _505;
     if (_489.y > 0.0)
@@ -242,12 +250,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff
     {
         _505 = _485;
     }
-    float3 _509 = _463.xyz + (_505 * 0.5);
+    float3 _506 = _505 * 0.5;
+    float3 _509 = _463.xyz + _506;
     float4 _510 = float4(_509.x, _509.y, _509.z, _463.w);
     _28 _511 = _464;
     _511._m0 = _510;
     float2 _520 = clamp(_82 + (float3(-1.0, 1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
-    float3 _532 = _11._m5 * clamp(_8.sample(_9, _520, level(0.0)).w * _7._m1, 0.0, 1.0);
+    float3 _532 = float3(_11._m5) * clamp(_8.sample(_9, _520, level(0.0)).w * _7._m1, 0.0, 1.0);
     float4 _536 = _12.sample(_13, _520, level(0.0));
     float3 _552;
     if (_536.y > 0.0)
@@ -258,12 +267,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff
     {
         _552 = _532;
     }
-    float3 _556 = _510.xyz + (_552 * 0.5);
+    float3 _553 = _552 * 0.5;
+    float3 _556 = _510.xyz + _553;
     float4 _557 = float4(_556.x, _556.y, _556.z, _510.w);
     _28 _558 = _511;
     _558._m0 = _557;
     float2 _567 = clamp(_82 + (float3(0.0, 1.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw);
-    float3 _579 = _11._m5 * clamp(_8.sample(_9, _567, level(0.0)).w * _7._m1, 0.0, 1.0);
+    float3 _579 = float3(_11._m5) * clamp(_8.sample(_9, _567, level(0.0)).w * _7._m1, 0.0, 1.0);
     float4 _583 = _12.sample(_13, _567, level(0.0));
     float3 _599;
     if (_583.y > 0.0)
@@ -274,12 +284,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff
     {
         _599 = _579;
     }
-    float3 _603 = _557.xyz + (_599 * 0.75);
+    float3 _600 = _599 * 0.75;
+    float3 _603 = _557.xyz + _600;
     float4 _604 = float4(_603.x, _603.y, _603.z, _557.w);
     _28 _605 = _558;
     _605._m0 = _604;
     float2 _614 = clamp(_82 + (float3(1.0, 1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
-    float3 _626 = _11._m5 * clamp(_8.sample(_9, _614, level(0.0)).w * _7._m1, 0.0, 1.0);
+    float3 _626 = float3(_11._m5) * clamp(_8.sample(_9, _614, level(0.0)).w * _7._m1, 0.0, 1.0);
     float4 _630 = _12.sample(_13, _614, level(0.0));
     float3 _646;
     if (_630.y > 0.0)
@@ -290,12 +301,13 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff
     {
         _646 = _626;
     }
-    float3 _650 = _604.xyz + (_646 * 0.5);
+    float3 _647 = _646 * 0.5;
+    float3 _650 = _604.xyz + _647;
     float4 _651 = float4(_650.x, _650.y, _650.z, _604.w);
     _28 _652 = _605;
     _652._m0 = _651;
     float2 _661 = clamp(_82 + (float3(0.0, 2.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw);
-    float3 _673 = _11._m5 * clamp(_8.sample(_9, _661, level(0.0)).w * _7._m1, 0.0, 1.0);
+    float3 _673 = float3(_11._m5) * clamp(_8.sample(_9, _661, level(0.0)).w * _7._m1, 0.0, 1.0);
     float4 _677 = _12.sample(_13, _661, level(0.0));
     float3 _693;
     if (_677.y > 0.0)
diff --git a/reference/shaders-msl/asm/vert/packing-test.asm.vert b/reference/shaders-msl/asm/vert/packing-test.asm.vert
new file mode 100644
index 0000000000..ac2d30c8df
--- /dev/null
+++ b/reference/shaders-msl/asm/vert/packing-test.asm.vert
@@ -0,0 +1,19 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct TestStruct
+{
+    float4x4 transforms[6];
+};
+
+struct CB0
+{
+    TestStruct CB0[16];
+};
+
+vertex void main0()
+{
+}
+
diff --git a/reference/shaders-msl/asm/vert/spec-constant-op-composite.asm.vert b/reference/shaders-msl/asm/vert/spec-constant-op-composite.asm.vert
new file mode 100644
index 0000000000..67485d7324
--- /dev/null
+++ b/reference/shaders-msl/asm/vert/spec-constant-op-composite.asm.vert
@@ -0,0 +1,37 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+constant int _7_tmp [[function_constant(201)]];
+constant int _7 = is_function_constant_defined(_7_tmp) ? _7_tmp : -10;
+constant uint _8_tmp [[function_constant(202)]];
+constant uint _8 = is_function_constant_defined(_8_tmp) ? _8_tmp : 100u;
+constant float _9_tmp [[function_constant(200)]];
+constant float _9 = is_function_constant_defined(_9_tmp) ? _9_tmp : 3.141590118408203125;
+constant int _20 = (_7 + 2);
+constant uint _25 = (_8 % 5u);
+constant int4 _30 = int4(20, 30, _20, _20);
+constant int2 _32 = int2(_30.y, _30.x);
+constant int _33 = _30.y;
+
+struct main0_out
+{
+    int m_4 [[user(locn0)]];
+    float4 gl_Position [[position]];
+};
+
+vertex main0_out main0()
+{
+    main0_out out = {};
+    float4 pos = float4(0.0);
+    pos.y += float(_20);
+    pos.z += float(_25);
+    pos += float4(_30);
+    float2 _56 = pos.xy + float2(_32);
+    pos = float4(_56.x, _56.y, pos.z, pos.w);
+    out.gl_Position = pos;
+    out.m_4 = _33;
+    return out;
+}
+
diff --git a/reference/shaders-msl/comp/access-private-workgroup-in-function.comp b/reference/shaders-msl/comp/access-private-workgroup-in-function.comp
new file mode 100644
index 0000000000..17acda9678
--- /dev/null
+++ b/reference/shaders-msl/comp/access-private-workgroup-in-function.comp
@@ -0,0 +1,34 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+void set_f(thread int& f)
+{
+    f = 40;
+}
+
+void set_shared_u(threadgroup int& u)
+{
+    u = 50;
+}
+
+kernel void main0(uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]])
+{
+    threadgroup int u;
+    int f;
+    set_f(f);
+    set_shared_u(u);
+    if (gl_LocalInvocationIndex == 0u)
+    {
+        f = 10;
+    }
+    else
+    {
+        f = 30;
+        u = 20;
+    }
+}
+
diff --git a/reference/shaders-msl/comp/atomic.comp b/reference/shaders-msl/comp/atomic.comp
index 90a39ec643..f77922aca0 100644
--- a/reference/shaders-msl/comp/atomic.comp
+++ b/reference/shaders-msl/comp/atomic.comp
@@ -12,25 +12,59 @@ struct SSBO
     int i32;
 };
 
-kernel void main0(device SSBO& ssbo [[buffer(0)]])
+kernel void main0(device SSBO& ssbo [[buffer(2)]])
 {
-    uint _16 = atomic_fetch_add_explicit((volatile device atomic_uint*)&(ssbo.u32), 1u, memory_order_relaxed);
-    uint _18 = atomic_fetch_or_explicit((volatile device atomic_uint*)&(ssbo.u32), 1u, memory_order_relaxed);
-    uint _20 = atomic_fetch_xor_explicit((volatile device atomic_uint*)&(ssbo.u32), 1u, memory_order_relaxed);
-    uint _22 = atomic_fetch_and_explicit((volatile device atomic_uint*)&(ssbo.u32), 1u, memory_order_relaxed);
-    uint _24 = atomic_fetch_min_explicit((volatile device atomic_uint*)&(ssbo.u32), 1u, memory_order_relaxed);
-    uint _26 = atomic_fetch_max_explicit((volatile device atomic_uint*)&(ssbo.u32), 1u, memory_order_relaxed);
-    uint _28 = atomic_exchange_explicit((volatile device atomic_uint*)&(ssbo.u32), 1u, memory_order_relaxed);
-    uint _30 = 10u;
-    uint _32 = atomic_compare_exchange_weak_explicit((volatile device atomic_uint*)&(ssbo.u32), &(_30), 2u, memory_order_relaxed, memory_order_relaxed);
-    int _36 = atomic_fetch_add_explicit((volatile device atomic_int*)&(ssbo.i32), 1, memory_order_relaxed);
-    int _38 = atomic_fetch_or_explicit((volatile device atomic_int*)&(ssbo.i32), 1, memory_order_relaxed);
-    int _40 = atomic_fetch_xor_explicit((volatile device atomic_int*)&(ssbo.i32), 1, memory_order_relaxed);
-    int _42 = atomic_fetch_and_explicit((volatile device atomic_int*)&(ssbo.i32), 1, memory_order_relaxed);
-    int _44 = atomic_fetch_min_explicit((volatile device atomic_int*)&(ssbo.i32), 1, memory_order_relaxed);
-    int _46 = atomic_fetch_max_explicit((volatile device atomic_int*)&(ssbo.i32), 1, memory_order_relaxed);
-    int _48 = atomic_exchange_explicit((volatile device atomic_int*)&(ssbo.i32), 1, memory_order_relaxed);
-    int _50 = 10;
-    int _52 = atomic_compare_exchange_weak_explicit((volatile device atomic_int*)&(ssbo.i32), &(_50), 2, memory_order_relaxed, memory_order_relaxed);
+    threadgroup uint shared_u32;
+    threadgroup int shared_i32;
+    uint _16 = atomic_fetch_add_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _18 = atomic_fetch_or_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _20 = atomic_fetch_xor_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _22 = atomic_fetch_and_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _24 = atomic_fetch_min_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _26 = atomic_fetch_max_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _28 = atomic_exchange_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _32;
+    do
+    {
+        _32 = 10u;
+    } while (!atomic_compare_exchange_weak_explicit((volatile device atomic_uint*)&ssbo.u32, &_32, 2u, memory_order_relaxed, memory_order_relaxed));
+    int _36 = atomic_fetch_add_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    int _38 = atomic_fetch_or_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    int _40 = atomic_fetch_xor_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    int _42 = atomic_fetch_and_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    int _44 = atomic_fetch_min_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    int _46 = atomic_fetch_max_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    int _48 = atomic_exchange_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    int _52;
+    do
+    {
+        _52 = 10;
+    } while (!atomic_compare_exchange_weak_explicit((volatile device atomic_int*)&ssbo.i32, &_52, 2, memory_order_relaxed, memory_order_relaxed));
+    shared_u32 = 10u;
+    shared_i32 = 10;
+    uint _57 = atomic_fetch_add_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _58 = atomic_fetch_or_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _59 = atomic_fetch_xor_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _60 = atomic_fetch_and_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _61 = atomic_fetch_min_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _62 = atomic_fetch_max_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _63 = atomic_exchange_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _64;
+    do
+    {
+        _64 = 10u;
+    } while (!atomic_compare_exchange_weak_explicit((volatile threadgroup atomic_uint*)&shared_u32, &_64, 2u, memory_order_relaxed, memory_order_relaxed));
+    int _65 = atomic_fetch_add_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    int _66 = atomic_fetch_or_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    int _67 = atomic_fetch_xor_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    int _68 = atomic_fetch_and_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    int _69 = atomic_fetch_min_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    int _70 = atomic_fetch_max_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    int _71 = atomic_exchange_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    int _72;
+    do
+    {
+        _72 = 10;
+    } while (!atomic_compare_exchange_weak_explicit((volatile threadgroup atomic_int*)&shared_i32, &_72, 2, memory_order_relaxed, memory_order_relaxed));
 }
 
diff --git a/reference/shaders-msl/comp/bake_gradient.comp b/reference/shaders-msl/comp/bake_gradient.comp
deleted file mode 100644
index 1118f18f8e..0000000000
--- a/reference/shaders-msl/comp/bake_gradient.comp
+++ /dev/null
@@ -1,40 +0,0 @@
-#pragma clang diagnostic ignored "-Wmissing-prototypes"
-
-#include <metal_stdlib>
-#include <simd/simd.h>
-
-using namespace metal;
-
-constant uint3 gl_WorkGroupSize = uint3(8u, 8u, 1u);
-
-struct UBO
-{
-    float4 uInvSize;
-    float4 uScale;
-};
-
-float jacobian(thread const float2& dDdx, thread const float2& dDdy)
-{
-    return ((1.0 + dDdx.x) * (1.0 + dDdy.y)) - (dDdx.y * dDdy.x);
-}
-
-kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], constant UBO& _46 [[buffer(0)]], texture2d<float> uHeight [[texture(0)]], sampler uHeightSmplr [[sampler(0)]], texture2d<float> uDisplacement [[texture(1)]], sampler uDisplacementSmplr [[sampler(1)]], texture2d<float, access::write> iHeightDisplacement [[texture(2)]], texture2d<float, access::write> iGradJacobian [[texture(3)]])
-{
-    float4 uv = (float2(gl_GlobalInvocationID.xy) * _46.uInvSize.xy).xyxy + (_46.uInvSize * 0.5);
-    float h = uHeight.sample(uHeightSmplr, uv.xy, level(0.0)).x;
-    float x0 = uHeight.sample(uHeightSmplr, uv.xy, level(0.0), int2(-1, 0)).x;
-    float x1 = uHeight.sample(uHeightSmplr, uv.xy, level(0.0), int2(1, 0)).x;
-    float y0 = uHeight.sample(uHeightSmplr, uv.xy, level(0.0), int2(0, -1)).x;
-    float y1 = uHeight.sample(uHeightSmplr, uv.xy, level(0.0), int2(0, 1)).x;
-    float2 grad = (_46.uScale.xy * 0.5) * float2(x1 - x0, y1 - y0);
-    float2 displacement = uDisplacement.sample(uDisplacementSmplr, uv.zw, level(0.0)).xy * 1.2000000476837158203125;
-    float2 dDdx = (uDisplacement.sample(uDisplacementSmplr, uv.zw, level(0.0), int2(1, 0)).xy - uDisplacement.sample(uDisplacementSmplr, uv.zw, level(0.0), int2(-1, 0)).xy) * 0.60000002384185791015625;
-    float2 dDdy = (uDisplacement.sample(uDisplacementSmplr, uv.zw, level(0.0), int2(0, 1)).xy - uDisplacement.sample(uDisplacementSmplr, uv.zw, level(0.0), int2(0, -1)).xy) * 0.60000002384185791015625;
-    float2 param = dDdx * _46.uScale.z;
-    float2 param_1 = dDdy * _46.uScale.z;
-    float j = jacobian(param, param_1);
-    displacement = float2(0.0);
-    iHeightDisplacement.write(float4(h, displacement, 0.0), uint2(int2(gl_GlobalInvocationID.xy)));
-    iGradJacobian.write(float4(grad, j, 0.0), uint2(int2(gl_GlobalInvocationID.xy)));
-}
-
diff --git a/reference/shaders-msl/comp/basic.comp b/reference/shaders-msl/comp/basic.comp
index 732b1cb257..6410894ba0 100644
--- a/reference/shaders-msl/comp/basic.comp
+++ b/reference/shaders-msl/comp/basic.comp
@@ -21,13 +21,13 @@ struct SSBO3
     uint counter;
 };
 
-kernel void main0(device SSBO& _23 [[buffer(0)]], device SSBO2& _45 [[buffer(1)]], device SSBO3& _48 [[buffer(2)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
+kernel void main0(const device SSBO& _23 [[buffer(0)]], device SSBO2& _45 [[buffer(1)]], device SSBO3& _48 [[buffer(2)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
 {
     uint ident = gl_GlobalInvocationID.x;
     float4 idata = _23.in_data[ident];
     if (dot(idata, float4(1.0, 5.0, 6.0, 2.0)) > 8.19999980926513671875)
     {
-        uint _52 = atomic_fetch_add_explicit((volatile device atomic_uint*)&(_48.counter), 1u, memory_order_relaxed);
+        uint _52 = atomic_fetch_add_explicit((volatile device atomic_uint*)&_48.counter, 1u, memory_order_relaxed);
         _45.out_data[_52] = idata;
     }
 }
diff --git a/reference/shaders-msl/comp/bitfield.noopt.comp b/reference/shaders-msl/comp/bitfield.noopt.comp
deleted file mode 100644
index 62ef02c997..0000000000
--- a/reference/shaders-msl/comp/bitfield.noopt.comp
+++ /dev/null
@@ -1,47 +0,0 @@
-#pragma clang diagnostic ignored "-Wmissing-prototypes"
-
-#include <metal_stdlib>
-#include <simd/simd.h>
-
-using namespace metal;
-
-// Implementation of the GLSL findLSB() function
-template<typename T>
-T findLSB(T x)
-{
-    return select(ctz(x), T(-1), x == T(0));
-}
-
-// Implementation of the signed GLSL findMSB() function
-template<typename T>
-T findSMSB(T x)
-{
-    T v = select(x, T(-1) - x, x < T(0));
-    return select(clz(T(0)) - (clz(v) + T(1)), T(-1), v == T(0));
-}
-
-// Implementation of the unsigned GLSL findMSB() function
-template<typename T>
-T findUMSB(T x)
-{
-    return select(clz(T(0)) - (clz(x) + T(1)), T(-1), x == T(0));
-}
-
-kernel void main0()
-{
-    int signed_value = 0;
-    uint unsigned_value = 0u;
-    int s = extract_bits(signed_value, 5, 20);
-    uint u = extract_bits(unsigned_value, 6, 21);
-    s = insert_bits(s, 40, 5, 4);
-    u = insert_bits(u, 60u, 5, 4);
-    u = reverse_bits(u);
-    s = reverse_bits(s);
-    int v0 = popcount(u);
-    int v1 = popcount(s);
-    int v2 = findUMSB(u);
-    int v3 = findSMSB(s);
-    int v4 = findLSB(u);
-    int v5 = findLSB(s);
-}
-
diff --git a/reference/shaders-msl/comp/coherent-block.comp b/reference/shaders-msl/comp/coherent-block.comp
index bec9b218c7..963574acd6 100644
--- a/reference/shaders-msl/comp/coherent-block.comp
+++ b/reference/shaders-msl/comp/coherent-block.comp
@@ -8,7 +8,7 @@ struct SSBO
     float4 value;
 };
 
-kernel void main0(device SSBO& _10 [[buffer(0)]])
+kernel void main0(device SSBO& _10 [[buffer(1)]])
 {
     _10.value = float4(20.0);
 }
diff --git a/reference/shaders-msl/comp/coherent-image.comp b/reference/shaders-msl/comp/coherent-image.comp
index 0fe044fb9a..827a247125 100644
--- a/reference/shaders-msl/comp/coherent-image.comp
+++ b/reference/shaders-msl/comp/coherent-image.comp
@@ -8,7 +8,7 @@ struct SSBO
     int4 value;
 };
 
-kernel void main0(device SSBO& _10 [[buffer(0)]], texture2d<int> uImage [[texture(0)]])
+kernel void main0(device SSBO& _10 [[buffer(1)]], texture2d<int> uImage [[texture(3)]])
 {
     _10.value = uImage.read(uint2(int2(10)));
 }
diff --git a/reference/shaders-msl/comp/composite-construct.comp b/reference/shaders-msl/comp/composite-construct.comp
new file mode 100644
index 0000000000..fb4ed1f101
--- /dev/null
+++ b/reference/shaders-msl/comp/composite-construct.comp
@@ -0,0 +1,50 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct SSBO0
+{
+    float4 as[1];
+};
+
+struct SSBO1
+{
+    float4 bs[1];
+};
+
+struct Composite
+{
+    float4 a;
+    float4 b;
+};
+
+constant float4 _43[2] = {float4(20.0), float4(40.0)};
+
+// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment.
+template<typename T, uint N>
+void spvArrayCopy(thread T (&dst)[N], thread const T (&src)[N])
+{
+    for (uint i = 0; i < N; dst[i] = src[i], i++);
+}
+
+// An overload for constant arrays.
+template<typename T, uint N>
+void spvArrayCopyConstant(thread T (&dst)[N], constant T (&src)[N])
+{
+    for (uint i = 0; i < N; dst[i] = src[i], i++);
+}
+
+kernel void main0(device SSBO0& _16 [[buffer(0)]], device SSBO1& _32 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]])
+{
+    float4 _37[2] = { _16.as[gl_GlobalInvocationID.x], _32.bs[gl_GlobalInvocationID.x] };
+    float4 values[2];
+    spvArrayCopy(values, _37);
+    float4 copy_values[2] = {float4(20.0), float4(40.0)};
+    Composite c = Composite{ values[0], copy_values[1] };
+    _16.as[0] = values[gl_LocalInvocationIndex];
+    _32.bs[1] = c.b;
+}
+
diff --git a/reference/shaders-msl/comp/culling.comp b/reference/shaders-msl/comp/culling.comp
index ef84f1d19d..1f6bdcbee6 100644
--- a/reference/shaders-msl/comp/culling.comp
+++ b/reference/shaders-msl/comp/culling.comp
@@ -23,13 +23,13 @@ struct SSBO3
     uint count;
 };
 
-kernel void main0(device SSBO& _22 [[buffer(0)]], device SSBO2& _38 [[buffer(1)]], device SSBO3& _41 [[buffer(2)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
+kernel void main0(const device SSBO& _22 [[buffer(0)]], device SSBO2& _38 [[buffer(1)]], device SSBO3& _41 [[buffer(2)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
 {
     uint ident = gl_GlobalInvocationID.x;
     float idata = _22.in_data[ident];
     if (idata > 12.0)
     {
-        uint _45 = atomic_fetch_add_explicit((volatile device atomic_uint*)&(_41.count), 1u, memory_order_relaxed);
+        uint _45 = atomic_fetch_add_explicit((volatile device atomic_uint*)&_41.count, 1u, memory_order_relaxed);
         _38.out_data[_45] = idata;
     }
 }
diff --git a/reference/shaders-msl/comp/dowhile.comp b/reference/shaders-msl/comp/dowhile.comp
index 5decd415ff..3482fb355b 100644
--- a/reference/shaders-msl/comp/dowhile.comp
+++ b/reference/shaders-msl/comp/dowhile.comp
@@ -14,7 +14,7 @@ struct SSBO2
     float4 out_data[1];
 };
 
-kernel void main0(device SSBO& _28 [[buffer(0)]], device SSBO2& _52 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
+kernel void main0(const device SSBO& _28 [[buffer(0)]], device SSBO2& _52 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
 {
     uint ident = gl_GlobalInvocationID.x;
     int i = 0;
diff --git a/reference/shaders-msl/comp/image-cube-array-load-store.comp b/reference/shaders-msl/comp/image-cube-array-load-store.comp
new file mode 100644
index 0000000000..ef67a326f5
--- /dev/null
+++ b/reference/shaders-msl/comp/image-cube-array-load-store.comp
@@ -0,0 +1,12 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+kernel void main0(texturecube_array<float> uImageIn [[texture(0)]], texturecube_array<float, access::write> uImageOut [[texture(1)]])
+{
+    int3 coord = int3(9, 7, 11);
+    float4 indata = uImageIn.read(uint2(coord.xy), uint(coord.z) % 6u, uint(coord.z) / 6u);
+    uImageOut.write(indata, uint2(coord.xy), uint(coord.z) % 6u, uint(coord.z) / 6u);
+}
+
diff --git a/reference/shaders-msl/comp/inverse.comp b/reference/shaders-msl/comp/inverse.comp
index 567dba2c21..f2f499b91e 100644
--- a/reference/shaders-msl/comp/inverse.comp
+++ b/reference/shaders-msl/comp/inverse.comp
@@ -114,7 +114,7 @@ float2x2 spvInverse2x2(float2x2 m)
     return (det != 0.0f) ? (adj * (1.0f / det)) : m;
 }
 
-kernel void main0(device MatrixOut& _15 [[buffer(0)]], device MatrixIn& _20 [[buffer(1)]])
+kernel void main0(device MatrixOut& _15 [[buffer(0)]], const device MatrixIn& _20 [[buffer(1)]])
 {
     _15.m2out = spvInverse2x2(_20.m2in);
     _15.m3out = spvInverse3x3(_20.m3in);
diff --git a/reference/shaders-msl/comp/loop.noopt.comp b/reference/shaders-msl/comp/loop.noopt.comp
deleted file mode 100644
index 00ed570b31..0000000000
--- a/reference/shaders-msl/comp/loop.noopt.comp
+++ /dev/null
@@ -1,107 +0,0 @@
-#include <metal_stdlib>
-#include <simd/simd.h>
-
-using namespace metal;
-
-struct SSBO
-{
-    float4x4 mvp;
-    float4 in_data[1];
-};
-
-struct SSBO2
-{
-    float4 out_data[1];
-};
-
-kernel void main0(device SSBO& _24 [[buffer(0)]], device SSBO2& _177 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
-{
-    uint ident = gl_GlobalInvocationID.x;
-    float4 idat = _24.in_data[ident];
-    int k = 0;
-    uint i = 0u;
-    if (idat.y == 20.0)
-    {
-        do
-        {
-            k *= 2;
-            i++;
-        } while (i < ident);
-    }
-    switch (k)
-    {
-        case 10:
-        {
-            for (;;)
-            {
-                i++;
-                if (i > 10u)
-                {
-                    break;
-                }
-                continue;
-            }
-            break;
-        }
-        default:
-        {
-            for (;;)
-            {
-                i += 2u;
-                if (i > 20u)
-                {
-                    break;
-                }
-                continue;
-            }
-            break;
-        }
-    }
-    while (k < 10)
-    {
-        idat *= 2.0;
-        k++;
-    }
-    for (uint i_1 = 0u; i_1 < 16u; i_1++, k++)
-    {
-        for (uint j = 0u; j < 30u; j++)
-        {
-            idat = _24.mvp * idat;
-        }
-    }
-    k = 0;
-    for (;;)
-    {
-        k++;
-        if (k > 10)
-        {
-            k += 2;
-        }
-        else
-        {
-            k += 3;
-            continue;
-        }
-        k += 10;
-        continue;
-    }
-    k = 0;
-    do
-    {
-        k++;
-    } while (k > 10);
-    int l = 0;
-    for (;;)
-    {
-        if (l == 5)
-        {
-            l++;
-            continue;
-        }
-        idat += float4(1.0);
-        l++;
-        continue;
-    }
-    _177.out_data[ident] = idat;
-}
-
diff --git a/reference/shaders-msl/comp/mat3.comp b/reference/shaders-msl/comp/mat3.comp
index c2d9a7c838..475d163bd7 100644
--- a/reference/shaders-msl/comp/mat3.comp
+++ b/reference/shaders-msl/comp/mat3.comp
@@ -8,7 +8,7 @@ struct SSBO2
     float3x3 out_data[1];
 };
 
-kernel void main0(device SSBO2& _22 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
+kernel void main0(device SSBO2& _22 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
 {
     uint ident = gl_GlobalInvocationID.x;
     _22.out_data[ident] = float3x3(float3(10.0), float3(20.0), float3(40.0));
diff --git a/reference/shaders-msl/comp/mod.comp b/reference/shaders-msl/comp/mod.comp
index 1a8c5c5fef..e0d290259c 100644
--- a/reference/shaders-msl/comp/mod.comp
+++ b/reference/shaders-msl/comp/mod.comp
@@ -22,7 +22,7 @@ Tx mod(Tx x, Ty y)
     return x - y * floor(x / y);
 }
 
-kernel void main0(device SSBO& _23 [[buffer(0)]], device SSBO2& _33 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
+kernel void main0(const device SSBO& _23 [[buffer(0)]], device SSBO2& _33 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
 {
     uint ident = gl_GlobalInvocationID.x;
     float4 v = mod(_23.in_data[ident], _33.out_data[ident]);
diff --git a/reference/shaders-msl/comp/modf.comp b/reference/shaders-msl/comp/modf.comp
index 9abd457cad..ef50a02135 100644
--- a/reference/shaders-msl/comp/modf.comp
+++ b/reference/shaders-msl/comp/modf.comp
@@ -13,7 +13,7 @@ struct SSBO2
     float4 out_data[1];
 };
 
-kernel void main0(device SSBO& _23 [[buffer(0)]], device SSBO2& _35 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
+kernel void main0(const device SSBO& _23 [[buffer(0)]], device SSBO2& _35 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
 {
     uint ident = gl_GlobalInvocationID.x;
     float4 i;
diff --git a/reference/shaders-msl/comp/packing-test-1.comp b/reference/shaders-msl/comp/packing-test-1.comp
new file mode 100644
index 0000000000..d98ad59aa7
--- /dev/null
+++ b/reference/shaders-msl/comp/packing-test-1.comp
@@ -0,0 +1,38 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+constant uint3 gl_WorkGroupSize = uint3(32u, 1u, 1u);
+
+struct T1
+{
+    float3 a;
+    float b;
+};
+
+struct T1_1
+{
+    packed_float3 a;
+    float b;
+};
+
+struct Buffer0
+{
+    T1_1 buf0[1];
+};
+
+struct Buffer1
+{
+    float buf1[1];
+};
+
+kernel void main0(device Buffer0& _15 [[buffer(1)]], device Buffer1& _34 [[buffer(2)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
+{
+    T1 v;
+    v.a = _15.buf0[0].a;
+    v.b = _15.buf0[0].b;
+    float x = v.b;
+    _34.buf1[gl_GlobalInvocationID.x] = x;
+}
+
diff --git a/reference/shaders-msl/comp/packing-test-2.comp b/reference/shaders-msl/comp/packing-test-2.comp
new file mode 100644
index 0000000000..dfccbf863f
--- /dev/null
+++ b/reference/shaders-msl/comp/packing-test-2.comp
@@ -0,0 +1,29 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+constant uint3 gl_WorkGroupSize = uint3(32u, 1u, 1u);
+
+struct T1
+{
+    packed_float3 a;
+    float b;
+};
+
+struct Buffer0
+{
+    T1 buf0[1];
+};
+
+struct Buffer1
+{
+    float buf1[1];
+};
+
+kernel void main0(device Buffer0& _14 [[buffer(1)]], device Buffer1& _24 [[buffer(2)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
+{
+    float x = _14.buf0[0].b;
+    _24.buf1[gl_GlobalInvocationID.x] = x;
+}
+
diff --git a/reference/shaders-msl/comp/read-write-only.comp b/reference/shaders-msl/comp/read-write-only.comp
index ba53b334ba..42c625092a 100644
--- a/reference/shaders-msl/comp/read-write-only.comp
+++ b/reference/shaders-msl/comp/read-write-only.comp
@@ -21,7 +21,7 @@ struct SSBO1
     float4 data3;
 };
 
-kernel void main0(device SSBO2& _10 [[buffer(0)]], device SSBO0& _15 [[buffer(1)]], device SSBO1& _21 [[buffer(2)]])
+kernel void main0(const device SSBO0& _15 [[buffer(0)]], device SSBO1& _21 [[buffer(1)]], device SSBO2& _10 [[buffer(2)]])
 {
     _10.data4 = _15.data0 + _21.data2;
     _10.data5 = _15.data1 + _21.data3;
diff --git a/reference/shaders-msl/comp/rmw-matrix.comp b/reference/shaders-msl/comp/rmw-matrix.comp
new file mode 100644
index 0000000000..150db7ede9
--- /dev/null
+++ b/reference/shaders-msl/comp/rmw-matrix.comp
@@ -0,0 +1,22 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct SSBO
+{
+    float a;
+    float4 b;
+    float4x4 c;
+    float a1;
+    float4 b1;
+    float4x4 c1;
+};
+
+kernel void main0(device SSBO& _11 [[buffer(0)]])
+{
+    _11.a *= _11.a1;
+    _11.b *= _11.b1;
+    _11.c = _11.c * _11.c1;
+}
+
diff --git a/reference/shaders-msl/comp/shared.comp b/reference/shaders-msl/comp/shared.comp
index 5aeaa4f8c1..e296190bdf 100644
--- a/reference/shaders-msl/comp/shared.comp
+++ b/reference/shaders-msl/comp/shared.comp
@@ -15,11 +15,11 @@ struct SSBO2
     float out_data[1];
 };
 
-kernel void main0(device SSBO& _22 [[buffer(0)]], device SSBO2& _44 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]])
+kernel void main0(const device SSBO& _22 [[buffer(0)]], device SSBO2& _44 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]])
 {
+    threadgroup float sShared[4];
     uint ident = gl_GlobalInvocationID.x;
     float idata = _22.in_data[ident];
-    threadgroup float sShared[4];
     sShared[gl_LocalInvocationIndex] = idata;
     threadgroup_barrier(mem_flags::mem_threadgroup);
     _44.out_data[ident] = sShared[(4u - gl_LocalInvocationIndex) - 1u];
diff --git a/reference/shaders-msl/comp/struct-layout.comp b/reference/shaders-msl/comp/struct-layout.comp
index 6c0f929480..3c44fe5416 100644
--- a/reference/shaders-msl/comp/struct-layout.comp
+++ b/reference/shaders-msl/comp/struct-layout.comp
@@ -18,7 +18,7 @@ struct SSBO
     Foo in_data[1];
 };
 
-kernel void main0(device SSBO2& _23 [[buffer(0)]], device SSBO& _30 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
+kernel void main0(const device SSBO& _30 [[buffer(0)]], device SSBO2& _23 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
 {
     uint ident = gl_GlobalInvocationID.x;
     _23.out_data[ident].m = _30.in_data[ident].m * _30.in_data[ident].m;
diff --git a/reference/shaders-msl/comp/struct-nested.comp b/reference/shaders-msl/comp/struct-nested.comp
index 93694e31c2..117e492d66 100644
--- a/reference/shaders-msl/comp/struct-nested.comp
+++ b/reference/shaders-msl/comp/struct-nested.comp
@@ -13,12 +13,22 @@ struct s2
     s1 b;
 };
 
-struct dstbuffer
+struct s1_1
 {
-    s2 test[1];
+    int a;
 };
 
-kernel void main0(device dstbuffer& _19 [[buffer(0)]])
+struct s2_1
+{
+    s1_1 b;
+};
+
+struct dstbuffer
+{
+    s2_1 test[1];
+};
+
+kernel void main0(device dstbuffer& _19 [[buffer(1)]])
 {
     s2 testVal;
     testVal.b.a = 0;
diff --git a/reference/shaders-msl/comp/struct-packing.comp b/reference/shaders-msl/comp/struct-packing.comp
index f59cba5b7d..2b37844fe7 100644
--- a/reference/shaders-msl/comp/struct-packing.comp
+++ b/reference/shaders-msl/comp/struct-packing.comp
@@ -67,39 +67,83 @@ struct SSBO1
     float array[1];
 };
 
+struct S0_1
+{
+    float2 a[1];
+    float b;
+};
+
+struct S1_1
+{
+    packed_float3 a;
+    float b;
+};
+
+struct S2_1
+{
+    float3 a[1];
+    float b;
+};
+
+struct S3_1
+{
+    float2 a;
+    float b;
+};
+
+struct S4_1
+{
+    float2 c;
+};
+
+struct Content_1
+{
+    S0_1 m0s[1];
+    S1_1 m1s[1];
+    S2_1 m2s[1];
+    S0_1 m0;
+    S1_1 m1;
+    S2_1 m2;
+    S3_1 m3;
+    char pad7[4];
+    float m4;
+    S4_1 m3s[8];
+};
+
 struct SSBO0
 {
-    Content content;
-    Content content1[2];
-    Content content2;
+    Content_1 content;
+    Content_1 content1[2];
+    Content_1 content2;
     float array[1];
 };
 
-kernel void main0(device SSBO1& ssbo_430 [[buffer(0)]], device SSBO0& ssbo_140 [[buffer(1)]])
+kernel void main0(device SSBO0& ssbo_140 [[buffer(0)]], device SSBO1& ssbo_430 [[buffer(1)]])
 {
-    ssbo_430.content.m0s[0].a[0] = ssbo_140.content.m0s[0].a[0];
-    ssbo_430.content.m0s[0].b = ssbo_140.content.m0s[0].b;
-    ssbo_430.content.m1s[0].a = ssbo_140.content.m1s[0].a;
-    ssbo_430.content.m1s[0].b = ssbo_140.content.m1s[0].b;
-    ssbo_430.content.m2s[0].a[0] = ssbo_140.content.m2s[0].a[0];
-    ssbo_430.content.m2s[0].b = ssbo_140.content.m2s[0].b;
-    ssbo_430.content.m0.a[0] = ssbo_140.content.m0.a[0];
-    ssbo_430.content.m0.b = ssbo_140.content.m0.b;
-    ssbo_430.content.m1.a = ssbo_140.content.m1.a;
-    ssbo_430.content.m1.b = ssbo_140.content.m1.b;
-    ssbo_430.content.m2.a[0] = ssbo_140.content.m2.a[0];
-    ssbo_430.content.m2.b = ssbo_140.content.m2.b;
-    ssbo_430.content.m3.a = ssbo_140.content.m3.a;
-    ssbo_430.content.m3.b = ssbo_140.content.m3.b;
-    ssbo_430.content.m4 = ssbo_140.content.m4;
-    ssbo_430.content.m3s[0].c = ssbo_140.content.m3s[0].c;
-    ssbo_430.content.m3s[1].c = ssbo_140.content.m3s[1].c;
-    ssbo_430.content.m3s[2].c = ssbo_140.content.m3s[2].c;
-    ssbo_430.content.m3s[3].c = ssbo_140.content.m3s[3].c;
-    ssbo_430.content.m3s[4].c = ssbo_140.content.m3s[4].c;
-    ssbo_430.content.m3s[5].c = ssbo_140.content.m3s[5].c;
-    ssbo_430.content.m3s[6].c = ssbo_140.content.m3s[6].c;
-    ssbo_430.content.m3s[7].c = ssbo_140.content.m3s[7].c;
+    Content_1 _60 = ssbo_140.content;
+    ssbo_430.content.m0s[0].a[0] = _60.m0s[0].a[0];
+    ssbo_430.content.m0s[0].b = _60.m0s[0].b;
+    ssbo_430.content.m1s[0].a = _60.m1s[0].a;
+    ssbo_430.content.m1s[0].b = _60.m1s[0].b;
+    ssbo_430.content.m2s[0].a[0] = _60.m2s[0].a[0];
+    ssbo_430.content.m2s[0].b = _60.m2s[0].b;
+    ssbo_430.content.m0.a[0] = _60.m0.a[0];
+    ssbo_430.content.m0.b = _60.m0.b;
+    ssbo_430.content.m1.a = _60.m1.a;
+    ssbo_430.content.m1.b = _60.m1.b;
+    ssbo_430.content.m2.a[0] = _60.m2.a[0];
+    ssbo_430.content.m2.b = _60.m2.b;
+    ssbo_430.content.m3.a = _60.m3.a;
+    ssbo_430.content.m3.b = _60.m3.b;
+    ssbo_430.content.m4 = _60.m4;
+    ssbo_430.content.m3s[0].c = _60.m3s[0].c;
+    ssbo_430.content.m3s[1].c = _60.m3s[1].c;
+    ssbo_430.content.m3s[2].c = _60.m3s[2].c;
+    ssbo_430.content.m3s[3].c = _60.m3s[3].c;
+    ssbo_430.content.m3s[4].c = _60.m3s[4].c;
+    ssbo_430.content.m3s[5].c = _60.m3s[5].c;
+    ssbo_430.content.m3s[6].c = _60.m3s[6].c;
+    ssbo_430.content.m3s[7].c = _60.m3s[7].c;
     ssbo_430.content.m1.a = ssbo_430.content.m3.a * ssbo_430.m6[1][1];
 }
 
diff --git a/reference/shaders-msl/comp/torture-loop.comp b/reference/shaders-msl/comp/torture-loop.comp
index 9257088161..1b65a3afab 100644
--- a/reference/shaders-msl/comp/torture-loop.comp
+++ b/reference/shaders-msl/comp/torture-loop.comp
@@ -14,7 +14,7 @@ struct SSBO2
     float4 out_data[1];
 };
 
-kernel void main0(device SSBO& _24 [[buffer(0)]], device SSBO2& _89 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
+kernel void main0(const device SSBO& _24 [[buffer(0)]], device SSBO2& _89 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
 {
     uint ident = gl_GlobalInvocationID.x;
     float4 idat = _24.in_data[ident];
diff --git a/reference/shaders-msl/comp/type-alias.comp b/reference/shaders-msl/comp/type-alias.comp
index d842132995..25a49f59f0 100644
--- a/reference/shaders-msl/comp/type-alias.comp
+++ b/reference/shaders-msl/comp/type-alias.comp
@@ -15,14 +15,24 @@ struct S1
     float4 a;
 };
 
+struct S0_1
+{
+    float4 a;
+};
+
 struct SSBO0
 {
-    S0 s0s[1];
+    S0_1 s0s[1];
+};
+
+struct S1_1
+{
+    float4 a;
 };
 
 struct SSBO1
 {
-    S1 s1s[1];
+    S1_1 s1s[1];
 };
 
 struct SSBO2
diff --git a/reference/shaders-msl/comp/udiv.comp b/reference/shaders-msl/comp/udiv.comp
index 32874ad787..a298ecdb7d 100644
--- a/reference/shaders-msl/comp/udiv.comp
+++ b/reference/shaders-msl/comp/udiv.comp
@@ -13,7 +13,7 @@ struct SSBO
     uint inputs[1];
 };
 
-kernel void main0(device SSBO2& _10 [[buffer(0)]], device SSBO& _23 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
+kernel void main0(device SSBO& _23 [[buffer(0)]], device SSBO2& _10 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
 {
     _10.outputs[gl_GlobalInvocationID.x] = _23.inputs[gl_GlobalInvocationID.x] / 29u;
 }
diff --git a/reference/shaders-msl/desktop-only/frag/image-ms.desktop.frag b/reference/shaders-msl/desktop-only/frag/image-ms.desktop.frag
index 7957b209d6..b7b2cc1589 100644
--- a/reference/shaders-msl/desktop-only/frag/image-ms.desktop.frag
+++ b/reference/shaders-msl/desktop-only/frag/image-ms.desktop.frag
@@ -3,7 +3,7 @@
 
 using namespace metal;
 
-fragment void main0(texture2d_ms<float> uImageMS [[texture(0)]], texture2d_array<float, access::read_write> uImageArray [[texture(1)]], texture2d<float, access::write> uImage [[texture(2)]])
+fragment void main0(texture2d<float, access::write> uImage [[texture(0)]], texture2d_array<float, access::read_write> uImageArray [[texture(1)]], texture2d_ms<float> uImageMS [[texture(2)]])
 {
     float4 a = uImageMS.read(uint2(int2(1, 2)), 2);
     float4 b = uImageArray.read(uint2(int3(1, 2, 4).xy), uint(int3(1, 2, 4).z));
diff --git a/reference/shaders-msl/desktop-only/vert/basic.desktop.sso.vert b/reference/shaders-msl/desktop-only/vert/basic.desktop.sso.vert
index 1592b5c5cf..ffb4357126 100644
--- a/reference/shaders-msl/desktop-only/vert/basic.desktop.sso.vert
+++ b/reference/shaders-msl/desktop-only/vert/basic.desktop.sso.vert
@@ -8,18 +8,18 @@ struct UBO
     float4x4 uMVP;
 };
 
-struct main0_in
-{
-    float3 aNormal [[attribute(1)]];
-    float4 aVertex [[attribute(0)]];
-};
-
 struct main0_out
 {
     float3 vNormal [[user(locn0)]];
     float4 gl_Position [[position]];
 };
 
+struct main0_in
+{
+    float4 aVertex [[attribute(0)]];
+    float3 aNormal [[attribute(1)]];
+};
+
 vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _16 [[buffer(0)]])
 {
     main0_out out = {};
diff --git a/reference/shaders-msl/flatten/basic.flatten.vert b/reference/shaders-msl/flatten/basic.flatten.vert
index 1592b5c5cf..ffb4357126 100644
--- a/reference/shaders-msl/flatten/basic.flatten.vert
+++ b/reference/shaders-msl/flatten/basic.flatten.vert
@@ -8,18 +8,18 @@ struct UBO
     float4x4 uMVP;
 };
 
-struct main0_in
-{
-    float3 aNormal [[attribute(1)]];
-    float4 aVertex [[attribute(0)]];
-};
-
 struct main0_out
 {
     float3 vNormal [[user(locn0)]];
     float4 gl_Position [[position]];
 };
 
+struct main0_in
+{
+    float4 aVertex [[attribute(0)]];
+    float3 aNormal [[attribute(1)]];
+};
+
 vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _16 [[buffer(0)]])
 {
     main0_out out = {};
diff --git a/reference/shaders-msl/flatten/multiindex.flatten.vert b/reference/shaders-msl/flatten/multiindex.flatten.vert
index 84c4b408b2..f4549abab2 100644
--- a/reference/shaders-msl/flatten/multiindex.flatten.vert
+++ b/reference/shaders-msl/flatten/multiindex.flatten.vert
@@ -8,16 +8,16 @@ struct UBO
     float4 Data[3][5];
 };
 
-struct main0_in
-{
-    int2 aIndex [[attribute(0)]];
-};
-
 struct main0_out
 {
     float4 gl_Position [[position]];
 };
 
+struct main0_in
+{
+    int2 aIndex [[attribute(0)]];
+};
+
 vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _20 [[buffer(0)]])
 {
     main0_out out = {};
diff --git a/reference/shaders-msl/flatten/push-constant.flatten.vert b/reference/shaders-msl/flatten/push-constant.flatten.vert
index 83def9c0bb..8f2e8c173f 100644
--- a/reference/shaders-msl/flatten/push-constant.flatten.vert
+++ b/reference/shaders-msl/flatten/push-constant.flatten.vert
@@ -10,18 +10,18 @@ struct PushMe
     float Arr[4];
 };
 
-struct main0_in
-{
-    float4 Pos [[attribute(1)]];
-    float2 Rot [[attribute(0)]];
-};
-
 struct main0_out
 {
     float2 vRot [[user(locn0)]];
     float4 gl_Position [[position]];
 };
 
+struct main0_in
+{
+    float2 Rot [[attribute(0)]];
+    float4 Pos [[attribute(1)]];
+};
+
 vertex main0_out main0(main0_in in [[stage_in]], constant PushMe& registers [[buffer(0)]])
 {
     main0_out out = {};
diff --git a/reference/shaders-msl/flatten/rowmajor.flatten.vert b/reference/shaders-msl/flatten/rowmajor.flatten.vert
index 3ea6d78b8a..b5df8b064f 100644
--- a/reference/shaders-msl/flatten/rowmajor.flatten.vert
+++ b/reference/shaders-msl/flatten/rowmajor.flatten.vert
@@ -12,16 +12,16 @@ struct UBO
     float2x4 uMVP;
 };
 
-struct main0_in
-{
-    float4 aVertex [[attribute(0)]];
-};
-
 struct main0_out
 {
     float4 gl_Position [[position]];
 };
 
+struct main0_in
+{
+    float4 aVertex [[attribute(0)]];
+};
+
 // Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization.
 float2x4 spvConvertFromRowMajor2x4(float2x4 m)
 {
diff --git a/reference/shaders-msl/flatten/struct.flatten.vert b/reference/shaders-msl/flatten/struct.flatten.vert
index 75f58e1e29..291b1f7ac6 100644
--- a/reference/shaders-msl/flatten/struct.flatten.vert
+++ b/reference/shaders-msl/flatten/struct.flatten.vert
@@ -16,24 +16,24 @@ struct UBO
     Light light;
 };
 
-struct main0_in
-{
-    float3 aNormal [[attribute(1)]];
-    float4 aVertex [[attribute(0)]];
-};
-
 struct main0_out
 {
     float4 vColor [[user(locn0)]];
     float4 gl_Position [[position]];
 };
 
+struct main0_in
+{
+    float4 aVertex [[attribute(0)]];
+    float3 aNormal [[attribute(1)]];
+};
+
 vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _18 [[buffer(0)]])
 {
     main0_out out = {};
     out.gl_Position = _18.uMVP * in.aVertex;
     out.vColor = float4(0.0);
-    float3 L = in.aVertex.xyz - _18.light.Position;
+    float3 L = in.aVertex.xyz - float3(_18.light.Position);
     out.vColor += ((_18.light.Color * clamp(1.0 - (length(L) / _18.light.Radius), 0.0, 1.0)) * dot(in.aNormal, normalize(L)));
     return out;
 }
diff --git a/reference/shaders-msl/flatten/swizzle.flatten.vert b/reference/shaders-msl/flatten/swizzle.flatten.vert
index 53fc21f99e..05a6bbaeb3 100644
--- a/reference/shaders-msl/flatten/swizzle.flatten.vert
+++ b/reference/shaders-msl/flatten/swizzle.flatten.vert
@@ -39,7 +39,7 @@ vertex main0_out main0(constant UBO& _22 [[buffer(0)]])
     out.oA = _22.A;
     out.oB = float4(_22.B0, _22.B1);
     out.oC = float4(_22.C0, _22.C1) + float4(_22.C1.xy, _22.C1.z, _22.C0);
-    out.oD = float4(_22.D0, _22.D1) + float4(float3(_22.D0).xy, float3(_22.D0).z, _22.D1);
+    out.oD = float4(_22.D0[0], _22.D0[1], _22.D0[2], _22.D1) + float4(float2(_22.D0[0], _22.D0[1]), _22.D0[2u], _22.D1);
     out.oE = float4(_22.E0, _22.E1, _22.E2, _22.E3);
     out.oF = float4(_22.F0, _22.F1, _22.F2);
     return out;
diff --git a/reference/shaders-msl/flatten/types.flatten.frag b/reference/shaders-msl/flatten/types.flatten.frag
index cee53d9e58..540c5baeb1 100644
--- a/reference/shaders-msl/flatten/types.flatten.frag
+++ b/reference/shaders-msl/flatten/types.flatten.frag
@@ -26,7 +26,7 @@ struct main0_out
     float4 FragColor [[color(0)]];
 };
 
-fragment main0_out main0(constant UBO1& _14 [[buffer(0)]], constant UBO2& _29 [[buffer(1)]], constant UBO0& _41 [[buffer(2)]])
+fragment main0_out main0(constant UBO0& _41 [[buffer(0)]], constant UBO1& _14 [[buffer(1)]], constant UBO2& _29 [[buffer(2)]])
 {
     main0_out out = {};
     out.FragColor = ((((float4(_14.c) + float4(_14.d)) + float4(_29.e)) + float4(_29.f)) + _41.a) + _41.b;
diff --git a/reference/shaders-msl/frag/array-lut-no-loop-variable.frag b/reference/shaders-msl/frag/array-lut-no-loop-variable.frag
new file mode 100644
index 0000000000..8943a8e5a1
--- /dev/null
+++ b/reference/shaders-msl/frag/array-lut-no-loop-variable.frag
@@ -0,0 +1,38 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+constant float _17[5] = {1.0, 2.0, 3.0, 4.0, 5.0};
+
+struct main0_out
+{
+    float4 FragColor [[color(0)]];
+};
+
+// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment.
+template<typename T, uint N>
+void spvArrayCopy(thread T (&dst)[N], thread const T (&src)[N])
+{
+    for (uint i = 0; i < N; dst[i] = src[i], i++);
+}
+
+// An overload for constant arrays.
+template<typename T, uint N>
+void spvArrayCopyConstant(thread T (&dst)[N], constant T (&src)[N])
+{
+    for (uint i = 0; i < N; dst[i] = src[i], i++);
+}
+
+fragment main0_out main0()
+{
+    main0_out out = {};
+    float lut[5] = {1.0, 2.0, 3.0, 4.0, 5.0};
+    for (int i = 0; i < 4; i++, out.FragColor += float4(lut[i]))
+    {
+    }
+    return out;
+}
+
diff --git a/reference/shaders-msl/frag/basic.frag b/reference/shaders-msl/frag/basic.frag
index 4d33ee7bca..f33db61eba 100644
--- a/reference/shaders-msl/frag/basic.frag
+++ b/reference/shaders-msl/frag/basic.frag
@@ -3,17 +3,17 @@
 
 using namespace metal;
 
-struct main0_in
-{
-    float2 vTex [[user(locn1)]];
-    float4 vColor [[user(locn0)]];
-};
-
 struct main0_out
 {
     float4 FragColor [[color(0)]];
 };
 
+struct main0_in
+{
+    float4 vColor [[user(locn0)]];
+    float2 vTex [[user(locn1)]];
+};
+
 fragment main0_out main0(main0_in in [[stage_in]], texture2d<float> uTex [[texture(0)]], sampler uTexSmplr [[sampler(0)]])
 {
     main0_out out = {};
diff --git a/reference/shaders-msl/frag/binary-func-unpack-pack-arguments.frag b/reference/shaders-msl/frag/binary-func-unpack-pack-arguments.frag
new file mode 100644
index 0000000000..134cfe1847
--- /dev/null
+++ b/reference/shaders-msl/frag/binary-func-unpack-pack-arguments.frag
@@ -0,0 +1,28 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct UBO
+{
+    packed_float3 color;
+    float v;
+};
+
+struct main0_out
+{
+    float FragColor [[color(0)]];
+};
+
+struct main0_in
+{
+    float3 vIn [[user(locn0)]];
+};
+
+fragment main0_out main0(main0_in in [[stage_in]], constant UBO& _15 [[buffer(0)]])
+{
+    main0_out out = {};
+    out.FragColor = dot(in.vIn, float3(_15.color));
+    return out;
+}
+
diff --git a/reference/shaders-msl/frag/binary-unpack-pack-arguments.frag b/reference/shaders-msl/frag/binary-unpack-pack-arguments.frag
new file mode 100644
index 0000000000..8bd538bec6
--- /dev/null
+++ b/reference/shaders-msl/frag/binary-unpack-pack-arguments.frag
@@ -0,0 +1,28 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct UBO
+{
+    packed_float3 color;
+    float v;
+};
+
+struct main0_out
+{
+    float3 FragColor [[color(0)]];
+};
+
+struct main0_in
+{
+    float3 vIn [[user(locn0)]];
+};
+
+fragment main0_out main0(main0_in in [[stage_in]], constant UBO& _15 [[buffer(0)]])
+{
+    main0_out out = {};
+    out.FragColor = cross(in.vIn, float3(_15.color) - in.vIn);
+    return out;
+}
+
diff --git a/reference/shaders-msl/frag/bitcasting.frag b/reference/shaders-msl/frag/bitcasting.frag
index a2d624510f..475b573a23 100644
--- a/reference/shaders-msl/frag/bitcasting.frag
+++ b/reference/shaders-msl/frag/bitcasting.frag
@@ -3,17 +3,17 @@
 
 using namespace metal;
 
-struct main0_in
-{
-    float4 VertGeom [[user(locn0)]];
-};
-
 struct main0_out
 {
     float4 FragColor0 [[color(0)]];
     float4 FragColor1 [[color(1)]];
 };
 
+struct main0_in
+{
+    float4 VertGeom [[user(locn0)]];
+};
+
 fragment main0_out main0(main0_in in [[stage_in]], texture2d<float> TextureBase [[texture(0)]], texture2d<float> TextureDetail [[texture(1)]], sampler TextureBaseSmplr [[sampler(0)]], sampler TextureDetailSmplr [[sampler(1)]])
 {
     main0_out out = {};
diff --git a/reference/shaders-msl/frag/builtins.frag b/reference/shaders-msl/frag/builtins.frag
index 9283d1a66b..f9085252b3 100644
--- a/reference/shaders-msl/frag/builtins.frag
+++ b/reference/shaders-msl/frag/builtins.frag
@@ -3,17 +3,17 @@
 
 using namespace metal;
 
-struct main0_in
-{
-    float4 vColor [[user(locn0)]];
-};
-
 struct main0_out
 {
     float4 FragColor [[color(0)]];
     float gl_FragDepth [[depth(any)]];
 };
 
+struct main0_in
+{
+    float4 vColor [[user(locn0)]];
+};
+
 fragment main0_out main0(main0_in in [[stage_in]], float4 gl_FragCoord [[position]])
 {
     main0_out out = {};
diff --git a/reference/shaders-msl/frag/composite-extract-forced-temporary.frag b/reference/shaders-msl/frag/composite-extract-forced-temporary.frag
index 2d68f01299..dfab4d26f0 100644
--- a/reference/shaders-msl/frag/composite-extract-forced-temporary.frag
+++ b/reference/shaders-msl/frag/composite-extract-forced-temporary.frag
@@ -3,16 +3,16 @@
 
 using namespace metal;
 
-struct main0_in
-{
-    float2 vTexCoord [[user(locn0)]];
-};
-
 struct main0_out
 {
     float4 FragColor [[color(0)]];
 };
 
+struct main0_in
+{
+    float2 vTexCoord [[user(locn0)]];
+};
+
 fragment main0_out main0(main0_in in [[stage_in]], texture2d<float> Texture [[texture(0)]], sampler TextureSmplr [[sampler(0)]])
 {
     main0_out out = {};
diff --git a/reference/shaders-msl/frag/constant-array.frag b/reference/shaders-msl/frag/constant-array.frag
index 773d59407c..09f123b29a 100644
--- a/reference/shaders-msl/frag/constant-array.frag
+++ b/reference/shaders-msl/frag/constant-array.frag
@@ -17,16 +17,16 @@ constant float4 _54[2] = {float4(8.0), float4(10.0)};
 constant float4 _55[2][2] = {{float4(1.0), float4(2.0)}, {float4(8.0), float4(10.0)}};
 constant Foobar _75[2] = {{10.0, 40.0}, {90.0, 70.0}};
 
-struct main0_in
-{
-    int index [[user(locn0)]];
-};
-
 struct main0_out
 {
     float4 FragColor [[color(0)]];
 };
 
+struct main0_in
+{
+    int index [[user(locn0)]];
+};
+
 // Implementation of an array copy function to cover GLSL's ability to copy an array via assignment.
 template<typename T, uint N>
 void spvArrayCopy(thread T (&dst)[N], thread const T (&src)[N])
diff --git a/reference/shaders-msl/frag/constant-composites.frag b/reference/shaders-msl/frag/constant-composites.frag
index d216da6d13..ec5d66e86d 100644
--- a/reference/shaders-msl/frag/constant-composites.frag
+++ b/reference/shaders-msl/frag/constant-composites.frag
@@ -14,16 +14,16 @@ struct Foo
 constant float _16[4] = {1.0, 4.0, 3.0, 2.0};
 constant Foo _28[2] = {{10.0, 20.0}, {30.0, 40.0}};
 
-struct main0_in
-{
-    int line [[user(locn0)]];
-};
-
 struct main0_out
 {
     float4 FragColor [[color(0)]];
 };
 
+struct main0_in
+{
+    int line [[user(locn0)]];
+};
+
 // Implementation of an array copy function to cover GLSL's ability to copy an array via assignment.
 template<typename T, uint N>
 void spvArrayCopy(thread T (&dst)[N], thread const T (&src)[N])
diff --git a/reference/shaders-msl/frag/control-dependent-in-branch.desktop.frag b/reference/shaders-msl/frag/control-dependent-in-branch.desktop.frag
new file mode 100644
index 0000000000..4d10167130
--- /dev/null
+++ b/reference/shaders-msl/frag/control-dependent-in-branch.desktop.frag
@@ -0,0 +1,45 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float4 FragColor [[color(0)]];
+};
+
+struct main0_in
+{
+    float4 vInput [[user(locn0)]];
+};
+
+fragment main0_out main0(main0_in in [[stage_in]], texture2d<float> uSampler [[texture(0)]], sampler uSamplerSmplr [[sampler(0)]])
+{
+    main0_out out = {};
+    out.FragColor = in.vInput;
+    float4 t = uSampler.sample(uSamplerSmplr, in.vInput.xy);
+    float4 d0 = dfdx(in.vInput);
+    float4 d1 = dfdy(in.vInput);
+    float4 d2 = fwidth(in.vInput);
+    float4 d3 = dfdx(in.vInput);
+    float4 d4 = dfdy(in.vInput);
+    float4 d5 = fwidth(in.vInput);
+    float4 d6 = dfdx(in.vInput);
+    float4 d7 = dfdy(in.vInput);
+    float4 d8 = fwidth(in.vInput);
+    if (in.vInput.y > 10.0)
+    {
+        out.FragColor += t;
+        out.FragColor += d0;
+        out.FragColor += d1;
+        out.FragColor += d2;
+        out.FragColor += d3;
+        out.FragColor += d4;
+        out.FragColor += d5;
+        out.FragColor += d6;
+        out.FragColor += d7;
+        out.FragColor += d8;
+    }
+    return out;
+}
+
diff --git a/reference/shaders-msl/frag/dual-source-blending.frag b/reference/shaders-msl/frag/dual-source-blending.frag
new file mode 100644
index 0000000000..37938bf8ca
--- /dev/null
+++ b/reference/shaders-msl/frag/dual-source-blending.frag
@@ -0,0 +1,19 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float4 FragColor0 [[color(0), index(0)]];
+    float4 FragColor1 [[color(0), index(1)]];
+};
+
+fragment main0_out main0()
+{
+    main0_out out = {};
+    out.FragColor0 = float4(1.0);
+    out.FragColor1 = float4(2.0);
+    return out;
+}
+
diff --git a/reference/shaders-msl/frag/false-loop-init.frag b/reference/shaders-msl/frag/false-loop-init.frag
index e0792474b5..7a4d6d5a3f 100644
--- a/reference/shaders-msl/frag/false-loop-init.frag
+++ b/reference/shaders-msl/frag/false-loop-init.frag
@@ -3,16 +3,16 @@
 
 using namespace metal;
 
-struct main0_in
-{
-    float4 accum [[user(locn0)]];
-};
-
 struct main0_out
 {
     float4 result [[color(0)]];
 };
 
+struct main0_in
+{
+    float4 accum [[user(locn0)]];
+};
+
 fragment main0_out main0(main0_in in [[stage_in]])
 {
     main0_out out = {};
diff --git a/reference/shaders-msl/frag/fp16-packing.frag b/reference/shaders-msl/frag/fp16-packing.frag
new file mode 100644
index 0000000000..358681f6dd
--- /dev/null
+++ b/reference/shaders-msl/frag/fp16-packing.frag
@@ -0,0 +1,25 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float2 FP32Out [[color(0)]];
+    uint FP16Out [[color(1)]];
+};
+
+struct main0_in
+{
+    uint FP16 [[user(locn0)]];
+    float2 FP32 [[user(locn1)]];
+};
+
+fragment main0_out main0(main0_in in [[stage_in]])
+{
+    main0_out out = {};
+    out.FP32Out = float2(as_type<half2>(in.FP16));
+    out.FP16Out = as_type<uint>(half2(in.FP32));
+    return out;
+}
+
diff --git a/reference/shaders-msl/frag/fp16.desktop.frag b/reference/shaders-msl/frag/fp16.desktop.frag
new file mode 100644
index 0000000000..b914e4b8fc
--- /dev/null
+++ b/reference/shaders-msl/frag/fp16.desktop.frag
@@ -0,0 +1,180 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct ResType
+{
+    half4 _m0;
+    int4 _m1;
+};
+
+struct main0_in
+{
+    half v1 [[user(locn0)]];
+    half2 v2 [[user(locn1)]];
+    half3 v3 [[user(locn2)]];
+    half4 v4 [[user(locn3)]];
+};
+
+// Implementation of the GLSL mod() function, which is slightly different than Metal fmod()
+template<typename Tx, typename Ty>
+Tx mod(Tx x, Ty y)
+{
+    return x - y * floor(x / y);
+}
+
+// Implementation of the GLSL radians() function
+template<typename T>
+T radians(T d)
+{
+    return d * T(0.01745329251);
+}
+
+// Implementation of the GLSL degrees() function
+template<typename T>
+T degrees(T r)
+{
+    return r * T(57.2957795131);
+}
+
+half2x2 test_mat2(thread const half2& a, thread const half2& b, thread const half2& c, thread const half2& d)
+{
+    return half2x2(half2(a), half2(b)) * half2x2(half2(c), half2(d));
+}
+
+half3x3 test_mat3(thread const half3& a, thread const half3& b, thread const half3& c, thread const half3& d, thread const half3& e, thread const half3& f)
+{
+    return half3x3(half3(a), half3(b), half3(c)) * half3x3(half3(d), half3(e), half3(f));
+}
+
+void test_constants()
+{
+    half a = 1.0h;
+    half b = 1.5h;
+    half c = -1.5h;
+    half d = (0.0h / 0.0h);
+    half e = (1.0h / 0.0h);
+    half f = (-1.0h / 0.0h);
+    half g = 1014.0h;
+    half h = 9.5367431640625e-07h;
+}
+
+half test_result()
+{
+    return 1.0h;
+}
+
+void test_conversions()
+{
+    half one = test_result();
+    int a = int(one);
+    uint b = uint(one);
+    bool c = one != 0.0h;
+    float d = float(one);
+    half a2 = half(a);
+    half b2 = half(b);
+    half c2 = half(c);
+    half d2 = half(d);
+}
+
+void test_builtins(thread half4& v4, thread half3& v3, thread half& v1)
+{
+    half4 res = radians(v4);
+    res = degrees(v4);
+    res = sin(v4);
+    res = cos(v4);
+    res = tan(v4);
+    res = asin(v4);
+    res = atan2(v4, v3.xyzz);
+    res = atan(v4);
+    res = sinh(v4);
+    res = cosh(v4);
+    res = tanh(v4);
+    res = asinh(v4);
+    res = acosh(v4);
+    res = atanh(v4);
+    res = pow(v4, v4);
+    res = exp(v4);
+    res = log(v4);
+    res = exp2(v4);
+    res = log2(v4);
+    res = sqrt(v4);
+    res = rsqrt(v4);
+    res = abs(v4);
+    res = sign(v4);
+    res = floor(v4);
+    res = trunc(v4);
+    res = round(v4);
+    res = rint(v4);
+    res = ceil(v4);
+    res = fract(v4);
+    res = mod(v4, v4);
+    half4 tmp;
+    half4 _223 = modf(v4, tmp);
+    res = _223;
+    res = min(v4, v4);
+    res = max(v4, v4);
+    res = clamp(v4, v4, v4);
+    res = mix(v4, v4, v4);
+    bool4 _243 = v4 < v4;
+    res = half4(_243.x ? v4.x : v4.x, _243.y ? v4.y : v4.y, _243.z ? v4.z : v4.z, _243.w ? v4.w : v4.w);
+    res = step(v4, v4);
+    res = smoothstep(v4, v4, v4);
+    bool4 btmp = isnan(v4);
+    btmp = isinf(v4);
+    res = fma(v4, v4, v4);
+    ResType _267;
+    _267._m0 = frexp(v4, _267._m1);
+    int4 itmp = _267._m1;
+    res = _267._m0;
+    res = ldexp(res, itmp);
+    uint pack0 = as_type<uint>(v4.xy);
+    uint pack1 = as_type<uint>(v4.zw);
+    res = half4(as_type<half2>(pack0), as_type<half2>(pack1));
+    half t0 = length(v4);
+    t0 = distance(v4, v4);
+    t0 = dot(v4, v4);
+    half3 res3 = cross(v3, v3);
+    res = normalize(v4);
+    res = faceforward(v4, v4, v4);
+    res = reflect(v4, v4);
+    res = refract(v4, v4, v1);
+    btmp = v4 < v4;
+    btmp = v4 <= v4;
+    btmp = v4 > v4;
+    btmp = v4 >= v4;
+    btmp = v4 == v4;
+    btmp = v4 != v4;
+    res = dfdx(v4);
+    res = dfdy(v4);
+    res = dfdx(v4);
+    res = dfdy(v4);
+    res = dfdx(v4);
+    res = dfdy(v4);
+    res = fwidth(v4);
+    res = fwidth(v4);
+    res = fwidth(v4);
+}
+
+fragment void main0(main0_in in [[stage_in]])
+{
+    half2 param = in.v2;
+    half2 param_1 = in.v2;
+    half2 param_2 = in.v3.xy;
+    half2 param_3 = in.v3.xy;
+    half2x2 m0 = test_mat2(param, param_1, param_2, param_3);
+    half3 param_4 = in.v3;
+    half3 param_5 = in.v3;
+    half3 param_6 = in.v3;
+    half3 param_7 = in.v4.xyz;
+    half3 param_8 = in.v4.xyz;
+    half3 param_9 = in.v4.yzw;
+    half3x3 m1 = test_mat3(param_4, param_5, param_6, param_7, param_8, param_9);
+    test_constants();
+    test_conversions();
+    test_builtins(in.v4, in.v3, in.v1);
+}
+
diff --git a/reference/shaders-msl/frag/front-facing.frag b/reference/shaders-msl/frag/front-facing.frag
index 3856498943..2f83642492 100644
--- a/reference/shaders-msl/frag/front-facing.frag
+++ b/reference/shaders-msl/frag/front-facing.frag
@@ -3,17 +3,17 @@
 
 using namespace metal;
 
-struct main0_in
-{
-    float4 vB [[user(locn1)]];
-    float4 vA [[user(locn0)]];
-};
-
 struct main0_out
 {
     float4 FragColor [[color(0)]];
 };
 
+struct main0_in
+{
+    float4 vA [[user(locn0)]];
+    float4 vB [[user(locn1)]];
+};
+
 fragment main0_out main0(main0_in in [[stage_in]], bool gl_FrontFacing [[front_facing]])
 {
     main0_out out = {};
diff --git a/reference/shaders-msl/frag/gather-dref.frag b/reference/shaders-msl/frag/gather-dref.frag
new file mode 100644
index 0000000000..c5c5ccf0bb
--- /dev/null
+++ b/reference/shaders-msl/frag/gather-dref.frag
@@ -0,0 +1,22 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float4 FragColor [[color(0)]];
+};
+
+struct main0_in
+{
+    float3 vUV [[user(locn0)]];
+};
+
+fragment main0_out main0(main0_in in [[stage_in]], depth2d<float> uT [[texture(0)]], sampler uTSmplr [[sampler(0)]])
+{
+    main0_out out = {};
+    out.FragColor = uT.gather_compare(uTSmplr, in.vUV.xy, in.vUV.z);
+    return out;
+}
+
diff --git a/reference/shaders-msl/frag/gather-offset.frag b/reference/shaders-msl/frag/gather-offset.frag
new file mode 100644
index 0000000000..02b80194b5
--- /dev/null
+++ b/reference/shaders-msl/frag/gather-offset.frag
@@ -0,0 +1,17 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float4 FragColor [[color(0)]];
+};
+
+fragment main0_out main0(texture2d<float> uT [[texture(0)]], sampler uTSmplr [[sampler(0)]])
+{
+    main0_out out = {};
+    out.FragColor = uT.gather(uTSmplr, float2(0.5), int2(0), component::w);
+    return out;
+}
+
diff --git a/reference/shaders-msl/frag/illegal-name-test-0.frag b/reference/shaders-msl/frag/illegal-name-test-0.frag
new file mode 100644
index 0000000000..6b209b49d5
--- /dev/null
+++ b/reference/shaders-msl/frag/illegal-name-test-0.frag
@@ -0,0 +1,21 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float4 FragColor [[color(0)]];
+};
+
+fragment main0_out main0()
+{
+    main0_out out = {};
+    float4 fragment0 = float4(10.0);
+    float4 compute0 = float4(10.0);
+    float4 kernel0 = float4(10.0);
+    float4 vertex0 = float4(10.0);
+    out.FragColor = ((fragment0 + compute0) + kernel0) + vertex0;
+    return out;
+}
+
diff --git a/reference/shaders-msl/frag/in_block.frag b/reference/shaders-msl/frag/in_block.frag
index 43b4a05897..2af2024f52 100644
--- a/reference/shaders-msl/frag/in_block.frag
+++ b/reference/shaders-msl/frag/in_block.frag
@@ -3,17 +3,17 @@
 
 using namespace metal;
 
-struct main0_in
-{
-    float4 VertexOut_color2 [[user(locn3)]];
-    float4 VertexOut_color [[user(locn2)]];
-};
-
 struct main0_out
 {
     float4 FragColor [[color(0)]];
 };
 
+struct main0_in
+{
+    float4 VertexOut_color [[user(locn2)]];
+    float4 VertexOut_color2 [[user(locn3)]];
+};
+
 fragment main0_out main0(main0_in in [[stage_in]])
 {
     main0_out out = {};
diff --git a/reference/shaders-msl/frag/in_mat.frag b/reference/shaders-msl/frag/in_mat.frag
new file mode 100644
index 0000000000..f0f4c4eeed
--- /dev/null
+++ b/reference/shaders-msl/frag/in_mat.frag
@@ -0,0 +1,37 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float4 outFragColor [[color(0)]];
+};
+
+struct main0_in
+{
+    float3 inPos [[user(locn0)]];
+    float3 inNormal [[user(locn1)]];
+    float4 inInvModelView_0 [[user(locn2)]];
+    float4 inInvModelView_1 [[user(locn3)]];
+    float4 inInvModelView_2 [[user(locn4)]];
+    float4 inInvModelView_3 [[user(locn5)]];
+    float inLodBias [[user(locn6)]];
+};
+
+fragment main0_out main0(main0_in in [[stage_in]], texturecube<float> samplerColor [[texture(1)]], sampler samplerColorSmplr [[sampler(1)]])
+{
+    main0_out out = {};
+    float4x4 inInvModelView = {};
+    inInvModelView[0] = in.inInvModelView_0;
+    inInvModelView[1] = in.inInvModelView_1;
+    inInvModelView[2] = in.inInvModelView_2;
+    inInvModelView[3] = in.inInvModelView_3;
+    float3 cI = normalize(in.inPos);
+    float3 cR = reflect(cI, normalize(in.inNormal));
+    cR = float3((inInvModelView * float4(cR, 0.0)).xyz);
+    cR.x *= (-1.0);
+    out.outFragColor = samplerColor.sample(samplerColorSmplr, cR, bias(in.inLodBias));
+    return out;
+}
+
diff --git a/reference/shaders-msl/frag/mix.frag b/reference/shaders-msl/frag/mix.frag
index 2d35766621..ad7c5adeeb 100644
--- a/reference/shaders-msl/frag/mix.frag
+++ b/reference/shaders-msl/frag/mix.frag
@@ -3,19 +3,19 @@
 
 using namespace metal;
 
-struct main0_in
-{
-    float vIn3 [[user(locn3)]];
-    float vIn2 [[user(locn2)]];
-    float4 vIn1 [[user(locn1)]];
-    float4 vIn0 [[user(locn0)]];
-};
-
 struct main0_out
 {
     float4 FragColor [[color(0)]];
 };
 
+struct main0_in
+{
+    float4 vIn0 [[user(locn0)]];
+    float4 vIn1 [[user(locn1)]];
+    float vIn2 [[user(locn2)]];
+    float vIn3 [[user(locn3)]];
+};
+
 fragment main0_out main0(main0_in in [[stage_in]])
 {
     main0_out out = {};
diff --git a/reference/shaders-msl/frag/mrt-array.frag b/reference/shaders-msl/frag/mrt-array.frag
new file mode 100644
index 0000000000..daf7edb4cc
--- /dev/null
+++ b/reference/shaders-msl/frag/mrt-array.frag
@@ -0,0 +1,53 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float4 FragColor_0 [[color(0)]];
+    float4 FragColor_1 [[color(1)]];
+    float4 FragColor_2 [[color(2)]];
+    float4 FragColor_3 [[color(3)]];
+};
+
+struct main0_in
+{
+    float4 vA [[user(locn0)]];
+    float4 vB [[user(locn1)]];
+};
+
+// Implementation of the GLSL mod() function, which is slightly different than Metal fmod()
+template<typename Tx, typename Ty>
+Tx mod(Tx x, Ty y)
+{
+    return x - y * floor(x / y);
+}
+
+void write_deeper_in_function(thread float4 (&FragColor)[4], thread float4& vA, thread float4& vB)
+{
+    FragColor[3] = vA * vB;
+}
+
+void write_in_function(thread float4 (&FragColor)[4], thread float4& vA, thread float4& vB)
+{
+    FragColor[2] = vA - vB;
+    write_deeper_in_function(FragColor, vA, vB);
+}
+
+fragment main0_out main0(main0_in in [[stage_in]])
+{
+    main0_out out = {};
+    float4 FragColor[4] = {};
+    FragColor[0] = mod(in.vA, in.vB);
+    FragColor[1] = in.vA + in.vB;
+    write_in_function(FragColor, in.vA, in.vB);
+    out.FragColor_0 = FragColor[0];
+    out.FragColor_1 = FragColor[1];
+    out.FragColor_2 = FragColor[2];
+    out.FragColor_3 = FragColor[3];
+    return out;
+}
+
diff --git a/reference/shaders-msl/frag/packed-expression-vector-shuffle.frag b/reference/shaders-msl/frag/packed-expression-vector-shuffle.frag
new file mode 100644
index 0000000000..dc8947425a
--- /dev/null
+++ b/reference/shaders-msl/frag/packed-expression-vector-shuffle.frag
@@ -0,0 +1,25 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct UBO
+{
+    packed_float3 color;
+    float v;
+};
+
+struct main0_out
+{
+    float4 FragColor [[color(0)]];
+};
+
+fragment main0_out main0(constant UBO& _15 [[buffer(0)]])
+{
+    main0_out out = {};
+    float4 f = float4(1.0);
+    f = float4(_15.color[0], _15.color[1], _15.color[2], f.w);
+    out.FragColor = f;
+    return out;
+}
+
diff --git a/reference/shaders-msl/frag/packing-test-3.frag b/reference/shaders-msl/frag/packing-test-3.frag
new file mode 100644
index 0000000000..9c59bc164e
--- /dev/null
+++ b/reference/shaders-msl/frag/packing-test-3.frag
@@ -0,0 +1,54 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct VertexOutput
+{
+    float4 HPosition;
+};
+
+struct TestStruct
+{
+    float3 position;
+    float radius;
+};
+
+struct TestStruct_1
+{
+    packed_float3 position;
+    float radius;
+};
+
+struct CB0
+{
+    TestStruct_1 CB0[16];
+};
+
+struct main0_out
+{
+    float4 _entryPointOutput [[color(0)]];
+};
+
+float4 _main(thread const VertexOutput& IN, constant CB0& v_26)
+{
+    TestStruct st;
+    st.position = v_26.CB0[1].position;
+    st.radius = v_26.CB0[1].radius;
+    float4 col = float4(st.position, st.radius);
+    return col;
+}
+
+fragment main0_out main0(constant CB0& v_26 [[buffer(0)]], float4 gl_FragCoord [[position]])
+{
+    main0_out out = {};
+    VertexOutput IN;
+    IN.HPosition = gl_FragCoord;
+    VertexOutput param = IN;
+    VertexOutput param_1 = param;
+    out._entryPointOutput = _main(param_1, v_26);
+    return out;
+}
+
diff --git a/reference/shaders-msl/frag/pls.frag b/reference/shaders-msl/frag/pls.frag
index 42b5d2bf59..ee774a04af 100644
--- a/reference/shaders-msl/frag/pls.frag
+++ b/reference/shaders-msl/frag/pls.frag
@@ -3,14 +3,6 @@
 
 using namespace metal;
 
-struct main0_in
-{
-    float4 PLSIn3 [[user(locn3)]];
-    float4 PLSIn2 [[user(locn2)]];
-    float4 PLSIn1 [[user(locn1)]];
-    float4 PLSIn0 [[user(locn0)]];
-};
-
 struct main0_out
 {
     float4 PLSOut0 [[color(0)]];
@@ -19,6 +11,14 @@ struct main0_out
     float4 PLSOut3 [[color(3)]];
 };
 
+struct main0_in
+{
+    float4 PLSIn0 [[user(locn0)]];
+    float4 PLSIn1 [[user(locn1)]];
+    float4 PLSIn2 [[user(locn2)]];
+    float4 PLSIn3 [[user(locn3)]];
+};
+
 fragment main0_out main0(main0_in in [[stage_in]])
 {
     main0_out out = {};
diff --git a/reference/shaders-msl/frag/readonly-ssbo.frag b/reference/shaders-msl/frag/readonly-ssbo.frag
new file mode 100644
index 0000000000..771c225d6e
--- /dev/null
+++ b/reference/shaders-msl/frag/readonly-ssbo.frag
@@ -0,0 +1,29 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct SSBO
+{
+    float4 v;
+};
+
+struct main0_out
+{
+    float4 FragColor [[color(0)]];
+};
+
+float4 read_from_function(const device SSBO& v_13)
+{
+    return v_13.v;
+}
+
+fragment main0_out main0(const device SSBO& v_13 [[buffer(0)]])
+{
+    main0_out out = {};
+    out.FragColor = v_13.v + read_from_function(v_13);
+    return out;
+}
+
diff --git a/reference/shaders-msl/frag/sample-depth-separate-image-sampler.frag b/reference/shaders-msl/frag/sample-depth-separate-image-sampler.frag
index a9c0f8b41b..ae84344748 100644
--- a/reference/shaders-msl/frag/sample-depth-separate-image-sampler.frag
+++ b/reference/shaders-msl/frag/sample-depth-separate-image-sampler.frag
@@ -20,7 +20,7 @@ float sample_color_from_function(thread const texture2d<float> uT, thread const
     return uT.sample(uS, float2(0.5)).x;
 }
 
-fragment main0_out main0(depth2d<float> uDepth [[texture(0)]], texture2d<float> uColor [[texture(1)]], sampler uSamplerShadow [[sampler(0)]], sampler uSampler [[sampler(1)]])
+fragment main0_out main0(depth2d<float> uDepth [[texture(0)]], texture2d<float> uColor [[texture(1)]], sampler uSampler [[sampler(2)]], sampler uSamplerShadow [[sampler(3)]])
 {
     main0_out out = {};
     out.FragColor = sample_depth_from_function(uDepth, uSamplerShadow) + sample_color_from_function(uColor, uSampler);
diff --git a/reference/shaders-msl/frag/sampler-1d-lod.frag b/reference/shaders-msl/frag/sampler-1d-lod.frag
new file mode 100644
index 0000000000..1da2036e3a
--- /dev/null
+++ b/reference/shaders-msl/frag/sampler-1d-lod.frag
@@ -0,0 +1,22 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float4 FragColor [[color(0)]];
+};
+
+struct main0_in
+{
+    float vTex [[user(locn0)]];
+};
+
+fragment main0_out main0(main0_in in [[stage_in]], texture1d<float> uSampler [[texture(0)]], sampler uSamplerSmplr [[sampler(0)]])
+{
+    main0_out out = {};
+    out.FragColor += ((uSampler.sample(uSamplerSmplr, in.vTex) + uSampler.sample(uSamplerSmplr, in.vTex)) + uSampler.sample(uSamplerSmplr, in.vTex));
+    return out;
+}
+
diff --git a/reference/shaders-msl/frag/sampler-image-arrays.msl2.frag b/reference/shaders-msl/frag/sampler-image-arrays.msl2.frag
new file mode 100644
index 0000000000..2aac733138
--- /dev/null
+++ b/reference/shaders-msl/frag/sampler-image-arrays.msl2.frag
@@ -0,0 +1,45 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float4 FragColor [[color(0)]];
+};
+
+struct main0_in
+{
+    float2 vTex [[user(locn0)]];
+    int vIndex [[user(locn1)]];
+};
+
+float4 sample_from_global(thread int& vIndex, thread float2& vTex, thread const array<texture2d<float>, 4> uSampler, thread const array<sampler, 4> uSamplerSmplr)
+{
+    return uSampler[vIndex].sample(uSamplerSmplr[vIndex], (vTex + float2(0.100000001490116119384765625)));
+}
+
+float4 sample_from_argument(thread const array<texture2d<float>, 4> samplers, thread const array<sampler, 4> samplersSmplr, thread int& vIndex, thread float2& vTex)
+{
+    return samplers[vIndex].sample(samplersSmplr[vIndex], (vTex + float2(0.20000000298023223876953125)));
+}
+
+float4 sample_single_from_argument(thread const texture2d<float> samp, thread const sampler sampSmplr, thread float2& vTex)
+{
+    return samp.sample(sampSmplr, (vTex + float2(0.300000011920928955078125)));
+}
+
+fragment main0_out main0(main0_in in [[stage_in]], array<texture2d<float>, 4> uSampler [[texture(0)]], array<texture2d<float>, 4> uTextures [[texture(8)]], array<sampler, 4> uSamplerSmplr [[sampler(0)]], array<sampler, 4> uSamplers [[sampler(4)]])
+{
+    main0_out out = {};
+    out.FragColor = float4(0.0);
+    out.FragColor += uTextures[2].sample(uSamplers[1], in.vTex);
+    out.FragColor += uSampler[in.vIndex].sample(uSamplerSmplr[in.vIndex], in.vTex);
+    out.FragColor += sample_from_global(in.vIndex, in.vTex, uSampler, uSamplerSmplr);
+    out.FragColor += sample_from_argument(uSampler, uSamplerSmplr, in.vIndex, in.vTex);
+    out.FragColor += sample_single_from_argument(uSampler[3], uSamplerSmplr[3], in.vTex);
+    return out;
+}
+
diff --git a/reference/shaders-msl/frag/sampler.frag b/reference/shaders-msl/frag/sampler.frag
index 5d23492905..395854699e 100644
--- a/reference/shaders-msl/frag/sampler.frag
+++ b/reference/shaders-msl/frag/sampler.frag
@@ -5,18 +5,18 @@
 
 using namespace metal;
 
-struct main0_in
-{
-    float2 vTex [[user(locn1)]];
-    float4 vColor [[user(locn0)]];
-};
-
 struct main0_out
 {
     float4 FragColor [[color(0)]];
 };
 
-float4 sample_texture(thread const texture2d<float> tex, thread const sampler& texSmplr, thread const float2& uv)
+struct main0_in
+{
+    float4 vColor [[user(locn0)]];
+    float2 vTex [[user(locn1)]];
+};
+
+float4 sample_texture(thread const texture2d<float> tex, thread const sampler texSmplr, thread const float2& uv)
 {
     return tex.sample(texSmplr, uv);
 }
diff --git a/reference/shaders-msl/frag/separate-image-sampler-argument.frag b/reference/shaders-msl/frag/separate-image-sampler-argument.frag
index 46c0524ab7..c0c2ea11e7 100644
--- a/reference/shaders-msl/frag/separate-image-sampler-argument.frag
+++ b/reference/shaders-msl/frag/separate-image-sampler-argument.frag
@@ -15,7 +15,7 @@ float4 samp(thread const texture2d<float> t, thread const sampler s)
     return t.sample(s, float2(0.5));
 }
 
-fragment main0_out main0(texture2d<float> uDepth [[texture(0)]], sampler uSampler [[sampler(0)]])
+fragment main0_out main0(texture2d<float> uDepth [[texture(1)]], sampler uSampler [[sampler(0)]])
 {
     main0_out out = {};
     out.FragColor = samp(uDepth, uSampler);
diff --git a/reference/shaders-msl/frag/shadow-compare-global-alias.frag b/reference/shaders-msl/frag/shadow-compare-global-alias.frag
new file mode 100644
index 0000000000..2dd2d32dfd
--- /dev/null
+++ b/reference/shaders-msl/frag/shadow-compare-global-alias.frag
@@ -0,0 +1,53 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float FragColor [[color(0)]];
+};
+
+struct main0_in
+{
+    float3 vUV [[user(locn0)]];
+};
+
+float Samp(thread const float3& uv, thread depth2d<float> uTex, thread sampler uSamp)
+{
+    return uTex.sample_compare(uSamp, uv.xy, uv.z);
+}
+
+float Samp2(thread const float3& uv, thread depth2d<float> uSampler, thread const sampler uSamplerSmplr, thread float3& vUV)
+{
+    return uSampler.sample_compare(uSamplerSmplr, vUV.xy, vUV.z);
+}
+
+float Samp3(thread const depth2d<float> uT, thread const sampler uS, thread const float3& uv, thread float3& vUV)
+{
+    return uT.sample_compare(uS, vUV.xy, vUV.z);
+}
+
+float Samp4(thread const depth2d<float> uS, thread const sampler uSSmplr, thread const float3& uv, thread float3& vUV)
+{
+    return uS.sample_compare(uSSmplr, vUV.xy, vUV.z);
+}
+
+fragment main0_out main0(main0_in in [[stage_in]], depth2d<float> uSampler [[texture(0)]], depth2d<float> uTex [[texture(1)]], sampler uSamplerSmplr [[sampler(0)]], sampler uSamp [[sampler(2)]])
+{
+    main0_out out = {};
+    out.FragColor = uSampler.sample_compare(uSamplerSmplr, in.vUV.xy, in.vUV.z);
+    out.FragColor += uTex.sample_compare(uSamp, in.vUV.xy, in.vUV.z);
+    float3 param = in.vUV;
+    out.FragColor += Samp(param, uTex, uSamp);
+    float3 param_1 = in.vUV;
+    out.FragColor += Samp2(param_1, uSampler, uSamplerSmplr, in.vUV);
+    float3 param_2 = in.vUV;
+    out.FragColor += Samp3(uTex, uSamp, param_2, in.vUV);
+    float3 param_3 = in.vUV;
+    out.FragColor += Samp4(uSampler, uSamplerSmplr, param_3, in.vUV);
+    return out;
+}
+
diff --git a/reference/shaders-msl/frag/spec-constant-block-size.frag b/reference/shaders-msl/frag/spec-constant-block-size.frag
index 4237d941fe..445f4362de 100644
--- a/reference/shaders-msl/frag/spec-constant-block-size.frag
+++ b/reference/shaders-msl/frag/spec-constant-block-size.frag
@@ -8,16 +8,16 @@ struct SpecConstArray
     float4 samples[2];
 };
 
-struct main0_in
-{
-    int Index [[user(locn0)]];
-};
-
 struct main0_out
 {
     float4 FragColor [[color(0)]];
 };
 
+struct main0_in
+{
+    int Index [[user(locn0)]];
+};
+
 fragment main0_out main0(main0_in in [[stage_in]], constant SpecConstArray& _15 [[buffer(0)]])
 {
     main0_out out = {};
diff --git a/reference/shaders-msl/frag/swizzle.frag b/reference/shaders-msl/frag/swizzle.frag
index eb46111f00..7a0494e064 100644
--- a/reference/shaders-msl/frag/swizzle.frag
+++ b/reference/shaders-msl/frag/swizzle.frag
@@ -3,17 +3,17 @@
 
 using namespace metal;
 
-struct main0_in
-{
-    float2 vUV [[user(locn2)]];
-    float3 vNormal [[user(locn1)]];
-};
-
 struct main0_out
 {
     float4 FragColor [[color(0)]];
 };
 
+struct main0_in
+{
+    float3 vNormal [[user(locn1)]];
+    float2 vUV [[user(locn2)]];
+};
+
 fragment main0_out main0(main0_in in [[stage_in]], texture2d<float> samp [[texture(0)]], sampler sampSmplr [[sampler(0)]])
 {
     main0_out out = {};
diff --git a/reference/shaders-msl/frag/texture-proj-shadow.frag b/reference/shaders-msl/frag/texture-proj-shadow.frag
index 8b9b03a59e..c5ab0ee007 100644
--- a/reference/shaders-msl/frag/texture-proj-shadow.frag
+++ b/reference/shaders-msl/frag/texture-proj-shadow.frag
@@ -3,19 +3,19 @@
 
 using namespace metal;
 
-struct main0_in
-{
-    float2 vClip2 [[user(locn2)]];
-    float4 vClip4 [[user(locn1)]];
-    float3 vClip3 [[user(locn0)]];
-};
-
 struct main0_out
 {
     float FragColor [[color(0)]];
 };
 
-fragment main0_out main0(main0_in in [[stage_in]], depth2d<float> uShadow2D [[texture(0)]], texture1d<float> uSampler1D [[texture(1)]], texture2d<float> uSampler2D [[texture(2)]], texture3d<float> uSampler3D [[texture(3)]], sampler uShadow2DSmplr [[sampler(0)]], sampler uSampler1DSmplr [[sampler(1)]], sampler uSampler2DSmplr [[sampler(2)]], sampler uSampler3DSmplr [[sampler(3)]])
+struct main0_in
+{
+    float3 vClip3 [[user(locn0)]];
+    float4 vClip4 [[user(locn1)]];
+    float2 vClip2 [[user(locn2)]];
+};
+
+fragment main0_out main0(main0_in in [[stage_in]], depth2d<float> uShadow2D [[texture(1)]], texture1d<float> uSampler1D [[texture(2)]], texture2d<float> uSampler2D [[texture(3)]], texture3d<float> uSampler3D [[texture(4)]], sampler uShadow2DSmplr [[sampler(1)]], sampler uSampler1DSmplr [[sampler(2)]], sampler uSampler2DSmplr [[sampler(3)]], sampler uSampler3DSmplr [[sampler(4)]])
 {
     main0_out out = {};
     float4 _20 = in.vClip4;
diff --git a/reference/shaders-msl/frag/ubo_layout.frag b/reference/shaders-msl/frag/ubo_layout.frag
index 8c03e33b39..0bc27462b2 100644
--- a/reference/shaders-msl/frag/ubo_layout.frag
+++ b/reference/shaders-msl/frag/ubo_layout.frag
@@ -13,9 +13,14 @@ struct UBO1
     Str foo;
 };
 
+struct Str_1
+{
+    float4x4 foo;
+};
+
 struct UBO2
 {
-    Str foo;
+    Str_1 foo;
 };
 
 struct main0_out
diff --git a/reference/shaders-msl/frag/unary-enclose.frag b/reference/shaders-msl/frag/unary-enclose.frag
index 5a80f4d77c..c33269f2bf 100644
--- a/reference/shaders-msl/frag/unary-enclose.frag
+++ b/reference/shaders-msl/frag/unary-enclose.frag
@@ -3,17 +3,17 @@
 
 using namespace metal;
 
-struct main0_in
-{
-    int4 vIn1 [[user(locn1)]];
-    float4 vIn [[user(locn0)]];
-};
-
 struct main0_out
 {
     float4 FragColor [[color(0)]];
 };
 
+struct main0_in
+{
+    float4 vIn [[user(locn0)]];
+    int4 vIn1 [[user(locn1)]];
+};
+
 fragment main0_out main0(main0_in in [[stage_in]])
 {
     main0_out out = {};
diff --git a/reference/shaders-msl/legacy/vert/transpose.legacy.vert b/reference/shaders-msl/legacy/vert/transpose.legacy.vert
index ad9ed8d7fd..3837c8bc04 100644
--- a/reference/shaders-msl/legacy/vert/transpose.legacy.vert
+++ b/reference/shaders-msl/legacy/vert/transpose.legacy.vert
@@ -10,16 +10,16 @@ struct Buffer
     float4x4 M;
 };
 
-struct main0_in
-{
-    float4 Position [[attribute(0)]];
-};
-
 struct main0_out
 {
     float4 gl_Position [[position]];
 };
 
+struct main0_in
+{
+    float4 Position [[attribute(0)]];
+};
+
 vertex main0_out main0(main0_in in [[stage_in]], constant Buffer& _13 [[buffer(0)]])
 {
     main0_out out = {};
diff --git a/reference/shaders-msl/vert/basic.vert b/reference/shaders-msl/vert/basic.vert
index 1592b5c5cf..ffb4357126 100644
--- a/reference/shaders-msl/vert/basic.vert
+++ b/reference/shaders-msl/vert/basic.vert
@@ -8,18 +8,18 @@ struct UBO
     float4x4 uMVP;
 };
 
-struct main0_in
-{
-    float3 aNormal [[attribute(1)]];
-    float4 aVertex [[attribute(0)]];
-};
-
 struct main0_out
 {
     float3 vNormal [[user(locn0)]];
     float4 gl_Position [[position]];
 };
 
+struct main0_in
+{
+    float4 aVertex [[attribute(0)]];
+    float3 aNormal [[attribute(1)]];
+};
+
 vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _16 [[buffer(0)]])
 {
     main0_out out = {};
diff --git a/reference/shaders-msl/vert/copy.flatten.vert b/reference/shaders-msl/vert/copy.flatten.vert
index 9ae5fcdb17..23e520478d 100644
--- a/reference/shaders-msl/vert/copy.flatten.vert
+++ b/reference/shaders-msl/vert/copy.flatten.vert
@@ -16,10 +16,11 @@ struct UBO
     Light lights[4];
 };
 
-struct main0_in
+struct Light_1
 {
-    float3 aNormal [[attribute(1)]];
-    float4 aVertex [[attribute(0)]];
+    float3 Position;
+    float Radius;
+    float4 Color;
 };
 
 struct main0_out
@@ -28,6 +29,12 @@ struct main0_out
     float4 gl_Position [[position]];
 };
 
+struct main0_in
+{
+    float4 aVertex [[attribute(0)]];
+    float3 aNormal [[attribute(1)]];
+};
+
 vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _21 [[buffer(0)]])
 {
     main0_out out = {};
@@ -35,7 +42,7 @@ vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _21 [[buffer(0)]]
     out.vColor = float4(0.0);
     for (int i = 0; i < 4; i++)
     {
-        Light light;
+        Light_1 light;
         light.Position = _21.lights[i].Position;
         light.Radius = _21.lights[i].Radius;
         light.Color = _21.lights[i].Color;
diff --git a/reference/shaders-msl/vert/dynamic.flatten.vert b/reference/shaders-msl/vert/dynamic.flatten.vert
index 696966ca0b..de654a12b4 100644
--- a/reference/shaders-msl/vert/dynamic.flatten.vert
+++ b/reference/shaders-msl/vert/dynamic.flatten.vert
@@ -16,18 +16,18 @@ struct UBO
     Light lights[4];
 };
 
-struct main0_in
-{
-    float3 aNormal [[attribute(1)]];
-    float4 aVertex [[attribute(0)]];
-};
-
 struct main0_out
 {
     float4 vColor [[user(locn0)]];
     float4 gl_Position [[position]];
 };
 
+struct main0_in
+{
+    float4 aVertex [[attribute(0)]];
+    float3 aNormal [[attribute(1)]];
+};
+
 vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _21 [[buffer(0)]])
 {
     main0_out out = {};
@@ -35,7 +35,7 @@ vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _21 [[buffer(0)]]
     out.vColor = float4(0.0);
     for (int i = 0; i < 4; i++)
     {
-        float3 L = in.aVertex.xyz - _21.lights[i].Position;
+        float3 L = in.aVertex.xyz - float3(_21.lights[i].Position);
         out.vColor += ((_21.lights[i].Color * clamp(1.0 - (length(L) / _21.lights[i].Radius), 0.0, 1.0)) * dot(in.aNormal, normalize(L)));
     }
     return out;
diff --git a/reference/shaders-msl/vert/functions.vert b/reference/shaders-msl/vert/functions.vert
index 6e07667b69..f710225261 100644
--- a/reference/shaders-msl/vert/functions.vert
+++ b/reference/shaders-msl/vert/functions.vert
@@ -13,12 +13,6 @@ struct UBO
     int2 bits;
 };
 
-struct main0_in
-{
-    float3 aNormal [[attribute(1)]];
-    float4 aVertex [[attribute(0)]];
-};
-
 struct main0_out
 {
     float3 vNormal [[user(locn0)]];
@@ -29,18 +23,24 @@ struct main0_out
     float4 gl_Position [[position]];
 };
 
+struct main0_in
+{
+    float4 aVertex [[attribute(0)]];
+    float3 aNormal [[attribute(1)]];
+};
+
 // Implementation of the GLSL radians() function
 template<typename T>
 T radians(T d)
 {
-    return d * 0.01745329251;
+    return d * T(0.01745329251);
 }
 
 // Implementation of the GLSL degrees() function
 template<typename T>
 T degrees(T r)
 {
-    return r * 57.2957795131;
+    return r * T(57.2957795131);
 }
 
 // Implementation of the GLSL findLSB() function
diff --git a/reference/shaders-msl/vert/in_out_array_mat.vert b/reference/shaders-msl/vert/in_out_array_mat.vert
new file mode 100644
index 0000000000..95be574a51
--- /dev/null
+++ b/reference/shaders-msl/vert/in_out_array_mat.vert
@@ -0,0 +1,78 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct UBO
+{
+    float4x4 projection;
+    float4x4 model;
+    float lodBias;
+};
+
+struct main0_out
+{
+    float3 outPos [[user(locn0)]];
+    float3 outNormal [[user(locn1)]];
+    float4 outTransModel_0 [[user(locn2)]];
+    float4 outTransModel_1 [[user(locn3)]];
+    float4 outTransModel_2 [[user(locn4)]];
+    float4 outTransModel_3 [[user(locn5)]];
+    float outLodBias [[user(locn6)]];
+    float4 color [[user(locn7)]];
+    float4 gl_Position [[position]];
+};
+
+struct main0_in
+{
+    float3 inPos [[attribute(0)]];
+    float4 colors_0 [[attribute(1)]];
+    float4 colors_1 [[attribute(2)]];
+    float4 colors_2 [[attribute(3)]];
+    float3 inNormal [[attribute(4)]];
+    float4 inViewMat_0 [[attribute(5)]];
+    float4 inViewMat_1 [[attribute(6)]];
+    float4 inViewMat_2 [[attribute(7)]];
+    float4 inViewMat_3 [[attribute(8)]];
+};
+
+void write_deeper_in_function(thread float4x4& outTransModel, constant UBO& ubo, thread float4& color, thread float4 (&colors)[3])
+{
+    outTransModel[1].y = ubo.lodBias;
+    color = colors[2];
+}
+
+void write_in_function(thread float4x4& outTransModel, constant UBO& ubo, thread float4& color, thread float4 (&colors)[3], thread float3& inNormal)
+{
+    outTransModel[2] = float4(inNormal, 1.0);
+    write_deeper_in_function(outTransModel, ubo, color, colors);
+}
+
+vertex main0_out main0(main0_in in [[stage_in]], constant UBO& ubo [[buffer(0)]])
+{
+    main0_out out = {};
+    float4x4 outTransModel = {};
+    float4 colors[3] = {};
+    float4x4 inViewMat = {};
+    colors[0] = in.colors_0;
+    colors[1] = in.colors_1;
+    colors[2] = in.colors_2;
+    inViewMat[0] = in.inViewMat_0;
+    inViewMat[1] = in.inViewMat_1;
+    inViewMat[2] = in.inViewMat_2;
+    inViewMat[3] = in.inViewMat_3;
+    out.gl_Position = (ubo.projection * ubo.model) * float4(in.inPos, 1.0);
+    out.outPos = float3((ubo.model * float4(in.inPos, 1.0)).xyz);
+    out.outNormal = float3x3(float3(float3(ubo.model[0].x, ubo.model[0].y, ubo.model[0].z)), float3(float3(ubo.model[1].x, ubo.model[1].y, ubo.model[1].z)), float3(float3(ubo.model[2].x, ubo.model[2].y, ubo.model[2].z))) * in.inNormal;
+    out.outLodBias = ubo.lodBias;
+    outTransModel = transpose(ubo.model) * inViewMat;
+    write_in_function(outTransModel, ubo, out.color, colors, in.inNormal);
+    out.outTransModel_0 = outTransModel[0];
+    out.outTransModel_1 = outTransModel[1];
+    out.outTransModel_2 = outTransModel[2];
+    out.outTransModel_3 = outTransModel[3];
+    return out;
+}
+
diff --git a/reference/shaders-msl/vert/out_block.vert b/reference/shaders-msl/vert/out_block.vert
index 3ae18387a6..cf1334ec0b 100644
--- a/reference/shaders-msl/vert/out_block.vert
+++ b/reference/shaders-msl/vert/out_block.vert
@@ -8,12 +8,6 @@ struct Transform
     float4x4 transform;
 };
 
-struct main0_in
-{
-    float4 color [[attribute(1)]];
-    float3 position [[attribute(0)]];
-};
-
 struct main0_out
 {
     float4 VertexOut_color [[user(locn2)]];
@@ -21,6 +15,12 @@ struct main0_out
     float4 gl_Position [[position]];
 };
 
+struct main0_in
+{
+    float3 position [[attribute(0)]];
+    float4 color [[attribute(1)]];
+};
+
 vertex main0_out main0(main0_in in [[stage_in]], constant Transform& block [[buffer(0)]])
 {
     main0_out out = {};
diff --git a/reference/shaders-msl/vert/packed_matrix.vert b/reference/shaders-msl/vert/packed_matrix.vert
index 5d025c4cde..89638511d6 100644
--- a/reference/shaders-msl/vert/packed_matrix.vert
+++ b/reference/shaders-msl/vert/packed_matrix.vert
@@ -26,27 +26,28 @@ struct _42
     float2 _m9;
 };
 
-struct main0_in
-{
-    float4 m_25 [[attribute(0)]];
-};
-
 struct main0_out
 {
     float3 m_72 [[user(locn0)]];
     float4 gl_Position [[position]];
 };
 
-vertex main0_out main0(main0_in in [[stage_in]], constant _15& _17 [[buffer(0)]], constant _42& _44 [[buffer(1)]])
+struct main0_in
+{
+    float4 m_25 [[attribute(0)]];
+};
+
+vertex main0_out main0(main0_in in [[stage_in]], constant _42& _44 [[buffer(12)]], constant _15& _17 [[buffer(13)]])
 {
     main0_out out = {};
+    float3 _91;
     float3 _13;
     do
     {
         _13 = normalize(float4(in.m_25.xyz, 0.0) * _17._m1);
         break;
     } while (false);
-    float4 _39 = _44._m0 * float4(_44._m3 + (in.m_25.xyz * (_44._m6 + _44._m7)), 1.0);
+    float4 _39 = _44._m0 * float4(float3(_44._m3) + (in.m_25.xyz * (_44._m6 + _44._m7)), 1.0);
     out.m_72 = _13;
     float4 _74 = _39;
     _74.y = -_39.y;
diff --git a/reference/shaders-msl/vert/pointsize.vert b/reference/shaders-msl/vert/pointsize.vert
index faf828b4d3..8e5782bde4 100644
--- a/reference/shaders-msl/vert/pointsize.vert
+++ b/reference/shaders-msl/vert/pointsize.vert
@@ -9,12 +9,6 @@ struct params
     float psize;
 };
 
-struct main0_in
-{
-    float4 color0 [[attribute(1)]];
-    float4 position [[attribute(0)]];
-};
-
 struct main0_out
 {
     float4 color [[user(locn0)]];
@@ -22,6 +16,12 @@ struct main0_out
     float gl_PointSize [[point_size]];
 };
 
+struct main0_in
+{
+    float4 position [[attribute(0)]];
+    float4 color0 [[attribute(1)]];
+};
+
 vertex main0_out main0(main0_in in [[stage_in]], constant params& _19 [[buffer(0)]])
 {
     main0_out out = {};
diff --git a/reference/shaders-msl/vert/read-from-row-major-array.vert b/reference/shaders-msl/vert/read-from-row-major-array.vert
new file mode 100644
index 0000000000..9a633c5fe6
--- /dev/null
+++ b/reference/shaders-msl/vert/read-from-row-major-array.vert
@@ -0,0 +1,66 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct Block
+{
+    float2x3 var[3][4];
+};
+
+struct main0_out
+{
+    float v_vtxResult [[user(locn0)]];
+    float4 gl_Position [[position]];
+};
+
+struct main0_in
+{
+    float4 a_position [[attribute(0)]];
+};
+
+// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization.
+float2x3 spvConvertFromRowMajor2x3(float2x3 m)
+{
+    return float2x3(float3(m[0][0], m[0][2], m[1][1]), float3(m[0][1], m[1][0], m[1][2]));
+}
+
+float compare_float(thread const float& a, thread const float& b)
+{
+    return float(abs(a - b) < 0.0500000007450580596923828125);
+}
+
+float compare_vec3(thread const float3& a, thread const float3& b)
+{
+    float param = a.x;
+    float param_1 = b.x;
+    float param_2 = a.y;
+    float param_3 = b.y;
+    float param_4 = a.z;
+    float param_5 = b.z;
+    return (compare_float(param, param_1) * compare_float(param_2, param_3)) * compare_float(param_4, param_5);
+}
+
+float compare_mat2x3(thread const float2x3& a, thread const float2x3& b)
+{
+    float3 param = a[0];
+    float3 param_1 = b[0];
+    float3 param_2 = a[1];
+    float3 param_3 = b[1];
+    return compare_vec3(param, param_1) * compare_vec3(param_2, param_3);
+}
+
+vertex main0_out main0(main0_in in [[stage_in]], constant Block& _104 [[buffer(0)]])
+{
+    main0_out out = {};
+    out.gl_Position = in.a_position;
+    float result = 1.0;
+    float2x3 param = spvConvertFromRowMajor2x3(_104.var[0][0]);
+    float2x3 param_1 = float2x3(float3(2.0, 6.0, -6.0), float3(0.0, 5.0, 5.0));
+    result *= compare_mat2x3(param, param_1);
+    out.v_vtxResult = result;
+    return out;
+}
+
diff --git a/reference/shaders-msl/vert/return-array.vert b/reference/shaders-msl/vert/return-array.vert
index c3857b909a..b513a2fe57 100644
--- a/reference/shaders-msl/vert/return-array.vert
+++ b/reference/shaders-msl/vert/return-array.vert
@@ -7,17 +7,17 @@ using namespace metal;
 
 constant float4 _20[2] = {float4(10.0), float4(20.0)};
 
-struct main0_in
-{
-    float4 vInput1 [[attribute(1)]];
-    float4 vInput0 [[attribute(0)]];
-};
-
 struct main0_out
 {
     float4 gl_Position [[position]];
 };
 
+struct main0_in
+{
+    float4 vInput0 [[attribute(0)]];
+    float4 vInput1 [[attribute(1)]];
+};
+
 // Implementation of an array copy function to cover GLSL's ability to copy an array via assignment.
 template<typename T, uint N>
 void spvArrayCopy(thread T (&dst)[N], thread const T (&src)[N])
diff --git a/reference/shaders-msl/vert/texture_buffer.vert b/reference/shaders-msl/vert/texture_buffer.vert
index 690757b830..f7bcb7918b 100644
--- a/reference/shaders-msl/vert/texture_buffer.vert
+++ b/reference/shaders-msl/vert/texture_buffer.vert
@@ -8,7 +8,7 @@ struct main0_out
     float4 gl_Position [[position]];
 };
 
-vertex main0_out main0(texture2d<float> uSamp [[texture(0)]], texture2d<float> uSampo [[texture(1)]])
+vertex main0_out main0(texture2d<float> uSamp [[texture(4)]], texture2d<float> uSampo [[texture(5)]])
 {
     main0_out out = {};
     out.gl_Position = uSamp.read(uint2(10, 0)) + uSampo.read(uint2(100, 0));
diff --git a/reference/shaders-msl/vert/ubo.alignment.vert b/reference/shaders-msl/vert/ubo.alignment.vert
index 6e48ae0e42..9a7ea56c61 100644
--- a/reference/shaders-msl/vert/ubo.alignment.vert
+++ b/reference/shaders-msl/vert/ubo.alignment.vert
@@ -12,12 +12,6 @@ struct UBO
     float opacity;
 };
 
-struct main0_in
-{
-    float3 aNormal [[attribute(1)]];
-    float4 aVertex [[attribute(0)]];
-};
-
 struct main0_out
 {
     float3 vNormal [[user(locn0)]];
@@ -26,12 +20,18 @@ struct main0_out
     float4 gl_Position [[position]];
 };
 
+struct main0_in
+{
+    float4 aVertex [[attribute(0)]];
+    float3 aNormal [[attribute(1)]];
+};
+
 vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _18 [[buffer(0)]])
 {
     main0_out out = {};
     out.gl_Position = _18.mvp * in.aVertex;
     out.vNormal = in.aNormal;
-    out.vColor = _18.color * _18.opacity;
+    out.vColor = float3(_18.color) * _18.opacity;
     out.vSize = _18.targSize * _18.opacity;
     return out;
 }
diff --git a/reference/shaders-msl/vert/ubo.vert b/reference/shaders-msl/vert/ubo.vert
index 4a1adcd7f6..86ba1e9687 100644
--- a/reference/shaders-msl/vert/ubo.vert
+++ b/reference/shaders-msl/vert/ubo.vert
@@ -8,18 +8,18 @@ struct UBO
     float4x4 mvp;
 };
 
-struct main0_in
-{
-    float3 aNormal [[attribute(1)]];
-    float4 aVertex [[attribute(0)]];
-};
-
 struct main0_out
 {
     float3 vNormal [[user(locn0)]];
     float4 gl_Position [[position]];
 };
 
+struct main0_in
+{
+    float4 aVertex [[attribute(0)]];
+    float3 aNormal [[attribute(1)]];
+};
+
 vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _16 [[buffer(0)]])
 {
     main0_out out = {};
diff --git a/reference/shaders-msl/vulkan/frag/push-constant.vk.frag b/reference/shaders-msl/vulkan/frag/push-constant.vk.frag
index bc97e3cc51..7b8c502b32 100644
--- a/reference/shaders-msl/vulkan/frag/push-constant.vk.frag
+++ b/reference/shaders-msl/vulkan/frag/push-constant.vk.frag
@@ -9,16 +9,16 @@ struct PushConstants
     float4 value1;
 };
 
-struct main0_in
-{
-    float4 vColor [[user(locn0)]];
-};
-
 struct main0_out
 {
     float4 FragColor [[color(0)]];
 };
 
+struct main0_in
+{
+    float4 vColor [[user(locn0)]];
+};
+
 fragment main0_out main0(main0_in in [[stage_in]], constant PushConstants& push [[buffer(0)]])
 {
     main0_out out = {};
diff --git a/reference/shaders-msl/vulkan/frag/spec-constant.vk.frag b/reference/shaders-msl/vulkan/frag/spec-constant.vk.frag
index 47dabb1771..aa10a501a6 100644
--- a/reference/shaders-msl/vulkan/frag/spec-constant.vk.frag
+++ b/reference/shaders-msl/vulkan/frag/spec-constant.vk.frag
@@ -9,16 +9,52 @@ constant float b_tmp [[function_constant(2)]];
 constant float b = is_function_constant_defined(b_tmp) ? b_tmp : 2.0;
 constant int c_tmp [[function_constant(3)]];
 constant int c = is_function_constant_defined(c_tmp) ? c_tmp : 3;
+constant uint _18 = (uint(c) + 0u);
+constant int _21 = (-c);
+constant int _23 = (~c);
 constant int d_tmp [[function_constant(4)]];
 constant int d = is_function_constant_defined(d_tmp) ? d_tmp : 4;
+constant int _26 = (c + d);
+constant int _28 = (c - d);
+constant int _30 = (c * d);
+constant int _32 = (c / d);
 constant uint e_tmp [[function_constant(5)]];
 constant uint e = is_function_constant_defined(e_tmp) ? e_tmp : 5u;
 constant uint f_tmp [[function_constant(6)]];
 constant uint f = is_function_constant_defined(f_tmp) ? f_tmp : 6u;
+constant uint _36 = (e / f);
+constant int _38 = (c % d);
+constant uint _40 = (e % f);
+constant int _42 = (c >> d);
+constant uint _44 = (e >> f);
+constant int _46 = (c << d);
+constant int _48 = (c | d);
+constant int _50 = (c ^ d);
+constant int _52 = (c & d);
 constant bool g_tmp [[function_constant(7)]];
 constant bool g = is_function_constant_defined(g_tmp) ? g_tmp : false;
 constant bool h_tmp [[function_constant(8)]];
 constant bool h = is_function_constant_defined(h_tmp) ? h_tmp : true;
+constant bool _58 = (g || h);
+constant bool _60 = (g && h);
+constant bool _62 = (!g);
+constant bool _64 = (g == h);
+constant bool _66 = (g != h);
+constant bool _68 = (c == d);
+constant bool _70 = (c != d);
+constant bool _72 = (c < d);
+constant bool _74 = (e < f);
+constant bool _76 = (c > d);
+constant bool _78 = (e > f);
+constant bool _80 = (c <= d);
+constant bool _82 = (e <= f);
+constant bool _84 = (c >= d);
+constant bool _86 = (e >= f);
+constant int _92 = int(e + 0u);
+constant bool _94 = (c != int(0u));
+constant bool _96 = (e != 0u);
+constant int _100 = int(g);
+constant uint _103 = uint(g);
 
 struct main0_out
 {
@@ -30,43 +66,43 @@ fragment main0_out main0()
     main0_out out = {};
     float t0 = a;
     float t1 = b;
-    uint c0 = (uint(c) + 0u);
-    int c1 = (-c);
-    int c2 = (~c);
-    int c3 = (c + d);
-    int c4 = (c - d);
-    int c5 = (c * d);
-    int c6 = (c / d);
-    uint c7 = (e / f);
-    int c8 = (c % d);
-    uint c9 = (e % f);
-    int c10 = (c >> d);
-    uint c11 = (e >> f);
-    int c12 = (c << d);
-    int c13 = (c | d);
-    int c14 = (c ^ d);
-    int c15 = (c & d);
-    bool c16 = (g || h);
-    bool c17 = (g && h);
-    bool c18 = (!g);
-    bool c19 = (g == h);
-    bool c20 = (g != h);
-    bool c21 = (c == d);
-    bool c22 = (c != d);
-    bool c23 = (c < d);
-    bool c24 = (e < f);
-    bool c25 = (c > d);
-    bool c26 = (e > f);
-    bool c27 = (c <= d);
-    bool c28 = (e <= f);
-    bool c29 = (c >= d);
-    bool c30 = (e >= f);
+    uint c0 = _18;
+    int c1 = _21;
+    int c2 = _23;
+    int c3 = _26;
+    int c4 = _28;
+    int c5 = _30;
+    int c6 = _32;
+    uint c7 = _36;
+    int c8 = _38;
+    uint c9 = _40;
+    int c10 = _42;
+    uint c11 = _44;
+    int c12 = _46;
+    int c13 = _48;
+    int c14 = _50;
+    int c15 = _52;
+    bool c16 = _58;
+    bool c17 = _60;
+    bool c18 = _62;
+    bool c19 = _64;
+    bool c20 = _66;
+    bool c21 = _68;
+    bool c22 = _70;
+    bool c23 = _72;
+    bool c24 = _74;
+    bool c25 = _76;
+    bool c26 = _78;
+    bool c27 = _80;
+    bool c28 = _82;
+    bool c29 = _84;
+    bool c30 = _86;
     int c31 = c8 + c3;
-    int c32 = int(e + 0u);
-    bool c33 = (c != int(0u));
-    bool c34 = (e != 0u);
-    int c35 = int(g);
-    uint c36 = uint(g);
+    int c32 = _92;
+    bool c33 = _94;
+    bool c34 = _96;
+    int c35 = _100;
+    uint c36 = _103;
     float c37 = float(g);
     out.FragColor = float4(t0 + t1);
     return out;
diff --git a/reference/opt/shaders/asm/frag/inliner-dominator-inside-loop.asm.frag b/reference/shaders-no-opt/asm/frag/inliner-dominator-inside-loop.asm.frag
similarity index 97%
rename from reference/opt/shaders/asm/frag/inliner-dominator-inside-loop.asm.frag
rename to reference/shaders-no-opt/asm/frag/inliner-dominator-inside-loop.asm.frag
index 98116cfdc7..01797173f1 100644
--- a/reference/opt/shaders/asm/frag/inliner-dominator-inside-loop.asm.frag
+++ b/reference/shaders-no-opt/asm/frag/inliner-dominator-inside-loop.asm.frag
@@ -147,6 +147,7 @@ void main()
         _193 = _192;
         break;
     } while (false);
+    vec4 _194 = _193 * 1.0;
     vec4 _220;
     do
     {
@@ -172,7 +173,7 @@ void main()
     vec3 _253 = vec3(_252.x, _252.y, _232.z);
     vec2 _255 = _253.xy * _165;
     vec3 _256 = vec3(_255.x, _255.y, _253.z);
-    vec3 _271 = ((IN_Color.xyz * (_193 * 1.0).xyz) * (1.0 + (_256.x * 0.300000011920928955078125))) * (texture(SPIRV_Cross_CombinedStudsMapTextureStudsMapSampler, _156.UvStuds).x * 2.0);
+    vec3 _271 = ((IN_Color.xyz * _194.xyz) * (1.0 + (_256.x * 0.300000011920928955078125))) * (texture(SPIRV_Cross_CombinedStudsMapTextureStudsMapSampler, _156.UvStuds).x * 2.0);
     vec4 _298;
     do
     {
diff --git a/reference/shaders-no-opt/asm/vert/empty-struct-composite.asm.vert b/reference/shaders-no-opt/asm/vert/empty-struct-composite.asm.vert
new file mode 100644
index 0000000000..8f786d49e1
--- /dev/null
+++ b/reference/shaders-no-opt/asm/vert/empty-struct-composite.asm.vert
@@ -0,0 +1,13 @@
+#version 450
+
+struct Test
+{
+    int empty_struct_member;
+};
+
+void main()
+{
+    Test _14 = Test(0);
+    Test t = _14;
+}
+
diff --git a/reference/shaders-no-opt/asm/vert/semantic-decoration.asm.vert b/reference/shaders-no-opt/asm/vert/semantic-decoration.asm.vert
new file mode 100644
index 0000000000..9af0e241eb
--- /dev/null
+++ b/reference/shaders-no-opt/asm/vert/semantic-decoration.asm.vert
@@ -0,0 +1,25 @@
+#version 450
+
+struct VOut
+{
+    vec4 p;
+    vec4 c;
+};
+
+layout(location = 0) out vec4 _entryPointOutput_c;
+
+VOut _main()
+{
+    VOut v;
+    v.p = vec4(1.0);
+    v.c = vec4(2.0);
+    return v;
+}
+
+void main()
+{
+    VOut flattenTemp = _main();
+    gl_Position = flattenTemp.p;
+    _entryPointOutput_c = flattenTemp.c;
+}
+
diff --git a/reference/opt/shaders/comp/bitfield.noopt.comp b/reference/shaders-no-opt/comp/bitfield.comp
similarity index 100%
rename from reference/opt/shaders/comp/bitfield.noopt.comp
rename to reference/shaders-no-opt/comp/bitfield.comp
diff --git a/reference/opt/shaders/comp/loop.noopt.comp b/reference/shaders-no-opt/comp/loop.comp
similarity index 100%
rename from reference/opt/shaders/comp/loop.noopt.comp
rename to reference/shaders-no-opt/comp/loop.comp
diff --git a/reference/shaders/comp/return.comp b/reference/shaders-no-opt/comp/return.comp
similarity index 100%
rename from reference/shaders/comp/return.comp
rename to reference/shaders-no-opt/comp/return.comp
diff --git a/reference/shaders/vulkan/frag/spec-constant.vk.frag b/reference/shaders-no-opt/vulkan/frag/spec-constant.vk.frag
similarity index 100%
rename from reference/shaders/vulkan/frag/spec-constant.vk.frag
rename to reference/shaders-no-opt/vulkan/frag/spec-constant.vk.frag
diff --git a/reference/shaders-no-opt/vulkan/frag/spec-constant.vk.frag.vk b/reference/shaders-no-opt/vulkan/frag/spec-constant.vk.frag.vk
new file mode 100644
index 0000000000..c5ae60b276
--- /dev/null
+++ b/reference/shaders-no-opt/vulkan/frag/spec-constant.vk.frag.vk
@@ -0,0 +1,107 @@
+#version 310 es
+precision mediump float;
+precision highp int;
+
+layout(constant_id = 1) const float a = 1.0;
+layout(constant_id = 2) const float b = 2.0;
+layout(constant_id = 3) const int c = 3;
+const uint _18 = (uint(c) + 0u);
+const int _21 = (-c);
+const int _23 = (~c);
+layout(constant_id = 4) const int d = 4;
+const int _26 = (c + d);
+const int _28 = (c - d);
+const int _30 = (c * d);
+const int _32 = (c / d);
+layout(constant_id = 5) const uint e = 5u;
+layout(constant_id = 6) const uint f = 6u;
+const uint _36 = (e / f);
+const int _38 = (c % d);
+const uint _40 = (e % f);
+const int _42 = (c >> d);
+const uint _44 = (e >> f);
+const int _46 = (c << d);
+const int _48 = (c | d);
+const int _50 = (c ^ d);
+const int _52 = (c & d);
+layout(constant_id = 7) const bool g = false;
+layout(constant_id = 8) const bool h = true;
+const bool _58 = (g || h);
+const bool _60 = (g && h);
+const bool _62 = (!g);
+const bool _64 = (g == h);
+const bool _66 = (g != h);
+const bool _68 = (c == d);
+const bool _70 = (c != d);
+const bool _72 = (c < d);
+const bool _74 = (e < f);
+const bool _76 = (c > d);
+const bool _78 = (e > f);
+const bool _80 = (c <= d);
+const bool _82 = (e <= f);
+const bool _84 = (c >= d);
+const bool _86 = (e >= f);
+const int _92 = int(e + 0u);
+const bool _94 = (c != int(0u));
+const bool _96 = (e != 0u);
+const int _100 = int(g);
+const uint _103 = uint(g);
+const int _118 = (c + 3);
+const int _127 = (c + 2);
+const int _135 = (d + 2);
+
+struct Foo
+{
+    float elems[_135];
+};
+
+layout(location = 0) out vec4 FragColor;
+
+void main()
+{
+    float t0 = a;
+    float t1 = b;
+    mediump uint c0 = _18;
+    mediump int c1 = _21;
+    mediump int c2 = _23;
+    mediump int c3 = _26;
+    mediump int c4 = _28;
+    mediump int c5 = _30;
+    mediump int c6 = _32;
+    mediump uint c7 = _36;
+    mediump int c8 = _38;
+    mediump uint c9 = _40;
+    mediump int c10 = _42;
+    mediump uint c11 = _44;
+    mediump int c12 = _46;
+    mediump int c13 = _48;
+    mediump int c14 = _50;
+    mediump int c15 = _52;
+    bool c16 = _58;
+    bool c17 = _60;
+    bool c18 = _62;
+    bool c19 = _64;
+    bool c20 = _66;
+    bool c21 = _68;
+    bool c22 = _70;
+    bool c23 = _72;
+    bool c24 = _74;
+    bool c25 = _76;
+    bool c26 = _78;
+    bool c27 = _80;
+    bool c28 = _82;
+    bool c29 = _84;
+    bool c30 = _86;
+    mediump int c31 = c8 + c3;
+    mediump int c32 = _92;
+    bool c33 = _94;
+    bool c34 = _96;
+    mediump int c35 = _100;
+    mediump uint c36 = _103;
+    float c37 = float(g);
+    float vec0[_118][8];
+    float vec1[_127];
+    Foo foo;
+    FragColor = ((vec4(t0 + t1) + vec4(vec0[0][0])) + vec4(vec1[0])) + vec4(foo.elems[c]);
+}
+
diff --git a/reference/shaders/amd/fragmentMaskFetch_subpassInput.vk.nocompat.invalid.frag b/reference/shaders/amd/fragmentMaskFetch_subpassInput.vk.nocompat.invalid.frag
deleted file mode 100644
index d670898481..0000000000
--- a/reference/shaders/amd/fragmentMaskFetch_subpassInput.vk.nocompat.invalid.frag
+++ /dev/null
@@ -1,11 +0,0 @@
-#version 450
-#extension GL_AMD_shader_fragment_mask : require
-
-layout(binding = 0) uniform sampler2DMS t;
-
-void main()
-{
-    vec4 test2 = fragmentFetchAMD(t, 4u);
-    uint testi2 = fragmentMaskFetchAMD(t);
-}
-
diff --git a/reference/shaders/amd/fs.invalid.frag b/reference/shaders/amd/fs.invalid.frag
index 97e7bcd180..aecf69eba7 100644
--- a/reference/shaders/amd/fs.invalid.frag
+++ b/reference/shaders/amd/fs.invalid.frag
@@ -2,9 +2,9 @@
 #extension GL_AMD_shader_fragment_mask : require
 #extension GL_AMD_shader_explicit_vertex_parameter : require
 
-uniform sampler2DMS texture1;
+layout(binding = 0) uniform sampler2DMS texture1;
 
-layout(location = 0) in vec4 vary;
+layout(location = 0) __explicitInterpAMD in vec4 vary;
 
 void main()
 {
diff --git a/reference/shaders/amd/shader_ballot.comp b/reference/shaders/amd/shader_ballot.comp
index 64ac64d0d2..1fade727c6 100644
--- a/reference/shaders/amd/shader_ballot.comp
+++ b/reference/shaders/amd/shader_ballot.comp
@@ -18,7 +18,7 @@ void main()
 {
     float thisLaneData = _12.inputDataArray[gl_LocalInvocationID.x];
     bool laneActive = thisLaneData > 0.0;
-    uint thisLaneOutputSlot = mbcntAMD(packUint2x32(uvec2(unpackUint2x32(ballotARB(laneActive)).xy)));
+    uint thisLaneOutputSlot = mbcntAMD(packUint2x32(uvec2(uvec4(unpackUint2x32(ballotARB(laneActive)), 0u, 0u).xy)));
     int firstInvocation = readFirstInvocationARB(1);
     int invocation = readInvocationARB(1, 0u);
     vec3 swizzleInvocations = swizzleInvocationsAMD(vec3(0.0, 2.0, 1.0), uvec4(3u));
diff --git a/reference/shaders/asm/comp/bitcast_iequal.asm.comp b/reference/shaders/asm/comp/bitcast_iequal.asm.comp
index 79398b404b..bdb3eeb9af 100644
--- a/reference/shaders/asm/comp/bitcast_iequal.asm.comp
+++ b/reference/shaders/asm/comp/bitcast_iequal.asm.comp
@@ -15,10 +15,12 @@ layout(binding = 1, std430) buffer _4_6
 
 void main()
 {
-    bvec4 _34 = equal(ivec4(_5._m1), _5._m0);
-    bvec4 _35 = equal(_5._m0, ivec4(_5._m1));
-    bvec4 _36 = equal(_5._m1, _5._m1);
-    bvec4 _37 = equal(_5._m0, _5._m0);
+    ivec4 _30 = _5._m0;
+    uvec4 _31 = _5._m1;
+    bvec4 _34 = equal(ivec4(_31), _30);
+    bvec4 _35 = equal(_30, ivec4(_31));
+    bvec4 _36 = equal(_31, _31);
+    bvec4 _37 = equal(_30, _30);
     _6._m0 = mix(uvec4(0u), uvec4(1u), _34);
     _6._m0 = mix(uvec4(0u), uvec4(1u), _35);
     _6._m0 = mix(uvec4(0u), uvec4(1u), _36);
diff --git a/reference/shaders/asm/comp/bitcast_sar.asm.comp b/reference/shaders/asm/comp/bitcast_sar.asm.comp
index 42a4ed0233..283b444cce 100644
--- a/reference/shaders/asm/comp/bitcast_sar.asm.comp
+++ b/reference/shaders/asm/comp/bitcast_sar.asm.comp
@@ -15,13 +15,15 @@ layout(binding = 1, std430) buffer _4_6
 
 void main()
 {
-    _6._m0 = uvec4(ivec4(_5._m1) >> _5._m0);
-    _6._m0 = uvec4(_5._m0 >> ivec4(_5._m1));
-    _6._m0 = uvec4(ivec4(_5._m1) >> ivec4(_5._m1));
-    _6._m0 = uvec4(_5._m0 >> _5._m0);
-    _6._m1 = ivec4(_5._m1) >> ivec4(_5._m1);
-    _6._m1 = _5._m0 >> _5._m0;
-    _6._m1 = ivec4(_5._m1) >> _5._m0;
-    _6._m1 = _5._m0 >> ivec4(_5._m1);
+    ivec4 _22 = _5._m0;
+    uvec4 _23 = _5._m1;
+    _6._m0 = uvec4(ivec4(_23) >> _22);
+    _6._m0 = uvec4(_22 >> ivec4(_23));
+    _6._m0 = uvec4(ivec4(_23) >> ivec4(_23));
+    _6._m0 = uvec4(_22 >> _22);
+    _6._m1 = ivec4(_23) >> ivec4(_23);
+    _6._m1 = _22 >> _22;
+    _6._m1 = ivec4(_23) >> _22;
+    _6._m1 = _22 >> ivec4(_23);
 }
 
diff --git a/reference/shaders/asm/comp/bitcast_sdiv.asm.comp b/reference/shaders/asm/comp/bitcast_sdiv.asm.comp
index eeb97e14a2..e28c481d21 100644
--- a/reference/shaders/asm/comp/bitcast_sdiv.asm.comp
+++ b/reference/shaders/asm/comp/bitcast_sdiv.asm.comp
@@ -15,13 +15,15 @@ layout(binding = 1, std430) buffer _4_6
 
 void main()
 {
-    _6._m0 = uvec4(ivec4(_5._m1) / _5._m0);
-    _6._m0 = uvec4(_5._m0 / ivec4(_5._m1));
-    _6._m0 = uvec4(ivec4(_5._m1) / ivec4(_5._m1));
-    _6._m0 = uvec4(_5._m0 / _5._m0);
-    _6._m1 = ivec4(_5._m1) / ivec4(_5._m1);
-    _6._m1 = _5._m0 / _5._m0;
-    _6._m1 = ivec4(_5._m1) / _5._m0;
-    _6._m1 = _5._m0 / ivec4(_5._m1);
+    ivec4 _22 = _5._m0;
+    uvec4 _23 = _5._m1;
+    _6._m0 = uvec4(ivec4(_23) / _22);
+    _6._m0 = uvec4(_22 / ivec4(_23));
+    _6._m0 = uvec4(ivec4(_23) / ivec4(_23));
+    _6._m0 = uvec4(_22 / _22);
+    _6._m1 = ivec4(_23) / ivec4(_23);
+    _6._m1 = _22 / _22;
+    _6._m1 = ivec4(_23) / _22;
+    _6._m1 = _22 / ivec4(_23);
 }
 
diff --git a/reference/shaders/asm/comp/bitcast_slr.asm.comp b/reference/shaders/asm/comp/bitcast_slr.asm.comp
index 25245e63eb..78efaf3852 100644
--- a/reference/shaders/asm/comp/bitcast_slr.asm.comp
+++ b/reference/shaders/asm/comp/bitcast_slr.asm.comp
@@ -15,13 +15,15 @@ layout(binding = 1, std430) buffer _4_6
 
 void main()
 {
-    _6._m0 = _5._m1 >> uvec4(_5._m0);
-    _6._m0 = uvec4(_5._m0) >> _5._m1;
-    _6._m0 = _5._m1 >> _5._m1;
-    _6._m0 = uvec4(_5._m0) >> uvec4(_5._m0);
-    _6._m1 = ivec4(_5._m1 >> _5._m1);
-    _6._m1 = ivec4(uvec4(_5._m0) >> uvec4(_5._m0));
-    _6._m1 = ivec4(_5._m1 >> uvec4(_5._m0));
-    _6._m1 = ivec4(uvec4(_5._m0) >> _5._m1);
+    ivec4 _22 = _5._m0;
+    uvec4 _23 = _5._m1;
+    _6._m0 = _23 >> uvec4(_22);
+    _6._m0 = uvec4(_22) >> _23;
+    _6._m0 = _23 >> _23;
+    _6._m0 = uvec4(_22) >> uvec4(_22);
+    _6._m1 = ivec4(_23 >> _23);
+    _6._m1 = ivec4(uvec4(_22) >> uvec4(_22));
+    _6._m1 = ivec4(_23 >> uvec4(_22));
+    _6._m1 = ivec4(uvec4(_22) >> _23);
 }
 
diff --git a/reference/shaders/asm/comp/hlsl-functionality.asm.comp b/reference/shaders/asm/comp/hlsl-functionality.asm.comp
new file mode 100644
index 0000000000..ae3bb1f869
--- /dev/null
+++ b/reference/shaders/asm/comp/hlsl-functionality.asm.comp
@@ -0,0 +1,24 @@
+#version 450
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+layout(binding = 0, std430) buffer Buf
+{
+    vec4 _data[];
+} Buf_1;
+
+layout(std430) buffer Buf_count
+{
+    int _count;
+} Buf_count_1;
+
+void _main()
+{
+    int _29 = atomicAdd(Buf_count_1._count, 1);
+    Buf_1._data[_29] = vec4(1.0);
+}
+
+void main()
+{
+    _main();
+}
+
diff --git a/reference/shaders/asm/comp/name-alias.asm.invalid.comp b/reference/shaders/asm/comp/name-alias.asm.invalid.comp
deleted file mode 100644
index 870b1df98d..0000000000
--- a/reference/shaders/asm/comp/name-alias.asm.invalid.comp
+++ /dev/null
@@ -1,37 +0,0 @@
-#version 310 es
-layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
-
-struct alias
-{
-    vec3 alias[100];
-};
-
-struct alias_1
-{
-    vec4 alias;
-    vec2 alias_1[10];
-    alias alias_2[2];
-};
-
-struct alias_2
-{
-    vec4 alias;
-    alias_1 alias_1;
-};
-
-layout(binding = 0, std430) buffer alias_3
-{
-    alias_2 alias;
-} alias_4;
-
-layout(binding = 1, std140) buffer alias_5
-{
-    alias_2 alias;
-} alias_6;
-
-void main()
-{
-    alias_2 alias_7 = alias_4.alias;
-    alias_6.alias = alias_7;
-}
-
diff --git a/reference/opt/shaders/asm/comp/storage-buffer-basic.asm.comp b/reference/shaders/asm/comp/storage-buffer-basic.invalid.asm.comp
similarity index 90%
rename from reference/opt/shaders/asm/comp/storage-buffer-basic.asm.comp
rename to reference/shaders/asm/comp/storage-buffer-basic.invalid.asm.comp
index 3de823fb10..a2210eb169 100644
--- a/reference/opt/shaders/asm/comp/storage-buffer-basic.asm.comp
+++ b/reference/shaders/asm/comp/storage-buffer-basic.invalid.asm.comp
@@ -11,6 +11,8 @@ layout(binding = 1, std430) buffer _6_9
     float _m0[];
 } _9;
 
+uvec3 _22 = gl_WorkGroupSize;
+
 void main()
 {
     _8._m0[gl_WorkGroupID.x] = _9._m0[gl_WorkGroupID.x] + _8._m0[gl_WorkGroupID.x];
diff --git a/reference/shaders/asm/frag/combined-sampler-reuse.vk.asm.frag b/reference/shaders/asm/frag/combined-sampler-reuse.vk.asm.frag
new file mode 100644
index 0000000000..b5e59f88bb
--- /dev/null
+++ b/reference/shaders/asm/frag/combined-sampler-reuse.vk.asm.frag
@@ -0,0 +1,13 @@
+#version 450
+
+uniform sampler2D SPIRV_Cross_CombineduTexuSampler;
+
+layout(location = 0) out vec4 FragColor;
+layout(location = 0) in vec2 vUV;
+
+void main()
+{
+    FragColor = texture(SPIRV_Cross_CombineduTexuSampler, vUV);
+    FragColor += textureOffset(SPIRV_Cross_CombineduTexuSampler, vUV, ivec2(1));
+}
+
diff --git a/reference/shaders/asm/frag/combined-sampler-reuse.vk.asm.frag.vk b/reference/shaders/asm/frag/combined-sampler-reuse.vk.asm.frag.vk
new file mode 100644
index 0000000000..bce9808950
--- /dev/null
+++ b/reference/shaders/asm/frag/combined-sampler-reuse.vk.asm.frag.vk
@@ -0,0 +1,14 @@
+#version 450
+
+layout(set = 0, binding = 1) uniform texture2D uTex;
+layout(set = 0, binding = 0) uniform sampler uSampler;
+
+layout(location = 0) out vec4 FragColor;
+layout(location = 0) in vec2 vUV;
+
+void main()
+{
+    FragColor = texture(sampler2D(uTex, uSampler), vUV);
+    FragColor += textureOffset(sampler2D(uTex, uSampler), vUV, ivec2(1));
+}
+
diff --git a/reference/shaders/asm/frag/complex-name-workarounds.asm.frag b/reference/shaders/asm/frag/complex-name-workarounds.asm.frag
new file mode 100644
index 0000000000..7b120719e6
--- /dev/null
+++ b/reference/shaders/asm/frag/complex-name-workarounds.asm.frag
@@ -0,0 +1,28 @@
+#version 450
+
+layout(location = 0) in vec4 _;
+layout(location = 1) in vec4 a;
+layout(location = 0) out vec4 b;
+
+vec4 fu_nc_(vec4 a_)
+{
+    return a_;
+}
+
+vec4 fu_nc_1(vec4 _0_1)
+{
+    return _0_1;
+}
+
+void main()
+{
+    vec4 b_1 = _;
+    vec4 _0_1 = (_ + a) + fu_nc_(b_1);
+    vec4 b_3 = a;
+    vec4 b_2 = (_ - a) + fu_nc_1(b_3);
+    b = _0_1;
+    b = b_2;
+    b = _0_1;
+    b = b_2;
+}
+
diff --git a/reference/shaders/asm/frag/empty-struct.asm.frag b/reference/shaders/asm/frag/empty-struct.asm.frag
new file mode 100644
index 0000000000..7c9d39338e
--- /dev/null
+++ b/reference/shaders/asm/frag/empty-struct.asm.frag
@@ -0,0 +1,25 @@
+#version 450
+
+struct EmptyStructTest
+{
+    int empty_struct_member;
+};
+
+float GetValue(EmptyStructTest self)
+{
+    return 0.0;
+}
+
+float GetValue_1(EmptyStructTest self)
+{
+    return 0.0;
+}
+
+void main()
+{
+    EmptyStructTest _23 = EmptyStructTest(0);
+    EmptyStructTest emptyStruct;
+    float value = GetValue(emptyStruct);
+    value = GetValue_1(_23);
+}
+
diff --git a/reference/shaders/asm/frag/image-extract-reuse.asm.frag b/reference/shaders/asm/frag/image-extract-reuse.asm.frag
new file mode 100644
index 0000000000..ab2749b4df
--- /dev/null
+++ b/reference/shaders/asm/frag/image-extract-reuse.asm.frag
@@ -0,0 +1,11 @@
+#version 450
+
+layout(binding = 0) uniform sampler2D uTexture;
+
+layout(location = 0) out ivec2 Size;
+
+void main()
+{
+    Size = textureSize(uTexture, 0) + textureSize(uTexture, 1);
+}
+
diff --git a/reference/shaders/asm/frag/image-fetch-no-sampler.asm.vk.frag.vk b/reference/shaders/asm/frag/image-fetch-no-sampler.asm.vk.frag.vk
index 3682eaee62..e4d9fc4543 100644
--- a/reference/shaders/asm/frag/image-fetch-no-sampler.asm.vk.frag.vk
+++ b/reference/shaders/asm/frag/image-fetch-no-sampler.asm.vk.frag.vk
@@ -2,7 +2,7 @@
 
 layout(set = 0, binding = 0) uniform sampler Sampler;
 layout(set = 0, binding = 0) uniform texture2D SampledImage;
-uniform sampler SPIRV_Cross_DummySampler;
+layout(set = 0, binding = 0) uniform sampler SPIRV_Cross_DummySampler;
 
 layout(location = 0) out vec4 _entryPointOutput;
 
diff --git a/reference/shaders/asm/frag/image-query-no-sampler.vk.asm.frag b/reference/shaders/asm/frag/image-query-no-sampler.vk.asm.frag
new file mode 100644
index 0000000000..2040dd1afb
--- /dev/null
+++ b/reference/shaders/asm/frag/image-query-no-sampler.vk.asm.frag
@@ -0,0 +1,13 @@
+#version 450
+
+uniform sampler2D SPIRV_Cross_CombineduSampler2DSPIRV_Cross_DummySampler;
+uniform sampler2DMS SPIRV_Cross_CombineduSampler2DMSSPIRV_Cross_DummySampler;
+
+void main()
+{
+    ivec2 b = textureSize(SPIRV_Cross_CombineduSampler2DSPIRV_Cross_DummySampler, 0);
+    ivec2 c = textureSize(SPIRV_Cross_CombineduSampler2DMSSPIRV_Cross_DummySampler);
+    int l1 = textureQueryLevels(SPIRV_Cross_CombineduSampler2DSPIRV_Cross_DummySampler);
+    int s0 = textureSamples(SPIRV_Cross_CombineduSampler2DMSSPIRV_Cross_DummySampler);
+}
+
diff --git a/reference/shaders/asm/frag/image-query-no-sampler.vk.asm.frag.vk b/reference/shaders/asm/frag/image-query-no-sampler.vk.asm.frag.vk
new file mode 100644
index 0000000000..828d2a8727
--- /dev/null
+++ b/reference/shaders/asm/frag/image-query-no-sampler.vk.asm.frag.vk
@@ -0,0 +1,14 @@
+#version 450
+
+layout(set = 0, binding = 0) uniform texture2D uSampler2D;
+layout(set = 0, binding = 0) uniform texture2DMS uSampler2DMS;
+layout(set = 0, binding = 0) uniform sampler SPIRV_Cross_DummySampler;
+
+void main()
+{
+    ivec2 b = textureSize(sampler2D(uSampler2D, SPIRV_Cross_DummySampler), 0);
+    ivec2 c = textureSize(sampler2DMS(uSampler2DMS, SPIRV_Cross_DummySampler));
+    int l1 = textureQueryLevels(sampler2D(uSampler2D, SPIRV_Cross_DummySampler));
+    int s0 = textureSamples(sampler2DMS(uSampler2DMS, SPIRV_Cross_DummySampler));
+}
+
diff --git a/reference/shaders/asm/frag/implicit-read-dep-phi.asm.frag b/reference/shaders/asm/frag/implicit-read-dep-phi.asm.frag
new file mode 100644
index 0000000000..6bc1be0d0f
--- /dev/null
+++ b/reference/shaders/asm/frag/implicit-read-dep-phi.asm.frag
@@ -0,0 +1,39 @@
+#version 450
+
+layout(binding = 0) uniform sampler2D uImage;
+
+layout(location = 0) in vec4 v0;
+layout(location = 0) out vec4 FragColor;
+
+void main()
+{
+    int i = 0;
+    float phi;
+    vec4 _36;
+    phi = 1.0;
+    _36 = vec4(1.0, 2.0, 1.0, 2.0);
+    for (;;)
+    {
+        FragColor = _36;
+        if (i < 4)
+        {
+            if (v0[i] > 0.0)
+            {
+                vec2 _48 = vec2(phi);
+                i++;
+                phi += 2.0;
+                _36 = textureLod(uImage, _48, 0.0);
+                continue;
+            }
+            else
+            {
+                break;
+            }
+        }
+        else
+        {
+            break;
+        }
+    }
+}
+
diff --git a/reference/shaders/asm/frag/inliner-dominator-inside-loop.asm.frag b/reference/shaders/asm/frag/inliner-dominator-inside-loop.asm.frag
deleted file mode 100644
index 98116cfdc7..0000000000
--- a/reference/shaders/asm/frag/inliner-dominator-inside-loop.asm.frag
+++ /dev/null
@@ -1,227 +0,0 @@
-#version 450
-
-struct VertexOutput
-{
-    vec4 HPosition;
-    vec4 Uv_EdgeDistance1;
-    vec4 UvStuds_EdgeDistance2;
-    vec4 Color;
-    vec4 LightPosition_Fog;
-    vec4 View_Depth;
-    vec4 Normal_SpecPower;
-    vec3 Tangent;
-    vec4 PosLightSpace_Reflectance;
-    float studIndex;
-};
-
-struct Surface
-{
-    vec3 albedo;
-    vec3 normal;
-    float specular;
-    float gloss;
-    float reflectance;
-    float opacity;
-};
-
-struct SurfaceInput
-{
-    vec4 Color;
-    vec2 Uv;
-    vec2 UvStuds;
-};
-
-struct Globals
-{
-    mat4 ViewProjection;
-    vec4 ViewRight;
-    vec4 ViewUp;
-    vec4 ViewDir;
-    vec3 CameraPosition;
-    vec3 AmbientColor;
-    vec3 Lamp0Color;
-    vec3 Lamp0Dir;
-    vec3 Lamp1Color;
-    vec4 FogParams;
-    vec3 FogColor;
-    vec4 LightBorder;
-    vec4 LightConfig0;
-    vec4 LightConfig1;
-    vec4 LightConfig2;
-    vec4 LightConfig3;
-    vec4 RefractionBias_FadeDistance_GlowFactor;
-    vec4 OutlineBrightness_ShadowInfo;
-    vec4 ShadowMatrix0;
-    vec4 ShadowMatrix1;
-    vec4 ShadowMatrix2;
-};
-
-struct Params
-{
-    vec4 LqmatFarTilingFactor;
-};
-
-layout(binding = 0, std140) uniform CB0
-{
-    Globals CB0;
-} _19;
-
-uniform sampler2D SPIRV_Cross_CombinedDiffuseMapTextureDiffuseMapSampler;
-uniform sampler2D SPIRV_Cross_CombinedNormalMapTextureNormalMapSampler;
-uniform sampler2D SPIRV_Cross_CombinedNormalDetailMapTextureNormalDetailMapSampler;
-uniform sampler2D SPIRV_Cross_CombinedStudsMapTextureStudsMapSampler;
-uniform sampler2D SPIRV_Cross_CombinedSpecularMapTextureSpecularMapSampler;
-uniform sampler3D SPIRV_Cross_CombinedLightMapTextureLightMapSampler;
-uniform sampler2D SPIRV_Cross_CombinedShadowMapTextureShadowMapSampler;
-uniform samplerCube SPIRV_Cross_CombinedEnvironmentMapTextureEnvironmentMapSampler;
-
-layout(location = 0) in vec4 IN_Uv_EdgeDistance1;
-layout(location = 1) in vec4 IN_UvStuds_EdgeDistance2;
-layout(location = 2) in vec4 IN_Color;
-layout(location = 3) in vec4 IN_LightPosition_Fog;
-layout(location = 4) in vec4 IN_View_Depth;
-layout(location = 5) in vec4 IN_Normal_SpecPower;
-layout(location = 6) in vec3 IN_Tangent;
-layout(location = 7) in vec4 IN_PosLightSpace_Reflectance;
-layout(location = 8) in float IN_studIndex;
-layout(location = 0) out vec4 _entryPointOutput;
-
-VertexOutput _121;
-SurfaceInput _122;
-vec2 _123;
-vec4 _124;
-Surface _125;
-vec4 _192;
-vec4 _219;
-vec4 _297;
-
-void main()
-{
-    VertexOutput _128 = _121;
-    _128.HPosition = gl_FragCoord;
-    VertexOutput _130 = _128;
-    _130.Uv_EdgeDistance1 = IN_Uv_EdgeDistance1;
-    VertexOutput _132 = _130;
-    _132.UvStuds_EdgeDistance2 = IN_UvStuds_EdgeDistance2;
-    VertexOutput _134 = _132;
-    _134.Color = IN_Color;
-    VertexOutput _136 = _134;
-    _136.LightPosition_Fog = IN_LightPosition_Fog;
-    VertexOutput _138 = _136;
-    _138.View_Depth = IN_View_Depth;
-    VertexOutput _140 = _138;
-    _140.Normal_SpecPower = IN_Normal_SpecPower;
-    VertexOutput _142 = _140;
-    _142.Tangent = IN_Tangent;
-    VertexOutput _144 = _142;
-    _144.PosLightSpace_Reflectance = IN_PosLightSpace_Reflectance;
-    VertexOutput _146 = _144;
-    _146.studIndex = IN_studIndex;
-    SurfaceInput _147 = _122;
-    _147.Color = IN_Color;
-    SurfaceInput _149 = _147;
-    _149.Uv = IN_Uv_EdgeDistance1.xy;
-    SurfaceInput _151 = _149;
-    _151.UvStuds = IN_UvStuds_EdgeDistance2.xy;
-    SurfaceInput _156 = _151;
-    _156.UvStuds.y = (fract(_151.UvStuds.y) + IN_studIndex) * 0.25;
-    float _163 = _146.View_Depth.w * _19.CB0.RefractionBias_FadeDistance_GlowFactor.y;
-    float _165 = clamp(1.0 - _163, 0.0, 1.0);
-    vec2 _166 = IN_Uv_EdgeDistance1.xy * 1.0;
-    bool _173;
-    vec4 _193;
-    do
-    {
-        _173 = 0.0 == 0.0;
-        if (_173)
-        {
-            _193 = texture(SPIRV_Cross_CombinedDiffuseMapTextureDiffuseMapSampler, _166);
-            break;
-        }
-        else
-        {
-            float _180 = 1.0 / (1.0 - 0.0);
-            _193 = mix(texture(SPIRV_Cross_CombinedDiffuseMapTextureDiffuseMapSampler, _166 * 0.25), texture(SPIRV_Cross_CombinedDiffuseMapTextureDiffuseMapSampler, _166), vec4(clamp((clamp(1.0 - (_146.View_Depth.w * 0.00333332992158830165863037109375), 0.0, 1.0) * _180) - (0.0 * _180), 0.0, 1.0)));
-            break;
-        }
-        _193 = _192;
-        break;
-    } while (false);
-    vec4 _220;
-    do
-    {
-        if (_173)
-        {
-            _220 = texture(SPIRV_Cross_CombinedNormalMapTextureNormalMapSampler, _166);
-            break;
-        }
-        else
-        {
-            float _207 = 1.0 / (1.0 - 0.0);
-            _220 = mix(texture(SPIRV_Cross_CombinedNormalMapTextureNormalMapSampler, _166 * 0.25), texture(SPIRV_Cross_CombinedNormalMapTextureNormalMapSampler, _166), vec4(clamp((_165 * _207) - (0.0 * _207), 0.0, 1.0)));
-            break;
-        }
-        _220 = _219;
-        break;
-    } while (false);
-    vec2 _223 = vec2(1.0);
-    vec2 _224 = (_220.wy * 2.0) - _223;
-    vec3 _232 = vec3(_224, sqrt(clamp(1.0 + dot(-_224, _224), 0.0, 1.0)));
-    vec2 _240 = (texture(SPIRV_Cross_CombinedNormalDetailMapTextureNormalDetailMapSampler, _166 * 0.0).wy * 2.0) - _223;
-    vec2 _252 = _232.xy + (vec3(_240, sqrt(clamp(1.0 + dot(-_240, _240), 0.0, 1.0))).xy * 0.0);
-    vec3 _253 = vec3(_252.x, _252.y, _232.z);
-    vec2 _255 = _253.xy * _165;
-    vec3 _256 = vec3(_255.x, _255.y, _253.z);
-    vec3 _271 = ((IN_Color.xyz * (_193 * 1.0).xyz) * (1.0 + (_256.x * 0.300000011920928955078125))) * (texture(SPIRV_Cross_CombinedStudsMapTextureStudsMapSampler, _156.UvStuds).x * 2.0);
-    vec4 _298;
-    do
-    {
-        if (0.75 == 0.0)
-        {
-            _298 = texture(SPIRV_Cross_CombinedSpecularMapTextureSpecularMapSampler, _166);
-            break;
-        }
-        else
-        {
-            float _285 = 1.0 / (1.0 - 0.75);
-            _298 = mix(texture(SPIRV_Cross_CombinedSpecularMapTextureSpecularMapSampler, _166 * 0.25), texture(SPIRV_Cross_CombinedSpecularMapTextureSpecularMapSampler, _166), vec4(clamp((_165 * _285) - (0.75 * _285), 0.0, 1.0)));
-            break;
-        }
-        _298 = _297;
-        break;
-    } while (false);
-    vec2 _303 = mix(vec2(0.800000011920928955078125, 120.0), (_298.xy * vec2(2.0, 256.0)) + vec2(0.0, 0.00999999977648258209228515625), vec2(_165));
-    Surface _304 = _125;
-    _304.albedo = _271;
-    Surface _305 = _304;
-    _305.normal = _256;
-    float _306 = _303.x;
-    Surface _307 = _305;
-    _307.specular = _306;
-    float _308 = _303.y;
-    Surface _309 = _307;
-    _309.gloss = _308;
-    float _312 = (_298.xy.y * _165) * 0.0;
-    Surface _313 = _309;
-    _313.reflectance = _312;
-    vec4 _318 = vec4(_271, _146.Color.w);
-    vec3 _329 = normalize(((IN_Tangent * _313.normal.x) + (cross(IN_Normal_SpecPower.xyz, IN_Tangent) * _313.normal.y)) + (IN_Normal_SpecPower.xyz * _313.normal.z));
-    vec3 _332 = -_19.CB0.Lamp0Dir;
-    float _333 = dot(_329, _332);
-    float _357 = clamp(dot(step(_19.CB0.LightConfig3.xyz, abs(IN_LightPosition_Fog.xyz - _19.CB0.LightConfig2.xyz)), vec3(1.0)), 0.0, 1.0);
-    vec4 _368 = mix(texture(SPIRV_Cross_CombinedLightMapTextureLightMapSampler, IN_LightPosition_Fog.xyz.yzx - (IN_LightPosition_Fog.xyz.yzx * _357)), _19.CB0.LightBorder, vec4(_357));
-    vec2 _376 = texture(SPIRV_Cross_CombinedShadowMapTextureShadowMapSampler, IN_PosLightSpace_Reflectance.xyz.xy).xy;
-    float _392 = (1.0 - (((step(_376.x, IN_PosLightSpace_Reflectance.xyz.z) * clamp(9.0 - (20.0 * abs(IN_PosLightSpace_Reflectance.xyz.z - 0.5)), 0.0, 1.0)) * _376.y) * _19.CB0.OutlineBrightness_ShadowInfo.w)) * _368.w;
-    vec3 _403 = mix(_318.xyz, texture(SPIRV_Cross_CombinedEnvironmentMapTextureEnvironmentMapSampler, reflect(-IN_View_Depth.xyz, _329)).xyz, vec3(_312));
-    vec4 _404 = vec4(_403.x, _403.y, _403.z, _318.w);
-    vec3 _422 = (((_19.CB0.AmbientColor + (((_19.CB0.Lamp0Color * clamp(_333, 0.0, 1.0)) + (_19.CB0.Lamp1Color * max(-_333, 0.0))) * _392)) + _368.xyz) * _404.xyz) + (_19.CB0.Lamp0Color * (((step(0.0, _333) * _306) * _392) * pow(clamp(dot(_329, normalize(_332 + normalize(IN_View_Depth.xyz))), 0.0, 1.0), _308)));
-    vec4 _425 = vec4(_422.x, _422.y, _422.z, _124.w);
-    _425.w = _404.w;
-    vec2 _435 = min(IN_Uv_EdgeDistance1.wz, IN_UvStuds_EdgeDistance2.wz);
-    float _439 = min(_435.x, _435.y) / _163;
-    vec3 _445 = _425.xyz * clamp((clamp((_163 * _19.CB0.OutlineBrightness_ShadowInfo.x) + _19.CB0.OutlineBrightness_ShadowInfo.y, 0.0, 1.0) * (1.5 - _439)) + _439, 0.0, 1.0);
-    vec4 _446 = vec4(_445.x, _445.y, _445.z, _425.w);
-    vec3 _453 = mix(_19.CB0.FogColor, _446.xyz, vec3(clamp(_146.LightPosition_Fog.w, 0.0, 1.0)));
-    _entryPointOutput = vec4(_453.x, _453.y, _453.z, _446.w);
-}
-
diff --git a/reference/shaders/asm/frag/loop-header-to-continue.asm.frag b/reference/shaders/asm/frag/loop-header-to-continue.asm.frag
index f3a6b4eceb..a99322d67e 100644
--- a/reference/shaders/asm/frag/loop-header-to-continue.asm.frag
+++ b/reference/shaders/asm/frag/loop-header-to-continue.asm.frag
@@ -28,11 +28,15 @@ void main()
     float _58;
     _55 = 0.0;
     _58 = 0.0;
-    float _64;
-    vec4 _72;
-    float _78;
-    for (int _60 = -3; _60 <= 3; _64 = float(_60), _72 = texture(SPIRV_Cross_CombinedmapTexturemapSampler, IN_uv + (vec2(0.0, _8.CB1.TextureSize.w) * _64)), _78 = exp(((-_64) * _64) * 0.2222220003604888916015625) * float(abs(_72.y - _50) < clamp((_50 * 80.0) * 0.0007999999797903001308441162109375, 7.999999797903001308441162109375e-05, 0.008000000379979610443115234375)), _55 += (_72.x * _78), _58 += _78, _60++)
+    for (int _60 = -3; _60 <= 3; )
     {
+        float _64 = float(_60);
+        vec4 _72 = texture(SPIRV_Cross_CombinedmapTexturemapSampler, IN_uv + (vec2(0.0, _8.CB1.TextureSize.w) * _64));
+        float _78 = exp(((-_64) * _64) * 0.2222220003604888916015625) * float(abs(_72.y - _50) < clamp((_50 * 80.0) * 0.0007999999797903001308441162109375, 7.999999797903001308441162109375e-05, 0.008000000379979610443115234375));
+        _55 += (_72.x * _78);
+        _58 += _78;
+        _60++;
+        continue;
     }
     _entryPointOutput = vec4(_55 / _58, _50, 0.0, 1.0);
 }
diff --git a/reference/shaders/asm/frag/loop-merge-to-continue.asm.frag b/reference/shaders/asm/frag/loop-merge-to-continue.asm.frag
new file mode 100644
index 0000000000..55db70cf6f
--- /dev/null
+++ b/reference/shaders/asm/frag/loop-merge-to-continue.asm.frag
@@ -0,0 +1,17 @@
+#version 450
+
+layout(location = 0) out vec4 FragColor;
+layout(location = 0) in vec4 v0;
+
+void main()
+{
+    FragColor = vec4(1.0);
+    for (int i = 0; i < 4; i++)
+    {
+        for (int j = 0; j < 4; j++)
+        {
+            FragColor += vec4(v0[(i + j) & 3]);
+        }
+    }
+}
+
diff --git a/reference/shaders/asm/frag/sampler-buffer-array-without-sampler.asm.frag b/reference/shaders/asm/frag/sampler-buffer-array-without-sampler.asm.frag
new file mode 100644
index 0000000000..3dc1839d6b
--- /dev/null
+++ b/reference/shaders/asm/frag/sampler-buffer-array-without-sampler.asm.frag
@@ -0,0 +1,28 @@
+#version 450
+
+struct Registers
+{
+    int index;
+};
+
+uniform Registers registers;
+
+uniform sampler2D SPIRV_Cross_CombineduSamplerSPIRV_Cross_DummySampler[4];
+
+layout(location = 0) out vec4 FragColor;
+
+vec4 sample_from_func(sampler2D SPIRV_Cross_CombineduSamplerSPIRV_Cross_DummySampler_1[4])
+{
+    return texelFetch(SPIRV_Cross_CombineduSamplerSPIRV_Cross_DummySampler_1[registers.index], ivec2(4), 0);
+}
+
+vec4 sample_one_from_func(sampler2D SPIRV_Cross_CombineduSamplerSPIRV_Cross_DummySampler_1)
+{
+    return texelFetch(SPIRV_Cross_CombineduSamplerSPIRV_Cross_DummySampler_1, ivec2(4), 0);
+}
+
+void main()
+{
+    FragColor = (texelFetch(SPIRV_Cross_CombineduSamplerSPIRV_Cross_DummySampler[registers.index], ivec2(10), 0) + sample_from_func(SPIRV_Cross_CombineduSamplerSPIRV_Cross_DummySampler)) + sample_one_from_func(SPIRV_Cross_CombineduSamplerSPIRV_Cross_DummySampler[registers.index]);
+}
+
diff --git a/reference/shaders/asm/frag/sampler-buffer-without-sampler.asm.frag b/reference/shaders/asm/frag/sampler-buffer-without-sampler.asm.frag
index a4cf078308..1ebf8fb96b 100644
--- a/reference/shaders/asm/frag/sampler-buffer-without-sampler.asm.frag
+++ b/reference/shaders/asm/frag/sampler-buffer-without-sampler.asm.frag
@@ -1,7 +1,7 @@
 #version 450
 
-layout(rgba32f) uniform writeonly imageBuffer RWTex;
-uniform samplerBuffer Tex;
+layout(binding = 0, rgba32f) uniform writeonly imageBuffer RWTex;
+layout(binding = 1) uniform samplerBuffer Tex;
 
 layout(location = 0) out vec4 _entryPointOutput;
 
diff --git a/reference/shaders/asm/frag/selection-merge-to-continue.asm.frag b/reference/shaders/asm/frag/selection-merge-to-continue.asm.frag
new file mode 100644
index 0000000000..82b5973f8a
--- /dev/null
+++ b/reference/shaders/asm/frag/selection-merge-to-continue.asm.frag
@@ -0,0 +1,23 @@
+#version 450
+
+layout(location = 0) out vec4 FragColor;
+layout(location = 0) in vec4 v0;
+
+void main()
+{
+    FragColor = vec4(1.0);
+    for (int i = 0; i < 4; i++)
+    {
+        if (v0.x == 20.0)
+        {
+            FragColor += vec4(v0[i & 3]);
+            continue;
+        }
+        else
+        {
+            FragColor += vec4(v0[i & 1]);
+            continue;
+        }
+    }
+}
+
diff --git a/reference/shaders/asm/frag/srem.asm.frag b/reference/shaders/asm/frag/srem.asm.frag
new file mode 100644
index 0000000000..05a3d7554f
--- /dev/null
+++ b/reference/shaders/asm/frag/srem.asm.frag
@@ -0,0 +1,13 @@
+#version 310 es
+precision mediump float;
+precision highp int;
+
+layout(location = 0) out vec4 FragColor;
+layout(location = 0) flat in ivec4 vA;
+layout(location = 1) flat in ivec4 vB;
+
+void main()
+{
+    FragColor = vec4(vA - vB * (vA / vB));
+}
+
diff --git a/reference/shaders/asm/frag/switch-merge-to-continue.asm.frag b/reference/shaders/asm/frag/switch-merge-to-continue.asm.frag
new file mode 100644
index 0000000000..88f76cf1d6
--- /dev/null
+++ b/reference/shaders/asm/frag/switch-merge-to-continue.asm.frag
@@ -0,0 +1,30 @@
+#version 450
+
+layout(location = 0) out vec4 FragColor;
+
+void main()
+{
+    FragColor = vec4(1.0);
+    for (int i = 0; i < 4; i++)
+    {
+        switch (i)
+        {
+            case 0:
+            {
+                FragColor.x += 1.0;
+                break;
+            }
+            case 1:
+            {
+                FragColor.y += 3.0;
+                break;
+            }
+            default:
+            {
+                FragColor.z += 3.0;
+                break;
+            }
+        }
+    }
+}
+
diff --git a/reference/shaders/asm/frag/temporary-name-alias.asm.frag b/reference/shaders/asm/frag/temporary-name-alias.asm.frag
new file mode 100644
index 0000000000..927c0434a8
--- /dev/null
+++ b/reference/shaders/asm/frag/temporary-name-alias.asm.frag
@@ -0,0 +1,10 @@
+#version 450
+
+void main()
+{
+    float constituent = float(0);
+    mat3 _mat3 = mat3(vec3(constituent), vec3(constituent), vec3(constituent));
+    float constituent_1 = float(1);
+    _mat3 = mat3(vec3(constituent_1), vec3(constituent_1), vec3(constituent_1));
+}
+
diff --git a/reference/shaders/asm/frag/texel-fetch-no-lod.asm.frag b/reference/shaders/asm/frag/texel-fetch-no-lod.asm.frag
new file mode 100644
index 0000000000..6193de0da9
--- /dev/null
+++ b/reference/shaders/asm/frag/texel-fetch-no-lod.asm.frag
@@ -0,0 +1,11 @@
+#version 450
+
+layout(binding = 0) uniform sampler2D uTexture;
+
+layout(location = 0) out vec4 FragColor;
+
+void main()
+{
+    FragColor = texelFetch(uTexture, ivec2(gl_FragCoord.xy), 0);
+}
+
diff --git a/reference/shaders/asm/frag/vector-shuffle-oom.asm.frag b/reference/shaders/asm/frag/vector-shuffle-oom.asm.frag
index 1c211caa6d..cdaf78727e 100644
--- a/reference/shaders/asm/frag/vector-shuffle-oom.asm.frag
+++ b/reference/shaders/asm/frag/vector-shuffle-oom.asm.frag
@@ -110,7 +110,8 @@ void main()
     {
         _129 = _109;
     }
-    vec3 _133 = vec4(0.0).xyz + (_129 * 0.5);
+    vec3 _130 = _129 * 0.5;
+    vec3 _133 = vec4(0.0).xyz + _130;
     vec4 _134 = vec4(_133.x, _133.y, _133.z, vec4(0.0).w);
     _28 _135 = _77;
     _135._m0 = _134;
@@ -126,7 +127,8 @@ void main()
     {
         _176 = _156;
     }
-    vec3 _180 = _134.xyz + (_176 * 0.5);
+    vec3 _177 = _176 * 0.5;
+    vec3 _180 = _134.xyz + _177;
     vec4 _181 = vec4(_180.x, _180.y, _180.z, _134.w);
     _28 _182 = _135;
     _182._m0 = _181;
@@ -142,7 +144,8 @@ void main()
     {
         _223 = _203;
     }
-    vec3 _227 = _181.xyz + (_223 * 0.75);
+    vec3 _224 = _223 * 0.75;
+    vec3 _227 = _181.xyz + _224;
     vec4 _228 = vec4(_227.x, _227.y, _227.z, _181.w);
     _28 _229 = _182;
     _229._m0 = _228;
@@ -158,7 +161,8 @@ void main()
     {
         _270 = _250;
     }
-    vec3 _274 = _228.xyz + (_270 * 0.5);
+    vec3 _271 = _270 * 0.5;
+    vec3 _274 = _228.xyz + _271;
     vec4 _275 = vec4(_274.x, _274.y, _274.z, _228.w);
     _28 _276 = _229;
     _276._m0 = _275;
@@ -174,7 +178,8 @@ void main()
     {
         _317 = _297;
     }
-    vec3 _321 = _275.xyz + (_317 * 0.5);
+    vec3 _318 = _317 * 0.5;
+    vec3 _321 = _275.xyz + _318;
     vec4 _322 = vec4(_321.x, _321.y, _321.z, _275.w);
     _28 _323 = _276;
     _323._m0 = _322;
@@ -190,7 +195,8 @@ void main()
     {
         _364 = _344;
     }
-    vec3 _368 = _322.xyz + (_364 * 0.75);
+    vec3 _365 = _364 * 0.75;
+    vec3 _368 = _322.xyz + _365;
     vec4 _369 = vec4(_368.x, _368.y, _368.z, _322.w);
     _28 _370 = _323;
     _370._m0 = _369;
@@ -206,7 +212,8 @@ void main()
     {
         _411 = _391;
     }
-    vec3 _415 = _369.xyz + (_411 * 1.0);
+    vec3 _412 = _411 * 1.0;
+    vec3 _415 = _369.xyz + _412;
     vec4 _416 = vec4(_415.x, _415.y, _415.z, _369.w);
     _28 _417 = _370;
     _417._m0 = _416;
@@ -222,7 +229,8 @@ void main()
     {
         _458 = _438;
     }
-    vec3 _462 = _416.xyz + (_458 * 0.75);
+    vec3 _459 = _458 * 0.75;
+    vec3 _462 = _416.xyz + _459;
     vec4 _463 = vec4(_462.x, _462.y, _462.z, _416.w);
     _28 _464 = _417;
     _464._m0 = _463;
@@ -238,7 +246,8 @@ void main()
     {
         _505 = _485;
     }
-    vec3 _509 = _463.xyz + (_505 * 0.5);
+    vec3 _506 = _505 * 0.5;
+    vec3 _509 = _463.xyz + _506;
     vec4 _510 = vec4(_509.x, _509.y, _509.z, _463.w);
     _28 _511 = _464;
     _511._m0 = _510;
@@ -254,7 +263,8 @@ void main()
     {
         _552 = _532;
     }
-    vec3 _556 = _510.xyz + (_552 * 0.5);
+    vec3 _553 = _552 * 0.5;
+    vec3 _556 = _510.xyz + _553;
     vec4 _557 = vec4(_556.x, _556.y, _556.z, _510.w);
     _28 _558 = _511;
     _558._m0 = _557;
@@ -270,7 +280,8 @@ void main()
     {
         _599 = _579;
     }
-    vec3 _603 = _557.xyz + (_599 * 0.75);
+    vec3 _600 = _599 * 0.75;
+    vec3 _603 = _557.xyz + _600;
     vec4 _604 = vec4(_603.x, _603.y, _603.z, _557.w);
     _28 _605 = _558;
     _605._m0 = _604;
@@ -286,7 +297,8 @@ void main()
     {
         _646 = _626;
     }
-    vec3 _650 = _604.xyz + (_646 * 0.5);
+    vec3 _647 = _646 * 0.5;
+    vec3 _650 = _604.xyz + _647;
     vec4 _651 = vec4(_650.x, _650.y, _650.z, _604.w);
     _28 _652 = _605;
     _652._m0 = _651;
diff --git a/reference/shaders/asm/geom/inout-split-access-chain-handle.asm.geom b/reference/shaders/asm/geom/inout-split-access-chain-handle.asm.geom
new file mode 100644
index 0000000000..71082099e7
--- /dev/null
+++ b/reference/shaders/asm/geom/inout-split-access-chain-handle.asm.geom
@@ -0,0 +1,23 @@
+#version 440
+layout(triangles) in;
+layout(max_vertices = 5, triangle_strip) out;
+
+struct Data
+{
+    vec4 ApiPerspectivePosition;
+};
+
+void Copy(inout Data inputStream[3])
+{
+    inputStream[0].ApiPerspectivePosition = gl_in[0].gl_Position;
+}
+
+void main()
+{
+    Data inputStream[3];
+    Data param[3] = inputStream;
+    Copy(param);
+    inputStream = param;
+    gl_Position = inputStream[0].ApiPerspectivePosition;
+}
+
diff --git a/reference/shaders/asm/geom/split-access-chain-input.asm.geom b/reference/shaders/asm/geom/split-access-chain-input.asm.geom
new file mode 100644
index 0000000000..511d87fcbe
--- /dev/null
+++ b/reference/shaders/asm/geom/split-access-chain-input.asm.geom
@@ -0,0 +1,9 @@
+#version 440
+layout(triangles) in;
+layout(max_vertices = 3, triangle_strip) out;
+
+void main()
+{
+    gl_Position = gl_in[0].gl_Position;
+}
+
diff --git a/reference/shaders/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc b/reference/shaders/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc
new file mode 100644
index 0000000000..8cb7a4e64c
--- /dev/null
+++ b/reference/shaders/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc
@@ -0,0 +1,79 @@
+#version 450
+layout(vertices = 3) out;
+
+struct VertexOutput
+{
+    vec4 pos;
+    vec2 uv;
+};
+
+struct HSOut
+{
+    vec4 pos;
+    vec2 uv;
+};
+
+struct HSConstantOut
+{
+    float EdgeTess[3];
+    float InsideTess;
+};
+
+struct VertexOutput_1
+{
+    vec2 uv;
+};
+
+struct HSOut_1
+{
+    vec2 uv;
+};
+
+layout(location = 0) in VertexOutput_1 p[];
+layout(location = 0) out HSOut_1 _entryPointOutput[3];
+
+HSOut _hs_main(VertexOutput p_1[3], uint i)
+{
+    HSOut _output;
+    _output.pos = p_1[i].pos;
+    _output.uv = p_1[i].uv;
+    return _output;
+}
+
+HSConstantOut PatchHS(VertexOutput _patch[3])
+{
+    HSConstantOut _output;
+    _output.EdgeTess[0] = (vec2(1.0) + _patch[0].uv).x;
+    _output.EdgeTess[1] = (vec2(1.0) + _patch[0].uv).x;
+    _output.EdgeTess[2] = (vec2(1.0) + _patch[0].uv).x;
+    _output.InsideTess = (vec2(1.0) + _patch[0].uv).x;
+    return _output;
+}
+
+void main()
+{
+    VertexOutput p_1[3];
+    p_1[0].pos = gl_in[0].gl_Position;
+    p_1[0].uv = p[0].uv;
+    p_1[1].pos = gl_in[1].gl_Position;
+    p_1[1].uv = p[1].uv;
+    p_1[2].pos = gl_in[2].gl_Position;
+    p_1[2].uv = p[2].uv;
+    uint i = gl_InvocationID;
+    VertexOutput param[3] = p_1;
+    uint param_1 = i;
+    HSOut flattenTemp = _hs_main(param, param_1);
+    gl_out[gl_InvocationID].gl_Position = flattenTemp.pos;
+    _entryPointOutput[gl_InvocationID].uv = flattenTemp.uv;
+    barrier();
+    if (int(gl_InvocationID) == 0)
+    {
+        VertexOutput param_2[3] = p_1;
+        HSConstantOut _patchConstantResult = PatchHS(param_2);
+        gl_TessLevelOuter[0] = _patchConstantResult.EdgeTess[0];
+        gl_TessLevelOuter[1] = _patchConstantResult.EdgeTess[1];
+        gl_TessLevelOuter[2] = _patchConstantResult.EdgeTess[2];
+        gl_TessLevelInner[0] = _patchConstantResult.InsideTess;
+    }
+}
+
diff --git a/reference/shaders/asm/vert/empty-io.asm.vert b/reference/shaders/asm/vert/empty-io.asm.vert
index e1a56d9d4c..cc432cb890 100644
--- a/reference/shaders/asm/vert/empty-io.asm.vert
+++ b/reference/shaders/asm/vert/empty-io.asm.vert
@@ -10,6 +10,11 @@ struct VSOutput
     vec4 position;
 };
 
+struct VSOutput_1
+{
+    int empty_struct_member;
+};
+
 layout(location = 0) in vec4 position;
 
 VSOutput _main(VSInput _input)
diff --git a/reference/shaders/asm/vert/spec-constant-op-composite.asm.vk.vert b/reference/shaders/asm/vert/spec-constant-op-composite.asm.vk.vert
new file mode 100644
index 0000000000..c297d94f28
--- /dev/null
+++ b/reference/shaders/asm/vert/spec-constant-op-composite.asm.vk.vert
@@ -0,0 +1,16 @@
+#version 450
+
+layout(location = 0) flat out int _4;
+
+void main()
+{
+    vec4 pos = vec4(0.0);
+    pos.y += float(((-10) + 2));
+    pos.z += float((100u % 5u));
+    pos += vec4(ivec4(20, 30, 0, 0));
+    vec2 _56 = pos.xy + vec2(ivec2(ivec4(20, 30, 0, 0).y, ivec4(20, 30, 0, 0).x));
+    pos = vec4(_56.x, _56.y, pos.z, pos.w);
+    gl_Position = pos;
+    _4 = ivec4(20, 30, 0, 0).y;
+}
+
diff --git a/reference/shaders/asm/vert/spec-constant-op-composite.asm.vk.vert.vk b/reference/shaders/asm/vert/spec-constant-op-composite.asm.vk.vert.vk
new file mode 100644
index 0000000000..d308693aac
--- /dev/null
+++ b/reference/shaders/asm/vert/spec-constant-op-composite.asm.vk.vert.vk
@@ -0,0 +1,25 @@
+#version 450
+
+layout(constant_id = 201) const int _7 = -10;
+layout(constant_id = 202) const uint _8 = 100u;
+layout(constant_id = 200) const float _9 = 3.141590118408203125;
+const int _20 = (_7 + 2);
+const uint _25 = (_8 % 5u);
+const ivec4 _30 = ivec4(20, 30, _20, _20);
+const ivec2 _32 = ivec2(_30.y, _30.x);
+const int _33 = _30.y;
+
+layout(location = 0) flat out int _4;
+
+void main()
+{
+    vec4 pos = vec4(0.0);
+    pos.y += float(_20);
+    pos.z += float(_25);
+    pos += vec4(_30);
+    vec2 _56 = pos.xy + vec2(_32);
+    pos = vec4(_56.x, _56.y, pos.z, pos.w);
+    gl_Position = pos;
+    _4 = _33;
+}
+
diff --git a/reference/shaders/comp/bitfield.noopt.comp b/reference/shaders/comp/bitfield.noopt.comp
deleted file mode 100644
index 49bbddb0ab..0000000000
--- a/reference/shaders/comp/bitfield.noopt.comp
+++ /dev/null
@@ -1,19 +0,0 @@
-#version 310 es
-layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
-
-void main()
-{
-    int signed_value = 0;
-    uint unsigned_value = 0u;
-    int s = bitfieldExtract(signed_value, 5, 20);
-    uint u = bitfieldExtract(unsigned_value, 6, 21);
-    s = bitfieldInsert(s, 40, 5, 4);
-    u = bitfieldInsert(u, 60u, 5, 4);
-    u = bitfieldReverse(u);
-    s = bitfieldReverse(s);
-    int v0 = bitCount(u);
-    int v1 = bitCount(s);
-    int v2 = findMSB(u);
-    int v3 = findLSB(s);
-}
-
diff --git a/reference/shaders/comp/generate_height.comp b/reference/shaders/comp/generate_height.comp
index 30ec624cfb..fe733e2893 100644
--- a/reference/shaders/comp/generate_height.comp
+++ b/reference/shaders/comp/generate_height.comp
@@ -4,40 +4,40 @@ layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
 layout(binding = 0, std430) readonly buffer Distribution
 {
     vec2 distribution[];
-} _136;
+} _137;
 
 layout(binding = 2, std140) uniform UBO
 {
     vec4 uModTime;
-} _165;
+} _166;
 
 layout(binding = 1, std430) writeonly buffer HeightmapFFT
 {
     uint heights[];
-} _224;
+} _225;
 
 uvec2 workaround_mix(uvec2 a, uvec2 b, bvec2 sel)
 {
-    uint _83;
+    uint _86;
     if (sel.x)
     {
-        _83 = b.x;
+        _86 = b.x;
     }
     else
     {
-        _83 = a.x;
+        _86 = a.x;
     }
-    uint _93 = _83;
-    uint _94;
+    uint _94 = _86;
+    uint _97;
     if (sel.y)
     {
-        _94 = b.y;
+        _97 = b.y;
     }
     else
     {
-        _94 = a.y;
+        _97 = a.y;
     }
-    return uvec2(_93, _94);
+    return uvec2(_94, _97);
 }
 
 vec2 alias(vec2 i, vec2 N)
@@ -68,13 +68,13 @@ void generate_heightmap()
     uvec2 param_1 = uvec2(0u);
     bvec2 param_2 = equal(i, uvec2(0u));
     uvec2 wi = workaround_mix(param, param_1, param_2);
-    vec2 a = _136.distribution[(i.y * N.x) + i.x];
-    vec2 b = _136.distribution[(wi.y * N.x) + wi.x];
+    vec2 a = _137.distribution[(i.y * N.x) + i.x];
+    vec2 b = _137.distribution[(wi.y * N.x) + wi.x];
     vec2 param_3 = vec2(i);
     vec2 param_4 = vec2(N);
-    vec2 k = _165.uModTime.xy * alias(param_3, param_4);
+    vec2 k = _166.uModTime.xy * alias(param_3, param_4);
     float k_len = length(k);
-    float w = sqrt(9.81000041961669921875 * k_len) * _165.uModTime.z;
+    float w = sqrt(9.81000041961669921875 * k_len) * _166.uModTime.z;
     float cw = cos(w);
     float sw = sin(w);
     vec2 param_5 = a;
@@ -86,7 +86,7 @@ void generate_heightmap()
     b = vec2(b.x, -b.y);
     vec2 res = a + b;
     vec2 param_9 = res;
-    _224.heights[(i.y * N.x) + i.x] = pack2(param_9);
+    _225.heights[(i.y * N.x) + i.x] = pack2(param_9);
 }
 
 void main()
diff --git a/reference/shaders/comp/loop.noopt.comp b/reference/shaders/comp/loop.noopt.comp
deleted file mode 100644
index 049a30669c..0000000000
--- a/reference/shaders/comp/loop.noopt.comp
+++ /dev/null
@@ -1,105 +0,0 @@
-#version 310 es
-layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
-
-layout(binding = 0, std430) readonly buffer SSBO
-{
-    mat4 mvp;
-    vec4 in_data[];
-} _24;
-
-layout(binding = 1, std430) writeonly buffer SSBO2
-{
-    vec4 out_data[];
-} _177;
-
-void main()
-{
-    uint ident = gl_GlobalInvocationID.x;
-    vec4 idat = _24.in_data[ident];
-    int k = 0;
-    uint i = 0u;
-    if (idat.y == 20.0)
-    {
-        do
-        {
-            k *= 2;
-            i++;
-        } while (i < ident);
-    }
-    switch (k)
-    {
-        case 10:
-        {
-            for (;;)
-            {
-                i++;
-                if (i > 10u)
-                {
-                    break;
-                }
-                continue;
-            }
-            break;
-        }
-        default:
-        {
-            for (;;)
-            {
-                i += 2u;
-                if (i > 20u)
-                {
-                    break;
-                }
-                continue;
-            }
-            break;
-        }
-    }
-    while (k < 10)
-    {
-        idat *= 2.0;
-        k++;
-    }
-    for (uint i_1 = 0u; i_1 < 16u; i_1++, k++)
-    {
-        for (uint j = 0u; j < 30u; j++)
-        {
-            idat = _24.mvp * idat;
-        }
-    }
-    k = 0;
-    for (;;)
-    {
-        k++;
-        if (k > 10)
-        {
-            k += 2;
-        }
-        else
-        {
-            k += 3;
-            continue;
-        }
-        k += 10;
-        continue;
-    }
-    k = 0;
-    do
-    {
-        k++;
-    } while (k > 10);
-    int l = 0;
-    for (;;)
-    {
-        if (l == 5)
-        {
-            l++;
-            continue;
-        }
-        idat += vec4(1.0);
-        l++;
-        continue;
-    }
-    _177.out_data[ident] = idat;
-}
-
diff --git a/reference/shaders/comp/rmw-matrix.comp b/reference/shaders/comp/rmw-matrix.comp
new file mode 100644
index 0000000000..5c4ac94bc7
--- /dev/null
+++ b/reference/shaders/comp/rmw-matrix.comp
@@ -0,0 +1,20 @@
+#version 310 es
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+layout(binding = 0, std430) buffer SSBO
+{
+    float a;
+    vec4 b;
+    mat4 c;
+    float a1;
+    vec4 b1;
+    mat4 c1;
+} _11;
+
+void main()
+{
+    _11.a *= _11.a1;
+    _11.b *= _11.b1;
+    _11.c = _11.c * _11.c1;
+}
+
diff --git a/reference/shaders/comp/struct-packing.comp b/reference/shaders/comp/struct-packing.comp
index 3c30aa6088..cd1eda1b32 100644
--- a/reference/shaders/comp/struct-packing.comp
+++ b/reference/shaders/comp/struct-packing.comp
@@ -43,7 +43,49 @@ struct Content
     S4 m3s[8];
 };
 
-layout(binding = 1, std430) buffer SSBO1
+struct S0_1
+{
+    vec2 a[1];
+    float b;
+};
+
+struct S1_1
+{
+    vec3 a;
+    float b;
+};
+
+struct S2_1
+{
+    vec3 a[1];
+    float b;
+};
+
+struct S3_1
+{
+    vec2 a;
+    float b;
+};
+
+struct S4_1
+{
+    vec2 c;
+};
+
+struct Content_1
+{
+    S0_1 m0s[1];
+    S1_1 m1s[1];
+    S2_1 m2s[1];
+    S0_1 m0;
+    S1_1 m1;
+    S2_1 m2;
+    S3_1 m3;
+    float m4;
+    S4_1 m3s[8];
+};
+
+layout(binding = 1, std430) restrict buffer SSBO1
 {
     Content content;
     Content content1[2];
@@ -59,11 +101,11 @@ layout(binding = 1, std430) buffer SSBO1
     float array[];
 } ssbo_430;
 
-layout(binding = 0, std140) buffer SSBO0
+layout(binding = 0, std140) restrict buffer SSBO0
 {
-    Content content;
-    Content content1[2];
-    Content content2;
+    Content_1 content;
+    Content_1 content1[2];
+    Content_1 content2;
     mat2 m0;
     mat2 m1;
     mat2x3 m2[4];
diff --git a/reference/shaders/desktop-only/comp/enhanced-layouts.comp b/reference/shaders/desktop-only/comp/enhanced-layouts.comp
index ba37ca237b..45b25064b6 100644
--- a/reference/shaders/desktop-only/comp/enhanced-layouts.comp
+++ b/reference/shaders/desktop-only/comp/enhanced-layouts.comp
@@ -8,6 +8,13 @@ struct Foo
     int c;
 };
 
+struct Foo_1
+{
+    int a;
+    int b;
+    int c;
+};
+
 layout(binding = 1, std140) buffer SSBO1
 {
     layout(offset = 4) int a;
@@ -20,7 +27,7 @@ layout(binding = 2, std430) buffer SSBO2
 {
     layout(offset = 4) int a;
     layout(offset = 8) int b;
-    layout(offset = 16) Foo foo;
+    layout(offset = 16) Foo_1 foo;
     layout(offset = 48) int c[8];
 } ssbo2;
 
diff --git a/reference/shaders/desktop-only/comp/fp64.desktop.comp b/reference/shaders/desktop-only/comp/fp64.desktop.comp
index 18869eda52..c9e5e84962 100644
--- a/reference/shaders/desktop-only/comp/fp64.desktop.comp
+++ b/reference/shaders/desktop-only/comp/fp64.desktop.comp
@@ -67,7 +67,6 @@ void main()
     dvec3 e = cross(a.xyz, a.yzw);
     a = faceforward(a, a, a);
     a = reflect(a, a);
-    a = refract(a, a, a.x);
     dmat4 l = dmat4(amat[0] * amat[0], amat[1] * amat[1], amat[2] * amat[2], amat[3] * amat[3]);
     l = outerProduct(a, a);
     l = transpose(l);
diff --git a/reference/shaders/desktop-only/frag/control-dependent-in-branch.desktop.frag b/reference/shaders/desktop-only/frag/control-dependent-in-branch.desktop.frag
new file mode 100644
index 0000000000..391b4de1c2
--- /dev/null
+++ b/reference/shaders/desktop-only/frag/control-dependent-in-branch.desktop.frag
@@ -0,0 +1,37 @@
+#version 450
+
+layout(binding = 0) uniform sampler2D uSampler;
+
+layout(location = 0) out vec4 FragColor;
+layout(location = 0) in vec4 vInput;
+
+void main()
+{
+    FragColor = vInput;
+    vec4 t = texture(uSampler, vInput.xy);
+    vec4 d0 = dFdx(vInput);
+    vec4 d1 = dFdy(vInput);
+    vec4 d2 = fwidth(vInput);
+    vec4 d3 = dFdxCoarse(vInput);
+    vec4 d4 = dFdyCoarse(vInput);
+    vec4 d5 = fwidthCoarse(vInput);
+    vec4 d6 = dFdxFine(vInput);
+    vec4 d7 = dFdyFine(vInput);
+    vec4 d8 = fwidthFine(vInput);
+    vec2 lod = textureQueryLod(uSampler, vInput.zw);
+    if (vInput.y > 10.0)
+    {
+        FragColor += t;
+        FragColor += d0;
+        FragColor += d1;
+        FragColor += d2;
+        FragColor += d3;
+        FragColor += d4;
+        FragColor += d5;
+        FragColor += d6;
+        FragColor += d7;
+        FragColor += d8;
+        FragColor += lod.xyxy;
+    }
+}
+
diff --git a/reference/shaders/desktop-only/frag/dual-source-blending.desktop.frag b/reference/shaders/desktop-only/frag/dual-source-blending.desktop.frag
new file mode 100644
index 0000000000..3d946b04a5
--- /dev/null
+++ b/reference/shaders/desktop-only/frag/dual-source-blending.desktop.frag
@@ -0,0 +1,11 @@
+#version 450
+
+layout(location = 0, index = 0) out vec4 FragColor0;
+layout(location = 0, index = 1) out vec4 FragColor1;
+
+void main()
+{
+    FragColor0 = vec4(1.0);
+    FragColor1 = vec4(2.0);
+}
+
diff --git a/reference/shaders/desktop-only/frag/fp16.desktop.frag b/reference/shaders/desktop-only/frag/fp16.desktop.frag
new file mode 100644
index 0000000000..0c5203a102
--- /dev/null
+++ b/reference/shaders/desktop-only/frag/fp16.desktop.frag
@@ -0,0 +1,153 @@
+#version 450
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct ResType
+{
+    f16vec4 _m0;
+    ivec4 _m1;
+};
+
+layout(location = 3) in f16vec4 v4;
+layout(location = 2) in f16vec3 v3;
+layout(location = 0) in float16_t v1;
+layout(location = 1) in f16vec2 v2;
+
+f16mat2 test_mat2(f16vec2 a, f16vec2 b, f16vec2 c, f16vec2 d)
+{
+    return f16mat2(f16vec2(a), f16vec2(b)) * f16mat2(f16vec2(c), f16vec2(d));
+}
+
+f16mat3 test_mat3(f16vec3 a, f16vec3 b, f16vec3 c, f16vec3 d, f16vec3 e, f16vec3 f)
+{
+    return f16mat3(f16vec3(a), f16vec3(b), f16vec3(c)) * f16mat3(f16vec3(d), f16vec3(e), f16vec3(f));
+}
+
+void test_constants()
+{
+    float16_t a = 1.0hf;
+    float16_t b = 1.5hf;
+    float16_t c = -1.5hf;
+    float16_t d = (0.0hf / 0.0hf);
+    float16_t e = (1.0hf / 0.0hf);
+    float16_t f = (-1.0hf / 0.0hf);
+    float16_t g = 1014.0hf;
+    float16_t h = 9.5367431640625e-07hf;
+}
+
+float16_t test_result()
+{
+    return 1.0hf;
+}
+
+void test_conversions()
+{
+    float16_t one = test_result();
+    int a = int(one);
+    uint b = uint(one);
+    bool c = one != 0.0hf;
+    float d = float(one);
+    double e = double(one);
+    float16_t a2 = float16_t(a);
+    float16_t b2 = float16_t(b);
+    float16_t c2 = float16_t(c);
+    float16_t d2 = float16_t(d);
+    float16_t e2 = float16_t(e);
+}
+
+void test_builtins()
+{
+    f16vec4 res = radians(v4);
+    res = degrees(v4);
+    res = sin(v4);
+    res = cos(v4);
+    res = tan(v4);
+    res = asin(v4);
+    res = atan(v4, v3.xyzz);
+    res = atan(v4);
+    res = sinh(v4);
+    res = cosh(v4);
+    res = tanh(v4);
+    res = asinh(v4);
+    res = acosh(v4);
+    res = atanh(v4);
+    res = pow(v4, v4);
+    res = exp(v4);
+    res = log(v4);
+    res = exp2(v4);
+    res = log2(v4);
+    res = sqrt(v4);
+    res = inversesqrt(v4);
+    res = abs(v4);
+    res = sign(v4);
+    res = floor(v4);
+    res = trunc(v4);
+    res = round(v4);
+    res = roundEven(v4);
+    res = ceil(v4);
+    res = fract(v4);
+    res = mod(v4, v4);
+    f16vec4 tmp;
+    f16vec4 _231 = modf(v4, tmp);
+    res = _231;
+    res = min(v4, v4);
+    res = max(v4, v4);
+    res = clamp(v4, v4, v4);
+    res = mix(v4, v4, v4);
+    res = mix(v4, v4, lessThan(v4, v4));
+    res = step(v4, v4);
+    res = smoothstep(v4, v4, v4);
+    bvec4 btmp = isnan(v4);
+    btmp = isinf(v4);
+    res = fma(v4, v4, v4);
+    ResType _275;
+    _275._m0 = frexp(v4, _275._m1);
+    ivec4 itmp = _275._m1;
+    res = _275._m0;
+    res = ldexp(res, itmp);
+    uint pack0 = packFloat2x16(v4.xy);
+    uint pack1 = packFloat2x16(v4.zw);
+    res = f16vec4(unpackFloat2x16(pack0), unpackFloat2x16(pack1));
+    float16_t t0 = length(v4);
+    t0 = distance(v4, v4);
+    t0 = dot(v4, v4);
+    f16vec3 res3 = cross(v3, v3);
+    res = normalize(v4);
+    res = faceforward(v4, v4, v4);
+    res = reflect(v4, v4);
+    res = refract(v4, v4, v1);
+    btmp = lessThan(v4, v4);
+    btmp = lessThanEqual(v4, v4);
+    btmp = greaterThan(v4, v4);
+    btmp = greaterThanEqual(v4, v4);
+    btmp = equal(v4, v4);
+    btmp = notEqual(v4, v4);
+    res = dFdx(v4);
+    res = dFdy(v4);
+    res = dFdxFine(v4);
+    res = dFdyFine(v4);
+    res = dFdxCoarse(v4);
+    res = dFdyCoarse(v4);
+    res = fwidth(v4);
+    res = fwidthFine(v4);
+    res = fwidthCoarse(v4);
+}
+
+void main()
+{
+    f16vec2 param = v2;
+    f16vec2 param_1 = v2;
+    f16vec2 param_2 = v3.xy;
+    f16vec2 param_3 = v3.xy;
+    f16mat2 m0 = test_mat2(param, param_1, param_2, param_3);
+    f16vec3 param_4 = v3;
+    f16vec3 param_5 = v3;
+    f16vec3 param_6 = v3;
+    f16vec3 param_7 = v4.xyz;
+    f16vec3 param_8 = v4.xyz;
+    f16vec3 param_9 = v4.yzw;
+    f16mat3 m1 = test_mat3(param_4, param_5, param_6, param_7, param_8, param_9);
+    test_constants();
+    test_conversions();
+    test_builtins();
+}
+
diff --git a/reference/shaders/frag/array-lut-no-loop-variable.frag b/reference/shaders/frag/array-lut-no-loop-variable.frag
new file mode 100644
index 0000000000..54d7bf774c
--- /dev/null
+++ b/reference/shaders/frag/array-lut-no-loop-variable.frag
@@ -0,0 +1,14 @@
+#version 310 es
+precision mediump float;
+precision highp int;
+
+layout(location = 0) out vec4 FragColor;
+
+void main()
+{
+    float lut[5] = float[](1.0, 2.0, 3.0, 4.0, 5.0);
+    for (mediump int i = 0; i < 4; i++, FragColor += vec4(lut[i]))
+    {
+    }
+}
+
diff --git a/reference/shaders/frag/constant-composites.frag b/reference/shaders/frag/constant-composites.frag
index ab0816c3d2..b105dbd26c 100644
--- a/reference/shaders/frag/constant-composites.frag
+++ b/reference/shaders/frag/constant-composites.frag
@@ -9,7 +9,7 @@ struct Foo
 };
 
 layout(location = 0) out vec4 FragColor;
-layout(location = 0) flat in mediump int _line;
+layout(location = 0) flat in mediump int line;
 float lut[4];
 Foo foos[2];
 
@@ -17,7 +17,7 @@ void main()
 {
     lut = float[](1.0, 4.0, 3.0, 2.0);
     foos = Foo[](Foo(10.0, 20.0), Foo(30.0, 40.0));
-    FragColor = vec4(lut[_line]);
-    FragColor += vec4(foos[_line].a * (foos[1 - _line].a));
+    FragColor = vec4(lut[line]);
+    FragColor += vec4(foos[line].a * (foos[1 - line].a));
 }
 
diff --git a/reference/shaders/frag/gather-dref.frag b/reference/shaders/frag/gather-dref.frag
new file mode 100644
index 0000000000..5416f79cb5
--- /dev/null
+++ b/reference/shaders/frag/gather-dref.frag
@@ -0,0 +1,14 @@
+#version 310 es
+precision mediump float;
+precision highp int;
+
+layout(binding = 0) uniform mediump sampler2DShadow uT;
+
+layout(location = 0) out vec4 FragColor;
+layout(location = 0) in vec3 vUV;
+
+void main()
+{
+    FragColor = textureGather(uT, vUV.xy, vUV.z);
+}
+
diff --git a/reference/shaders/frag/swizzle.frag b/reference/shaders/frag/swizzle.frag
index e619be2f48..a229e5b0d5 100644
--- a/reference/shaders/frag/swizzle.frag
+++ b/reference/shaders/frag/swizzle.frag
@@ -2,7 +2,7 @@
 precision mediump float;
 precision highp int;
 
-layout(location = 0) uniform mediump sampler2D samp;
+layout(binding = 0) uniform mediump sampler2D samp;
 
 layout(location = 0) out vec4 FragColor;
 layout(location = 2) in vec2 vUV;
diff --git a/reference/shaders/frag/ubo_layout.frag b/reference/shaders/frag/ubo_layout.frag
index bc0b01c065..4b66e1396a 100644
--- a/reference/shaders/frag/ubo_layout.frag
+++ b/reference/shaders/frag/ubo_layout.frag
@@ -7,6 +7,11 @@ struct Str
     mat4 foo;
 };
 
+struct Str_1
+{
+    mat4 foo;
+};
+
 layout(binding = 0, std140) uniform UBO1
 {
     layout(row_major) Str foo;
@@ -14,7 +19,7 @@ layout(binding = 0, std140) uniform UBO1
 
 layout(binding = 1, std140) uniform UBO2
 {
-    Str foo;
+    Str_1 foo;
 } ubo0;
 
 layout(location = 0) out vec4 FragColor;
diff --git a/reference/shaders/tesc/water_tess.tesc b/reference/shaders/tesc/water_tess.tesc
index 26611b8b25..4daaa456e3 100644
--- a/reference/shaders/tesc/water_tess.tesc
+++ b/reference/shaders/tesc/water_tess.tesc
@@ -27,7 +27,8 @@ bool frustum_cull(vec2 p0)
     vec3 f0 = vec3(dot(_41.uFrustum[0], vec4(center, 1.0)), dot(_41.uFrustum[1], vec4(center, 1.0)), dot(_41.uFrustum[2], vec4(center, 1.0)));
     vec3 f1 = vec3(dot(_41.uFrustum[3], vec4(center, 1.0)), dot(_41.uFrustum[4], vec4(center, 1.0)), dot(_41.uFrustum[5], vec4(center, 1.0)));
     vec3 _199 = f0;
-    bool _205 = any(lessThanEqual(_199, vec3(-radius)));
+    float _200 = radius;
+    bool _205 = any(lessThanEqual(_199, vec3(-_200)));
     bool _215;
     if (!_205)
     {
diff --git a/reference/shaders/vert/ground.vert b/reference/shaders/vert/ground.vert
index b028cc34c6..69f92534cc 100644
--- a/reference/shaders/vert/ground.vert
+++ b/reference/shaders/vert/ground.vert
@@ -58,26 +58,26 @@ vec2 warp_position()
     uint ufloor_lod = uint(floor_lod);
     uvec2 uPosition = uvec2(Position);
     uvec2 mask = (uvec2(1u) << uvec2(ufloor_lod, ufloor_lod + 1u)) - uvec2(1u);
-    uint _106;
+    uint _110;
     if (uPosition.x < 32u)
     {
-        _106 = mask.x;
+        _110 = mask.x;
     }
     else
     {
-        _106 = 0u;
+        _110 = 0u;
     }
-    uint _116 = _106;
-    uint _117;
+    uint _116 = _110;
+    uint _120;
     if (uPosition.y < 32u)
     {
-        _117 = mask.y;
+        _120 = mask.y;
     }
     else
     {
-        _117 = 0u;
+        _120 = 0u;
     }
-    uvec2 rounding = uvec2(_116, _117);
+    uvec2 rounding = uvec2(_116, _120);
     vec4 lower_upper_snapped = vec4((uPosition + rounding).xyxy & (~mask).xxyy);
     return mix(lower_upper_snapped.xy, lower_upper_snapped.zw, vec2(fract_lod));
 }
diff --git a/reference/shaders/vert/ocean.vert b/reference/shaders/vert/ocean.vert
index d77a29fcbf..720bd7d0de 100644
--- a/reference/shaders/vert/ocean.vert
+++ b/reference/shaders/vert/ocean.vert
@@ -59,47 +59,47 @@ vec2 warp_position()
     uint ufloor_lod = uint(floor_lod);
     uvec4 uPosition = uvec4(Position);
     uvec2 mask = (uvec2(1u) << uvec2(ufloor_lod, ufloor_lod + 1u)) - uvec2(1u);
-    uint _107;
+    uint _111;
     if (uPosition.x < 32u)
     {
-        _107 = mask.x;
+        _111 = mask.x;
     }
     else
     {
-        _107 = 0u;
+        _111 = 0u;
     }
     uvec4 rounding;
-    rounding.x = _107;
-    uint _119;
+    rounding.x = _111;
+    uint _122;
     if (uPosition.y < 32u)
     {
-        _119 = mask.x;
+        _122 = mask.x;
     }
     else
     {
-        _119 = 0u;
+        _122 = 0u;
     }
-    rounding.y = _119;
-    uint _130;
+    rounding.y = _122;
+    uint _133;
     if (uPosition.x < 32u)
     {
-        _130 = mask.y;
+        _133 = mask.y;
     }
     else
     {
-        _130 = 0u;
+        _133 = 0u;
     }
-    rounding.z = _130;
-    uint _142;
+    rounding.z = _133;
+    uint _145;
     if (uPosition.y < 32u)
     {
-        _142 = mask.y;
+        _145 = mask.y;
     }
     else
     {
-        _142 = 0u;
+        _145 = 0u;
     }
-    rounding.w = _142;
+    rounding.w = _145;
     vec4 lower_upper_snapped = vec4((uPosition.xyxy + rounding) & (~mask).xxyy);
     return mix(lower_upper_snapped.xy, lower_upper_snapped.zw, vec2(fract_lod));
 }
diff --git a/reference/shaders/vert/read-from-row-major-array.vert b/reference/shaders/vert/read-from-row-major-array.vert
new file mode 100644
index 0000000000..1c950f3fa4
--- /dev/null
+++ b/reference/shaders/vert/read-from-row-major-array.vert
@@ -0,0 +1,45 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform Block
+{
+    layout(row_major) mat2x3 var[3][4];
+} _104;
+
+layout(location = 0) in vec4 a_position;
+layout(location = 0) out mediump float v_vtxResult;
+
+mediump float compare_float(float a, float b)
+{
+    return float(abs(a - b) < 0.0500000007450580596923828125);
+}
+
+mediump float compare_vec3(vec3 a, vec3 b)
+{
+    float param = a.x;
+    float param_1 = b.x;
+    float param_2 = a.y;
+    float param_3 = b.y;
+    float param_4 = a.z;
+    float param_5 = b.z;
+    return (compare_float(param, param_1) * compare_float(param_2, param_3)) * compare_float(param_4, param_5);
+}
+
+mediump float compare_mat2x3(mat2x3 a, mat2x3 b)
+{
+    vec3 param = a[0];
+    vec3 param_1 = b[0];
+    vec3 param_2 = a[1];
+    vec3 param_3 = b[1];
+    return compare_vec3(param, param_1) * compare_vec3(param_2, param_3);
+}
+
+void main()
+{
+    gl_Position = a_position;
+    mediump float result = 1.0;
+    mat2x3 param = _104.var[0][0];
+    mat2x3 param_1 = mat2x3(vec3(2.0, 6.0, -6.0), vec3(0.0, 5.0, 5.0));
+    result *= compare_mat2x3(param, param_1);
+    v_vtxResult = result;
+}
+
diff --git a/reference/shaders/vulkan/comp/subgroups.nocompat.invalid.vk.comp.vk b/reference/shaders/vulkan/comp/subgroups.nocompat.invalid.vk.comp.vk
new file mode 100644
index 0000000000..6d288574f7
--- /dev/null
+++ b/reference/shaders/vulkan/comp/subgroups.nocompat.invalid.vk.comp.vk
@@ -0,0 +1,110 @@
+#version 450
+#extension GL_KHR_shader_subgroup_basic : require
+#extension GL_KHR_shader_subgroup_ballot : require
+#extension GL_KHR_shader_subgroup_shuffle : require
+#extension GL_KHR_shader_subgroup_shuffle_relative : require
+#extension GL_KHR_shader_subgroup_vote : require
+#extension GL_KHR_shader_subgroup_arithmetic : require
+#extension GL_KHR_shader_subgroup_clustered : require
+#extension GL_KHR_shader_subgroup_quad : require
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+layout(set = 0, binding = 0, std430) buffer SSBO
+{
+    float FragColor;
+} _9;
+
+void main()
+{
+    _9.FragColor = float(gl_NumSubgroups);
+    _9.FragColor = float(gl_SubgroupID);
+    _9.FragColor = float(gl_SubgroupSize);
+    _9.FragColor = float(gl_SubgroupInvocationID);
+    subgroupMemoryBarrier();
+    subgroupBarrier();
+    subgroupMemoryBarrier();
+    subgroupMemoryBarrierBuffer();
+    subgroupMemoryBarrierShared();
+    subgroupMemoryBarrierImage();
+    bool elected = subgroupElect();
+    _9.FragColor = vec4(gl_SubgroupEqMask).x;
+    _9.FragColor = vec4(gl_SubgroupGeMask).x;
+    _9.FragColor = vec4(gl_SubgroupGtMask).x;
+    _9.FragColor = vec4(gl_SubgroupLeMask).x;
+    _9.FragColor = vec4(gl_SubgroupLtMask).x;
+    vec4 broadcasted = subgroupBroadcast(vec4(10.0), 8u);
+    vec3 first = subgroupBroadcastFirst(vec3(20.0));
+    uvec4 ballot_value = subgroupBallot(true);
+    bool inverse_ballot_value = subgroupInverseBallot(ballot_value);
+    bool bit_extracted = subgroupBallotBitExtract(uvec4(10u), 8u);
+    uint bit_count = subgroupBallotBitCount(ballot_value);
+    uint inclusive_bit_count = subgroupBallotInclusiveBitCount(ballot_value);
+    uint exclusive_bit_count = subgroupBallotExclusiveBitCount(ballot_value);
+    uint lsb = subgroupBallotFindLSB(ballot_value);
+    uint msb = subgroupBallotFindMSB(ballot_value);
+    uint shuffled = subgroupShuffle(10u, 8u);
+    uint shuffled_xor = subgroupShuffleXor(30u, 8u);
+    uint shuffled_up = subgroupShuffleUp(20u, 4u);
+    uint shuffled_down = subgroupShuffleDown(20u, 4u);
+    bool has_all = subgroupAll(true);
+    bool has_any = subgroupAny(true);
+    bool has_equal = subgroupAllEqual(true);
+    vec4 added = subgroupAdd(vec4(20.0));
+    ivec4 iadded = subgroupAdd(ivec4(20));
+    vec4 multiplied = subgroupMul(vec4(20.0));
+    ivec4 imultiplied = subgroupMul(ivec4(20));
+    vec4 lo = subgroupMin(vec4(20.0));
+    vec4 hi = subgroupMax(vec4(20.0));
+    ivec4 slo = subgroupMin(ivec4(20));
+    ivec4 shi = subgroupMax(ivec4(20));
+    uvec4 ulo = subgroupMin(uvec4(20u));
+    uvec4 uhi = subgroupMax(uvec4(20u));
+    uvec4 anded = subgroupAnd(ballot_value);
+    uvec4 ored = subgroupOr(ballot_value);
+    uvec4 xored = subgroupXor(ballot_value);
+    added = subgroupInclusiveAdd(added);
+    iadded = subgroupInclusiveAdd(iadded);
+    multiplied = subgroupInclusiveMul(multiplied);
+    imultiplied = subgroupInclusiveMul(imultiplied);
+    lo = subgroupInclusiveMin(lo);
+    hi = subgroupInclusiveMax(hi);
+    slo = subgroupInclusiveMin(slo);
+    shi = subgroupInclusiveMax(shi);
+    ulo = subgroupInclusiveMin(ulo);
+    uhi = subgroupInclusiveMax(uhi);
+    anded = subgroupInclusiveAnd(anded);
+    ored = subgroupInclusiveOr(ored);
+    xored = subgroupInclusiveXor(ored);
+    added = subgroupExclusiveAdd(lo);
+    added = subgroupExclusiveAdd(multiplied);
+    multiplied = subgroupExclusiveMul(multiplied);
+    iadded = subgroupExclusiveAdd(imultiplied);
+    imultiplied = subgroupExclusiveMul(imultiplied);
+    lo = subgroupExclusiveMin(lo);
+    hi = subgroupExclusiveMax(hi);
+    ulo = subgroupExclusiveMin(ulo);
+    uhi = subgroupExclusiveMax(uhi);
+    slo = subgroupExclusiveMin(slo);
+    shi = subgroupExclusiveMax(shi);
+    anded = subgroupExclusiveAnd(anded);
+    ored = subgroupExclusiveOr(ored);
+    xored = subgroupExclusiveXor(ored);
+    added = subgroupClusteredAdd(added, 4u);
+    multiplied = subgroupClusteredMul(multiplied, 4u);
+    iadded = subgroupClusteredAdd(iadded, 4u);
+    imultiplied = subgroupClusteredMul(imultiplied, 4u);
+    lo = subgroupClusteredMin(lo, 4u);
+    hi = subgroupClusteredMax(hi, 4u);
+    ulo = subgroupClusteredMin(ulo, 4u);
+    uhi = subgroupClusteredMax(uhi, 4u);
+    slo = subgroupClusteredMin(slo, 4u);
+    shi = subgroupClusteredMax(shi, 4u);
+    anded = subgroupClusteredAnd(anded, 4u);
+    ored = subgroupClusteredOr(ored, 4u);
+    xored = subgroupClusteredXor(xored, 4u);
+    vec4 swap_horiz = subgroupQuadSwapHorizontal(vec4(20.0));
+    vec4 swap_vertical = subgroupQuadSwapVertical(vec4(20.0));
+    vec4 swap_diagonal = subgroupQuadSwapDiagonal(vec4(20.0));
+    vec4 quad_broadcast = subgroupQuadBroadcast(vec4(20.0), 3u);
+}
+
diff --git a/reference/shaders/vulkan/frag/push-constant.frag.vk b/reference/shaders/vulkan/frag/push-constant.frag.vk
deleted file mode 100644
index 748a028678..0000000000
--- a/reference/shaders/vulkan/frag/push-constant.frag.vk
+++ /dev/null
@@ -1,18 +0,0 @@
-#version 310 es
-precision mediump float;
-precision highp int;
-
-layout(push_constant, std430) uniform PushConstants
-{
-    vec4 value0;
-    vec4 value1;
-} push;
-
-layout(location = 0) out vec4 FragColor;
-layout(location = 0) in vec4 vColor;
-
-void main()
-{
-    FragColor = ((vColor + push.value0) + push.value1);
-}
-
diff --git a/reference/shaders/vulkan/frag/separate-combined-fake-overload.vk.frag b/reference/shaders/vulkan/frag/separate-combined-fake-overload.vk.frag
new file mode 100644
index 0000000000..575c4187eb
--- /dev/null
+++ b/reference/shaders/vulkan/frag/separate-combined-fake-overload.vk.frag
@@ -0,0 +1,22 @@
+#version 450
+
+layout(binding = 0) uniform sampler2D uSamp;
+uniform sampler2D SPIRV_Cross_CombineduTuS;
+
+layout(location = 0) out vec4 FragColor;
+
+vec4 samp(sampler2D uSamp_1)
+{
+    return texture(uSamp_1, vec2(0.5));
+}
+
+vec4 samp_1(sampler2D SPIRV_Cross_CombinedTS)
+{
+    return texture(SPIRV_Cross_CombinedTS, vec2(0.5));
+}
+
+void main()
+{
+    FragColor = samp(uSamp) + samp_1(SPIRV_Cross_CombineduTuS);
+}
+
diff --git a/reference/shaders/vulkan/frag/separate-combined-fake-overload.vk.frag.vk b/reference/shaders/vulkan/frag/separate-combined-fake-overload.vk.frag.vk
new file mode 100644
index 0000000000..222b659e40
--- /dev/null
+++ b/reference/shaders/vulkan/frag/separate-combined-fake-overload.vk.frag.vk
@@ -0,0 +1,23 @@
+#version 450
+
+layout(set = 0, binding = 0) uniform sampler2D uSamp;
+layout(set = 0, binding = 1) uniform texture2D uT;
+layout(set = 0, binding = 2) uniform sampler uS;
+
+layout(location = 0) out vec4 FragColor;
+
+vec4 samp(sampler2D uSamp_1)
+{
+    return texture(uSamp_1, vec2(0.5));
+}
+
+vec4 samp(texture2D T, sampler S)
+{
+    return texture(sampler2D(T, S), vec2(0.5));
+}
+
+void main()
+{
+    FragColor = samp(uSamp) + samp(uT, uS);
+}
+
diff --git a/reference/shaders/vulkan/frag/separate-sampler-texture-array.vk.frag b/reference/shaders/vulkan/frag/separate-sampler-texture-array.vk.frag
new file mode 100644
index 0000000000..43393f4e77
--- /dev/null
+++ b/reference/shaders/vulkan/frag/separate-sampler-texture-array.vk.frag
@@ -0,0 +1,44 @@
+#version 310 es
+precision mediump float;
+precision highp int;
+
+uniform mediump sampler2D SPIRV_Cross_CombineduTextureuSampler[4];
+uniform mediump sampler2DArray SPIRV_Cross_CombineduTextureArrayuSampler[4];
+uniform mediump samplerCube SPIRV_Cross_CombineduTextureCubeuSampler[4];
+uniform mediump sampler3D SPIRV_Cross_CombineduTexture3DuSampler[4];
+
+layout(location = 0) in vec2 vTex;
+layout(location = 1) in vec3 vTex3;
+layout(location = 0) out vec4 FragColor;
+
+vec4 sample_func(vec2 uv, mediump sampler2D SPIRV_Cross_CombineduTexturesamp[4])
+{
+    return texture(SPIRV_Cross_CombineduTexturesamp[2], uv);
+}
+
+vec4 sample_func_dual(vec2 uv, mediump sampler2D SPIRV_Cross_Combinedtexsamp)
+{
+    return texture(SPIRV_Cross_Combinedtexsamp, uv);
+}
+
+vec4 sample_func_dual_array(vec2 uv, mediump sampler2D SPIRV_Cross_Combinedtexsamp[4])
+{
+    return texture(SPIRV_Cross_Combinedtexsamp[1], uv);
+}
+
+void main()
+{
+    vec2 off = vec2(1.0) / vec2(textureSize(SPIRV_Cross_CombineduTextureuSampler[1], 0));
+    vec2 off2 = vec2(1.0) / vec2(textureSize(SPIRV_Cross_CombineduTextureuSampler[2], 1));
+    highp vec2 param = (vTex + off) + off2;
+    vec4 c0 = sample_func(param, SPIRV_Cross_CombineduTextureuSampler);
+    highp vec2 param_1 = (vTex + off) + off2;
+    vec4 c1 = sample_func_dual(param_1, SPIRV_Cross_CombineduTextureuSampler[1]);
+    highp vec2 param_2 = (vTex + off) + off2;
+    vec4 c2 = sample_func_dual_array(param_2, SPIRV_Cross_CombineduTextureuSampler);
+    vec4 c3 = texture(SPIRV_Cross_CombineduTextureArrayuSampler[3], vTex3);
+    vec4 c4 = texture(SPIRV_Cross_CombineduTextureCubeuSampler[1], vTex3);
+    vec4 c5 = texture(SPIRV_Cross_CombineduTexture3DuSampler[2], vTex3);
+    FragColor = ((((c0 + c1) + c2) + c3) + c4) + c5;
+}
+
diff --git a/reference/shaders/vulkan/frag/separate-sampler-texture-array.vk.frag.vk b/reference/shaders/vulkan/frag/separate-sampler-texture-array.vk.frag.vk
new file mode 100644
index 0000000000..495874ecc2
--- /dev/null
+++ b/reference/shaders/vulkan/frag/separate-sampler-texture-array.vk.frag.vk
@@ -0,0 +1,45 @@
+#version 310 es
+precision mediump float;
+precision highp int;
+
+layout(set = 0, binding = 1) uniform mediump texture2D uTexture[4];
+layout(set = 0, binding = 0) uniform mediump sampler uSampler;
+layout(set = 0, binding = 4) uniform mediump texture2DArray uTextureArray[4];
+layout(set = 0, binding = 3) uniform mediump textureCube uTextureCube[4];
+layout(set = 0, binding = 2) uniform mediump texture3D uTexture3D[4];
+
+layout(location = 0) in vec2 vTex;
+layout(location = 1) in vec3 vTex3;
+layout(location = 0) out vec4 FragColor;
+
+vec4 sample_func(mediump sampler samp, vec2 uv)
+{
+    return texture(sampler2D(uTexture[2], samp), uv);
+}
+
+vec4 sample_func_dual(mediump sampler samp, mediump texture2D tex, vec2 uv)
+{
+    return texture(sampler2D(tex, samp), uv);
+}
+
+vec4 sample_func_dual_array(mediump sampler samp, mediump texture2D tex[4], vec2 uv)
+{
+    return texture(sampler2D(tex[1], samp), uv);
+}
+
+void main()
+{
+    vec2 off = vec2(1.0) / vec2(textureSize(sampler2D(uTexture[1], uSampler), 0));
+    vec2 off2 = vec2(1.0) / vec2(textureSize(sampler2D(uTexture[2], uSampler), 1));
+    highp vec2 param = (vTex + off) + off2;
+    vec4 c0 = sample_func(uSampler, param);
+    highp vec2 param_1 = (vTex + off) + off2;
+    vec4 c1 = sample_func_dual(uSampler, uTexture[1], param_1);
+    highp vec2 param_2 = (vTex + off) + off2;
+    vec4 c2 = sample_func_dual_array(uSampler, uTexture, param_2);
+    vec4 c3 = texture(sampler2DArray(uTextureArray[3], uSampler), vTex3);
+    vec4 c4 = texture(samplerCube(uTextureCube[1], uSampler), vTex3);
+    vec4 c5 = texture(sampler3D(uTexture3D[2], uSampler), vTex3);
+    FragColor = ((((c0 + c1) + c2) + c3) + c4) + c5;
+}
+
diff --git a/reference/shaders/vulkan/frag/spec-constant.vk.frag.vk b/reference/shaders/vulkan/frag/spec-constant.vk.frag.vk
deleted file mode 100644
index d0765cc8bd..0000000000
--- a/reference/shaders/vulkan/frag/spec-constant.vk.frag.vk
+++ /dev/null
@@ -1,68 +0,0 @@
-#version 310 es
-precision mediump float;
-precision highp int;
-
-layout(constant_id = 1) const float a = 1.0;
-layout(constant_id = 2) const float b = 2.0;
-layout(constant_id = 3) const int c = 3;
-layout(constant_id = 4) const int d = 4;
-layout(constant_id = 5) const uint e = 5u;
-layout(constant_id = 6) const uint f = 6u;
-layout(constant_id = 7) const bool g = false;
-layout(constant_id = 8) const bool h = true;
-
-struct Foo
-{
-    float elems[(d + 2)];
-};
-
-layout(location = 0) out vec4 FragColor;
-
-void main()
-{
-    float t0 = a;
-    float t1 = b;
-    mediump uint c0 = (uint(c) + 0u);
-    mediump int c1 = (-c);
-    mediump int c2 = (~c);
-    mediump int c3 = (c + d);
-    mediump int c4 = (c - d);
-    mediump int c5 = (c * d);
-    mediump int c6 = (c / d);
-    mediump uint c7 = (e / f);
-    mediump int c8 = (c % d);
-    mediump uint c9 = (e % f);
-    mediump int c10 = (c >> d);
-    mediump uint c11 = (e >> f);
-    mediump int c12 = (c << d);
-    mediump int c13 = (c | d);
-    mediump int c14 = (c ^ d);
-    mediump int c15 = (c & d);
-    bool c16 = (g || h);
-    bool c17 = (g && h);
-    bool c18 = (!g);
-    bool c19 = (g == h);
-    bool c20 = (g != h);
-    bool c21 = (c == d);
-    bool c22 = (c != d);
-    bool c23 = (c < d);
-    bool c24 = (e < f);
-    bool c25 = (c > d);
-    bool c26 = (e > f);
-    bool c27 = (c <= d);
-    bool c28 = (e <= f);
-    bool c29 = (c >= d);
-    bool c30 = (e >= f);
-    mediump int c31 = c8 + c3;
-    mediump int c32 = int(e + 0u);
-    bool c33 = (c != int(0u));
-    bool c34 = (e != 0u);
-    mediump int c35 = int(g);
-    mediump uint c36 = uint(g);
-    float c37 = float(g);
-    float vec0[(c + 3)][8];
-    float vec1[(c + 2)];
-    Foo foo;
-    FragColor = ((vec4(t0 + t1) + vec4(vec0[0][0])) + vec4(vec1[0])) + vec4(foo.elems[c]);
-}
-
diff --git a/reference/shaders/vulkan/vert/multiview.nocompat.vk.vert b/reference/shaders/vulkan/vert/multiview.nocompat.vk.vert
deleted file mode 100644
index 533738efc3..0000000000
--- a/reference/shaders/vulkan/vert/multiview.nocompat.vk.vert
+++ /dev/null
@@ -1,15 +0,0 @@
-#version 310 es
-#extension GL_OVR_multiview2 : require
-
-layout(binding = 0, std140) uniform MVPs
-{
-    mat4 MVP[2];
-} _19;
-
-layout(location = 0) in vec4 Position;
-
-void main()
-{
-    gl_Position = _19.MVP[gl_ViewID_OVR] * Position;
-}
-
diff --git a/reference/shaders/vulkan/vert/vulkan-vertex.vert b/reference/shaders/vulkan/vert/vulkan-vertex.vert
deleted file mode 100644
index 8de2b111ef..0000000000
--- a/reference/shaders/vulkan/vert/vulkan-vertex.vert
+++ /dev/null
@@ -1,9 +0,0 @@
-#version 310 es
-
-uniform int SPIRV_Cross_BaseInstance;
-
-void main()
-{
-    gl_Position = (vec4(1.0, 2.0, 3.0, 4.0) * float((gl_VertexID + (gl_InstanceID + SPIRV_Cross_BaseInstance))));
-}
-
diff --git a/reference/shaders/vulkan/vert/vulkan-vertex.vert.vk b/reference/shaders/vulkan/vert/vulkan-vertex.vert.vk
deleted file mode 100644
index 9ee3cc0997..0000000000
--- a/reference/shaders/vulkan/vert/vulkan-vertex.vert.vk
+++ /dev/null
@@ -1,7 +0,0 @@
-#version 310 es
-
-void main()
-{
-    gl_Position = (vec4(1.0, 2.0, 3.0, 4.0) * float((gl_VertexIndex + gl_InstanceIndex)));
-}
-
diff --git a/shaders-hlsl/asm/comp/specialization-constant-workgroup.nofxc.asm.comp b/shaders-hlsl-no-opt/asm/comp/specialization-constant-workgroup.nofxc.asm.comp
similarity index 100%
rename from shaders-hlsl/asm/comp/specialization-constant-workgroup.nofxc.asm.comp
rename to shaders-hlsl-no-opt/asm/comp/specialization-constant-workgroup.nofxc.asm.comp
diff --git a/shaders-hlsl/asm/vert/empty-struct-composite.asm.vert b/shaders-hlsl-no-opt/asm/vert/empty-struct-composite.asm.vert
similarity index 100%
rename from shaders-hlsl/asm/vert/empty-struct-composite.asm.vert
rename to shaders-hlsl-no-opt/asm/vert/empty-struct-composite.asm.vert
diff --git a/shaders-hlsl/comp/bitfield.noopt.comp b/shaders-hlsl-no-opt/comp/bitfield.comp
similarity index 100%
rename from shaders-hlsl/comp/bitfield.noopt.comp
rename to shaders-hlsl-no-opt/comp/bitfield.comp
diff --git a/shaders-hlsl/frag/spec-constant.frag b/shaders-hlsl-no-opt/frag/spec-constant.frag
similarity index 100%
rename from shaders-hlsl/frag/spec-constant.frag
rename to shaders-hlsl-no-opt/frag/spec-constant.frag
diff --git a/shaders-hlsl/asm/comp/storage-buffer-basic.nofxc.asm.comp b/shaders-hlsl/asm/comp/storage-buffer-basic.invalid.nofxc.asm.comp
similarity index 100%
rename from shaders-hlsl/asm/comp/storage-buffer-basic.nofxc.asm.comp
rename to shaders-hlsl/asm/comp/storage-buffer-basic.invalid.nofxc.asm.comp
diff --git a/shaders-hlsl/asm/frag/combined-sampler-reuse.asm.frag b/shaders-hlsl/asm/frag/combined-sampler-reuse.asm.frag
new file mode 100644
index 0000000000..ba2f95b234
--- /dev/null
+++ b/shaders-hlsl/asm/frag/combined-sampler-reuse.asm.frag
@@ -0,0 +1,57 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 6
+; Bound: 36
+; Schema: 0
+               OpCapability Shader
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main" %FragColor %vUV
+               OpExecutionMode %main OriginUpperLeft
+               OpSource GLSL 450
+               OpName %main "main"
+               OpName %FragColor "FragColor"
+               OpName %uTex "uTex"
+               OpName %uSampler "uSampler"
+               OpName %vUV "vUV"
+               OpDecorate %FragColor Location 0
+               OpDecorate %uTex DescriptorSet 0
+               OpDecorate %uTex Binding 1
+               OpDecorate %uSampler DescriptorSet 0
+               OpDecorate %uSampler Binding 0
+               OpDecorate %vUV Location 0
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+  %FragColor = OpVariable %_ptr_Output_v4float Output
+         %10 = OpTypeImage %float 2D 0 0 0 1 Unknown
+%_ptr_UniformConstant_10 = OpTypePointer UniformConstant %10
+       %uTex = OpVariable %_ptr_UniformConstant_10 UniformConstant
+         %14 = OpTypeSampler
+%_ptr_UniformConstant_14 = OpTypePointer UniformConstant %14
+   %uSampler = OpVariable %_ptr_UniformConstant_14 UniformConstant
+         %18 = OpTypeSampledImage %10
+    %v2float = OpTypeVector %float 2
+%_ptr_Input_v2float = OpTypePointer Input %v2float
+        %vUV = OpVariable %_ptr_Input_v2float Input
+        %int = OpTypeInt 32 1
+      %v2int = OpTypeVector %int 2
+      %int_1 = OpConstant %int 1
+         %32 = OpConstantComposite %v2int %int_1 %int_1
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+         %13 = OpLoad %10 %uTex
+         %17 = OpLoad %14 %uSampler
+         %19 = OpSampledImage %18 %13 %17
+         %23 = OpLoad %v2float %vUV
+         %24 = OpImageSampleImplicitLod %v4float %19 %23
+               OpStore %FragColor %24
+         %28 = OpLoad %v2float %vUV
+         %33 = OpImageSampleImplicitLod %v4float %19 %28 ConstOffset %32
+         %34 = OpLoad %v4float %FragColor
+         %35 = OpFAdd %v4float %34 %33
+               OpStore %FragColor %35
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders-hlsl/asm/frag/empty-struct.asm.frag b/shaders-hlsl/asm/frag/empty-struct.asm.frag
new file mode 100644
index 0000000000..701f9f2a1e
--- /dev/null
+++ b/shaders-hlsl/asm/frag/empty-struct.asm.frag
@@ -0,0 +1,56 @@
+; SPIR-V
+; Version: 1.2
+; Generator: Khronos; 0
+; Bound: 43
+; Schema: 0
+               OpCapability Linkage
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %EntryPoint_Main "main"
+               OpExecutionMode %EntryPoint_Main OriginUpperLeft
+               OpSource Unknown 100
+               OpName %EmptyStructTest "EmptyStructTest"
+               OpName %GetValue "GetValue"
+               OpName %GetValue2 "GetValue"
+               OpName %self "self"
+               OpName %self2 "self"
+               OpName %emptyStruct "emptyStruct"
+               OpName %value "value"
+               OpName %EntryPoint_Main "EntryPoint_Main"
+
+%EmptyStructTest = OpTypeStruct
+%_ptr_Function_EmptyStructTest = OpTypePointer Function %EmptyStructTest
+      %float = OpTypeFloat 32
+%_ptr_Function_float = OpTypePointer Function %float
+          %5 = OpTypeFunction %float %_ptr_Function_EmptyStructTest
+          %6 = OpTypeFunction %float %EmptyStructTest
+       %void = OpTypeVoid
+%_ptr_Function_void = OpTypePointer Function %void
+          %8 = OpTypeFunction %void %_ptr_Function_EmptyStructTest
+          %9 = OpTypeFunction %void
+    %float_0 = OpConstant %float 0
+
+   %GetValue = OpFunction %float None %5
+       %self = OpFunctionParameter %_ptr_Function_EmptyStructTest
+         %13 = OpLabel
+               OpReturnValue %float_0
+               OpFunctionEnd
+
+   %GetValue2 = OpFunction %float None %6
+       %self2 = OpFunctionParameter %EmptyStructTest
+         %14 = OpLabel
+               OpReturnValue %float_0
+               OpFunctionEnd
+
+%EntryPoint_Main = OpFunction %void None %9
+         %37 = OpLabel
+     %emptyStruct = OpVariable %_ptr_Function_EmptyStructTest Function
+         %18 = OpVariable %_ptr_Function_EmptyStructTest Function
+      %value = OpVariable %_ptr_Function_float Function
+	  %value2 = OpCompositeConstruct %EmptyStructTest
+         %22 = OpFunctionCall %float %GetValue %emptyStruct
+         %23 = OpFunctionCall %float %GetValue2 %value2
+               OpStore %value %22
+               OpStore %value %23
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders-hlsl/asm/frag/image-extract-reuse.asm.frag b/shaders-hlsl/asm/frag/image-extract-reuse.asm.frag
new file mode 100644
index 0000000000..63c8ab57a5
--- /dev/null
+++ b/shaders-hlsl/asm/frag/image-extract-reuse.asm.frag
@@ -0,0 +1,41 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 6
+; Bound: 19
+; Schema: 0
+               OpCapability Shader
+               OpCapability ImageQuery
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main" %Size
+               OpExecutionMode %main OriginUpperLeft
+               OpSource GLSL 450
+               OpName %main "main"
+               OpName %Size "Size"
+               OpName %uTexture "uTexture"
+               OpDecorate %Size Location 0
+               OpDecorate %uTexture DescriptorSet 0
+               OpDecorate %uTexture Binding 0
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+        %int = OpTypeInt 32 1
+      %v2int = OpTypeVector %int 2
+%_ptr_Output_v2int = OpTypePointer Output %v2int
+       %Size = OpVariable %_ptr_Output_v2int Output
+      %float = OpTypeFloat 32
+         %11 = OpTypeImage %float 2D 0 0 0 1 Unknown
+         %12 = OpTypeSampledImage %11
+%_ptr_UniformConstant_12 = OpTypePointer UniformConstant %12
+   %uTexture = OpVariable %_ptr_UniformConstant_12 UniformConstant
+      %int_0 = OpConstant %int 0
+      %int_1 = OpConstant %int 1
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+         %15 = OpLoad %12 %uTexture
+         %17 = OpImage %11 %15
+         %18 = OpImageQuerySizeLod %v2int %17 %int_0
+         %19 = OpImageQuerySizeLod %v2int %17 %int_1
+		 %20 = OpIAdd %v2int %18 %19
+               OpStore %Size %20
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders-hlsl/asm/frag/implicit-read-dep-phi.asm.frag b/shaders-hlsl/asm/frag/implicit-read-dep-phi.asm.frag
new file mode 100644
index 0000000000..ccdfeef58d
--- /dev/null
+++ b/shaders-hlsl/asm/frag/implicit-read-dep-phi.asm.frag
@@ -0,0 +1,81 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 3
+; Bound: 60
+; Schema: 0
+               OpCapability Shader
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main" %v0 %FragColor
+               OpExecutionMode %main OriginUpperLeft
+               OpSource GLSL 450
+               OpName %main "main"
+               OpName %phi "phi"
+               OpName %i "i"
+               OpName %v0 "v0"
+               OpName %FragColor "FragColor"
+               OpName %uImage "uImage"
+               OpDecorate %v0 Location 0
+               OpDecorate %FragColor Location 0
+               OpDecorate %uImage DescriptorSet 0
+               OpDecorate %uImage Binding 0
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+      %float = OpTypeFloat 32
+%_ptr_Function_float = OpTypePointer Function %float
+    %float_1 = OpConstant %float 1
+        %int = OpTypeInt 32 1
+%_ptr_Function_int = OpTypePointer Function %int
+      %int_0 = OpConstant %int 0
+      %int_4 = OpConstant %int 4
+       %bool = OpTypeBool
+    %v4float = OpTypeVector %float 4
+%_ptr_Input_v4float = OpTypePointer Input %v4float
+         %v0 = OpVariable %_ptr_Input_v4float Input
+%_ptr_Input_float = OpTypePointer Input %float
+    %float_0 = OpConstant %float 0
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+  %FragColor = OpVariable %_ptr_Output_v4float Output
+         %36 = OpTypeImage %float 2D 0 0 0 1 Unknown
+         %37 = OpTypeSampledImage %36
+%_ptr_UniformConstant_37 = OpTypePointer UniformConstant %37
+     %uImage = OpVariable %_ptr_UniformConstant_37 UniformConstant
+    %v2float = OpTypeVector %float 2
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+    %float_2 = OpConstant %float 2
+      %int_1 = OpConstant %int 1
+	  %float_1_vec = OpConstantComposite %v4float %float_1 %float_2 %float_1 %float_2
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+          %i = OpVariable %_ptr_Function_int Function
+               OpStore %i %int_0
+               OpBranch %loop_header
+         %loop_header = OpLabel
+        %phi = OpPhi %float %float_1 %5 %phi_plus_2 %continue_block
+		%tex_phi = OpPhi %v4float %float_1_vec %5 %texture_load_result %continue_block
+               OpLoopMerge %merge_block %continue_block None
+               OpBranch %loop_body
+         %loop_body = OpLabel
+               OpStore %FragColor %tex_phi
+         %19 = OpLoad %int %i
+         %22 = OpSLessThan %bool %19 %int_4
+               OpBranchConditional %22 %15 %merge_block
+         %15 = OpLabel
+         %26 = OpLoad %int %i
+         %28 = OpAccessChain %_ptr_Input_float %v0 %26
+         %29 = OpLoad %float %28
+         %31 = OpFOrdGreaterThan %bool %29 %float_0
+               OpBranchConditional %31 %continue_block %merge_block
+         %continue_block = OpLabel
+         %40 = OpLoad %37 %uImage
+         %43 = OpCompositeConstruct %v2float %phi %phi
+         %texture_load_result = OpImageSampleExplicitLod %v4float %40 %43 Lod %float_0
+         %phi_plus_2 = OpFAdd %float %phi %float_2
+         %54 = OpLoad %int %i
+         %56 = OpIAdd %int %54 %int_1
+               OpStore %i %56
+               OpBranch %loop_header
+         %merge_block = OpLabel
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders-hlsl/asm/frag/srem.asm.frag b/shaders-hlsl/asm/frag/srem.asm.frag
new file mode 100644
index 0000000000..c6f8e27cbd
--- /dev/null
+++ b/shaders-hlsl/asm/frag/srem.asm.frag
@@ -0,0 +1,43 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 2
+; Bound: 23
+; Schema: 0
+               OpCapability Shader
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main" %FragColor %vA %vB
+               OpExecutionMode %main OriginUpperLeft
+               OpSource ESSL 310
+               OpName %main "main"
+               OpName %FragColor "FragColor"
+               OpName %vA "vA"
+               OpName %vB "vB"
+               OpDecorate %FragColor RelaxedPrecision
+               OpDecorate %FragColor Location 0
+               OpDecorate %vA Flat
+               OpDecorate %vA Location 0
+               OpDecorate %vB Flat
+               OpDecorate %vB Location 1
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+  %FragColor = OpVariable %_ptr_Output_v4float Output
+        %int = OpTypeInt 32 1
+      %v4int = OpTypeVector %int 4
+%_ptr_Input_v4int = OpTypePointer Input %v4int
+         %vA = OpVariable %_ptr_Input_v4int Input
+         %vB = OpVariable %_ptr_Input_v4int Input
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+         %14 = OpLoad %v4int %vA
+         %16 = OpLoad %v4int %vB
+         %17 = OpLoad %v4int %vA
+         %18 = OpLoad %v4int %vB
+         %19 = OpSRem %v4int %17 %18
+         %20 = OpConvertSToF %v4float %19
+               OpStore %FragColor %20
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders-hlsl/asm/frag/texel-fetch-no-lod.asm.frag b/shaders-hlsl/asm/frag/texel-fetch-no-lod.asm.frag
new file mode 100644
index 0000000000..53dc63809c
--- /dev/null
+++ b/shaders-hlsl/asm/frag/texel-fetch-no-lod.asm.frag
@@ -0,0 +1,46 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 6
+; Bound: 26
+; Schema: 0
+               OpCapability Shader
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main" %FragColor %gl_FragCoord
+               OpExecutionMode %main OriginUpperLeft
+               OpSource GLSL 450
+               OpName %main "main"
+               OpName %FragColor "FragColor"
+               OpName %uTexture "uTexture"
+               OpName %gl_FragCoord "gl_FragCoord"
+               OpDecorate %FragColor Location 0
+               OpDecorate %uTexture DescriptorSet 0
+               OpDecorate %uTexture Binding 0
+               OpDecorate %gl_FragCoord BuiltIn FragCoord
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+  %FragColor = OpVariable %_ptr_Output_v4float Output
+         %10 = OpTypeImage %float 2D 0 0 0 1 Unknown
+         %11 = OpTypeSampledImage %10
+%_ptr_UniformConstant_11 = OpTypePointer UniformConstant %11
+   %uTexture = OpVariable %_ptr_UniformConstant_11 UniformConstant
+%_ptr_Input_v4float = OpTypePointer Input %v4float
+%gl_FragCoord = OpVariable %_ptr_Input_v4float Input
+    %v2float = OpTypeVector %float 2
+        %int = OpTypeInt 32 1
+      %v2int = OpTypeVector %int 2
+      %int_0 = OpConstant %int 0
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+         %14 = OpLoad %11 %uTexture
+         %18 = OpLoad %v4float %gl_FragCoord
+         %19 = OpVectorShuffle %v2float %18 %18 0 1
+         %22 = OpConvertFToS %v2int %19
+         %24 = OpImage %10 %14
+         %25 = OpImageFetch %v4float %24 %22
+               OpStore %FragColor %25
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders-hlsl/asm/vert/spec-constant-op-composite.asm.vert b/shaders-hlsl/asm/vert/spec-constant-op-composite.asm.vert
new file mode 100644
index 0000000000..b566a3d1a0
--- /dev/null
+++ b/shaders-hlsl/asm/vert/spec-constant-op-composite.asm.vert
@@ -0,0 +1,98 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 1
+; Bound: 58
+; Schema: 0
+               OpCapability Shader
+               OpCapability ClipDistance
+               OpCapability CullDistance
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Vertex %4 "main" %52 %output
+               OpSource GLSL 450
+               OpName %4 "main"
+               OpName %9 "pos"
+               OpName %50 "gl_PerVertex"
+               OpMemberName %50 0 "gl_Position"
+               OpMemberName %50 1 "gl_PointSize"
+               OpMemberName %50 2 "gl_ClipDistance"
+               OpMemberName %50 3 "gl_CullDistance"
+               OpName %52 ""
+               OpDecorate %13 SpecId 201
+               OpDecorate %24 SpecId 202
+               OpMemberDecorate %50 0 BuiltIn Position
+               OpMemberDecorate %50 1 BuiltIn PointSize
+               OpMemberDecorate %50 2 BuiltIn ClipDistance
+               OpMemberDecorate %50 3 BuiltIn CullDistance
+               OpDecorate %50 Block
+               OpDecorate %57 SpecId 200
+			   OpDecorate %output Flat
+			   OpDecorate %output Location 0
+          %2 = OpTypeVoid
+          %3 = OpTypeFunction %2
+          %6 = OpTypeFloat 32
+          %7 = OpTypeVector %6 4
+          %8 = OpTypePointer Function %7
+         %10 = OpConstant %6 0
+         %11 = OpConstantComposite %7 %10 %10 %10 %10
+         %12 = OpTypeInt 32 1
+		 %int_ptr = OpTypePointer Output %12
+         %13 = OpSpecConstant %12 -10
+         %14 = OpConstant %12 2
+         %15 = OpSpecConstantOp %12 IAdd %13 %14
+         %17 = OpTypeInt 32 0
+         %18 = OpConstant %17 1
+         %19 = OpTypePointer Function %6
+         %24 = OpSpecConstant %17 100
+         %25 = OpConstant %17 5
+         %26 = OpSpecConstantOp %17 UMod %24 %25
+         %28 = OpConstant %17 2
+         %33 = OpConstant %12 20
+         %34 = OpConstant %12 30
+         %35 = OpTypeVector %12 4
+         %36 = OpSpecConstantComposite %35 %33 %34 %15 %15
+         %40 = OpTypeVector %12 2
+         %41 = OpSpecConstantOp %40 VectorShuffle %36 %36 1 0
+		 %foo = OpSpecConstantOp %12 CompositeExtract %36 1
+         %42 = OpTypeVector %6 2
+         %49 = OpTypeArray %6 %18
+         %50 = OpTypeStruct %7 %6 %49 %49
+         %51 = OpTypePointer Output %50
+         %52 = OpVariable %51 Output
+		 %output = OpVariable %int_ptr Output
+         %53 = OpConstant %12 0
+         %55 = OpTypePointer Output %7
+         %57 = OpSpecConstant %6 3.14159
+          %4 = OpFunction %2 None %3 ; vertex entry point "main": void, no parameters
+          %5 = OpLabel
+          %9 = OpVariable %8 Function ; local float4 named "pos"
+               OpStore %9 %11 ; pos = (0, 0, 0, 0)
+         %16 = OpConvertSToF %6 %15 ; %15 = spec constant %13 (SpecId 201) + 2
+         %20 = OpAccessChain %19 %9 %18 ; &pos[1]
+         %21 = OpLoad %6 %20
+         %22 = OpFAdd %6 %21 %16
+         %23 = OpAccessChain %19 %9 %18
+               OpStore %23 %22 ; pos[1] += float(%15)
+         %27 = OpConvertUToF %6 %26 ; %26 = spec constant %24 (SpecId 202) UMod 5
+         %29 = OpAccessChain %19 %9 %28 ; &pos[2]
+         %30 = OpLoad %6 %29
+         %31 = OpFAdd %6 %30 %27
+         %32 = OpAccessChain %19 %9 %28
+               OpStore %32 %31 ; pos[2] += float(%26)
+         %37 = OpConvertSToF %7 %36 ; %36 = spec constant composite (20, 30, %15, %15)
+         %38 = OpLoad %7 %9
+         %39 = OpFAdd %7 %38 %37
+               OpStore %9 %39 ; pos += float4(%36)
+         %43 = OpConvertSToF %42 %41 ; %41 = spec-constant shuffle of %36 selecting components 1, 0
+         %44 = OpLoad %7 %9
+         %45 = OpVectorShuffle %42 %44 %44 0 1 ; first two components of pos
+         %46 = OpFAdd %42 %45 %43
+         %47 = OpLoad %7 %9
+         %48 = OpVectorShuffle %7 %47 %46 4 5 2 3 ; indices 4, 5 pick %46; 2, 3 keep pos[2], pos[3]
+               OpStore %9 %48
+         %54 = OpLoad %7 %9
+         %56 = OpAccessChain %55 %52 %53 ; member 0 of %52, named gl_Position
+               OpStore %56 %54
+			   OpStore %output %foo ; %foo = spec-constant CompositeExtract of %36, index 1
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders-hlsl/comp/rmw-matrix.comp b/shaders-hlsl/comp/rmw-matrix.comp
new file mode 100644
index 0000000000..c158ab4ddd
--- /dev/null
+++ b/shaders-hlsl/comp/rmw-matrix.comp
@@ -0,0 +1,20 @@
+#version 310 es
+layout(local_size_x = 1) in;
+
+layout(std430, binding = 0) buffer SSBO
+{
+	float a;
+	vec4 b;
+	mat4 c;
+
+	float a1;
+	vec4 b1;
+	mat4 c1;
+};
+
+void main() // Compound-multiplies each SSBO member by its "1"-suffixed counterpart.
+{
+	a *= a1; // float member
+	b *= b1; // vec4 member
+	c *= c1; // mat4 member
+}
diff --git a/shaders-hlsl/comp/subgroups.invalid.nofxc.sm60.comp b/shaders-hlsl/comp/subgroups.invalid.nofxc.sm60.comp
new file mode 100644
index 0000000000..81135e2a93
--- /dev/null
+++ b/shaders-hlsl/comp/subgroups.invalid.nofxc.sm60.comp
@@ -0,0 +1,131 @@
+#version 450
+#extension GL_KHR_shader_subgroup_basic : require
+#extension GL_KHR_shader_subgroup_ballot : require
+#extension GL_KHR_shader_subgroup_vote : require
+#extension GL_KHR_shader_subgroup_shuffle : require
+#extension GL_KHR_shader_subgroup_shuffle_relative : require
+#extension GL_KHR_shader_subgroup_arithmetic : require
+#extension GL_KHR_shader_subgroup_clustered : require
+#extension GL_KHR_shader_subgroup_quad : require
+layout(local_size_x = 1) in;
+
+layout(std430, binding = 0) buffer SSBO
+{
+	float FragColor;
+};
+
+void main() // Calls subgroup built-ins group by group; several groups are commented out or disabled with #if 0.
+{
+	// basic
+	//FragColor = float(gl_NumSubgroups);
+	//FragColor = float(gl_SubgroupID);
+	FragColor = float(gl_SubgroupSize);
+	FragColor = float(gl_SubgroupInvocationID);
+	subgroupBarrier();
+	subgroupMemoryBarrier();
+	subgroupMemoryBarrierBuffer();
+	subgroupMemoryBarrierShared();
+	subgroupMemoryBarrierImage();
+	bool elected = subgroupElect();
+
+	// ballot
+	FragColor = float(gl_SubgroupEqMask);
+	FragColor = float(gl_SubgroupGeMask);
+	FragColor = float(gl_SubgroupGtMask);
+	FragColor = float(gl_SubgroupLeMask);
+	FragColor = float(gl_SubgroupLtMask);
+	vec4 broadcasted = subgroupBroadcast(vec4(10.0), 8u);
+	vec3 first = subgroupBroadcastFirst(vec3(20.0));
+	uvec4 ballot_value = subgroupBallot(true);
+	//bool inverse_ballot_value = subgroupInverseBallot(ballot_value);
+	//bool bit_extracted = subgroupBallotBitExtract(uvec4(10u), 8u);
+	uint bit_count = subgroupBallotBitCount(ballot_value);
+	//uint inclusive_bit_count = subgroupBallotInclusiveBitCount(ballot_value);
+	//uint exclusive_bit_count = subgroupBallotExclusiveBitCount(ballot_value);
+	//uint lsb = subgroupBallotFindLSB(ballot_value);
+	//uint msb = subgroupBallotFindMSB(ballot_value);
+
+	// shuffle
+	//uint shuffled = subgroupShuffle(10u, 8u);
+	//uint shuffled_xor = subgroupShuffleXor(30u, 8u);
+
+	// shuffle relative
+	//uint shuffled_up = subgroupShuffleUp(20u, 4u);
+	//uint shuffled_down = subgroupShuffleDown(20u, 4u);
+
+	// vote
+	bool has_all = subgroupAll(true);
+	bool has_any = subgroupAny(true);
+	bool has_equal = subgroupAllEqual(true);
+
+	// arithmetic
+	vec4 added = subgroupAdd(vec4(20.0));
+	ivec4 iadded = subgroupAdd(ivec4(20));
+	vec4 multiplied = subgroupMul(vec4(20.0));
+	ivec4 imultiplied = subgroupMul(ivec4(20));
+	vec4 lo = subgroupMin(vec4(20.0));
+	vec4 hi = subgroupMax(vec4(20.0));
+	ivec4 slo = subgroupMin(ivec4(20));
+	ivec4 shi = subgroupMax(ivec4(20));
+	uvec4 ulo = subgroupMin(uvec4(20));
+	uvec4 uhi = subgroupMax(uvec4(20));
+	uvec4 anded = subgroupAnd(ballot_value);
+	uvec4 ored = subgroupOr(ballot_value);
+	uvec4 xored = subgroupXor(ballot_value);
+
+	added = subgroupInclusiveAdd(added); // inclusive scans; only Add and Mul are enabled
+	iadded = subgroupInclusiveAdd(iadded);
+	multiplied = subgroupInclusiveMul(multiplied);
+	imultiplied = subgroupInclusiveMul(imultiplied);
+#if 0
+	lo = subgroupInclusiveMin(lo);
+	hi = subgroupInclusiveMax(hi);
+	slo = subgroupInclusiveMin(slo);
+	shi = subgroupInclusiveMax(shi);
+	ulo = subgroupInclusiveMin(ulo);
+	uhi = subgroupInclusiveMax(uhi);
+	anded = subgroupInclusiveAnd(anded);
+	ored = subgroupInclusiveOr(ored);
+	xored = subgroupInclusiveXor(ored); // NOTE(review): operand is ored, not xored, unlike the And/Or lines above - confirm intent (disabled code)
+	added = subgroupExclusiveAdd(lo); // NOTE(review): an Exclusive call inside the Inclusive group - confirm intent (disabled code)
+#endif
+
+	added = subgroupExclusiveAdd(multiplied); // exclusive scans; only Add and Mul are enabled
+	multiplied = subgroupExclusiveMul(multiplied);
+	iadded = subgroupExclusiveAdd(imultiplied);
+	imultiplied = subgroupExclusiveMul(imultiplied);
+#if 0
+	lo = subgroupExclusiveMin(lo);
+	hi = subgroupExclusiveMax(hi);
+	ulo = subgroupExclusiveMin(ulo);
+	uhi = subgroupExclusiveMax(uhi);
+	slo = subgroupExclusiveMin(slo);
+	shi = subgroupExclusiveMax(shi);
+	anded = subgroupExclusiveAnd(anded);
+	ored = subgroupExclusiveOr(ored);
+	xored = subgroupExclusiveXor(ored); // NOTE(review): operand is ored, not xored - confirm intent (disabled code)
+#endif
+
+#if 0
+	// clustered
+	added = subgroupClusteredAdd(added, 4u);
+	multiplied = subgroupClusteredMul(multiplied, 4u);
+	iadded = subgroupClusteredAdd(iadded, 4u);
+	imultiplied = subgroupClusteredMul(imultiplied, 4u);
+	lo = subgroupClusteredMin(lo, 4u);
+	hi = subgroupClusteredMax(hi, 4u);
+	ulo = subgroupClusteredMin(ulo, 4u);
+	uhi = subgroupClusteredMax(uhi, 4u);
+	slo = subgroupClusteredMin(slo, 4u);
+	shi = subgroupClusteredMax(shi, 4u);
+	anded = subgroupClusteredAnd(anded, 4u);
+	ored = subgroupClusteredOr(ored, 4u);
+	xored = subgroupClusteredXor(xored, 4u);
+#endif
+
+	// quad
+	vec4 swap_horiz = subgroupQuadSwapHorizontal(vec4(20.0));
+	vec4 swap_vertical = subgroupQuadSwapVertical(vec4(20.0));
+	vec4 swap_diagonal = subgroupQuadSwapDiagonal(vec4(20.0));
+	vec4 quad_broadcast = subgroupQuadBroadcast(vec4(20.0), 3u);
+}
diff --git a/shaders-hlsl/frag/array-lut-no-loop-variable.frag b/shaders-hlsl/frag/array-lut-no-loop-variable.frag
new file mode 100644
index 0000000000..3493e0cccc
--- /dev/null
+++ b/shaders-hlsl/frag/array-lut-no-loop-variable.frag
@@ -0,0 +1,13 @@
+#version 310 es
+precision mediump float;
+
+layout(location = 0) out vec4 FragColor;
+layout(location = 0) in vec4 v0;
+
+void main() // Accumulates LUT entries into FragColor from the for-loop's increment expression.
+{
+	float lut[5] = float[](1.0, 2.0, 3.0, 4.0, 5.0);
+	for (int i = 0; i < 4; i++, FragColor += lut[i]) // the add runs after i++, so it reads lut[1]..lut[4]; FragColor is not assigned before the first +=
+	{
+	} // loop body is empty; all work happens in the increment expression
+}
diff --git a/shaders-hlsl/frag/control-dependent-in-branch.desktop.frag b/shaders-hlsl/frag/control-dependent-in-branch.desktop.frag
new file mode 100644
index 0000000000..7c75ffe1bd
--- /dev/null
+++ b/shaders-hlsl/frag/control-dependent-in-branch.desktop.frag
@@ -0,0 +1,36 @@
+#version 450
+
+layout(location = 0) out vec4 FragColor;
+layout(binding = 0) uniform sampler2D uSampler;
+layout(location = 0) in vec4 vInput;
+
+void main() // Computes a sample, derivatives and a LOD query up front, then uses them only inside a branch.
+{
+	FragColor = vInput;
+	vec4 t = texture(uSampler, vInput.xy); // implicit-LOD sample, declared before the branch
+	vec4 d0 = dFdx(vInput);
+	vec4 d1 = dFdy(vInput);
+	vec4 d2 = fwidth(vInput);
+	vec4 d3 = dFdxCoarse(vInput);
+	vec4 d4 = dFdyCoarse(vInput);
+	vec4 d5 = fwidthCoarse(vInput);
+	vec4 d6 = dFdxFine(vInput);
+	vec4 d7 = dFdyFine(vInput);
+	vec4 d8 = fwidthFine(vInput);
+	vec2 lod = textureQueryLod(uSampler, vInput.zw);
+	if (vInput.y > 10.0) // every value declared above is consumed only inside this branch
+	{
+		FragColor += t;
+		FragColor += d0;
+		FragColor += d1;
+		FragColor += d2;
+		FragColor += d3;
+		FragColor += d4;
+		FragColor += d5;
+		FragColor += d6;
+		FragColor += d7;
+		FragColor += d8;
+		FragColor += lod.xyxy;
+	}
+}
+
diff --git a/shaders-hlsl/frag/fp16.desktop.frag b/shaders-hlsl/frag/fp16.desktop.frag
new file mode 100644
index 0000000000..4f92e20359
--- /dev/null
+++ b/shaders-hlsl/frag/fp16.desktop.frag
@@ -0,0 +1,156 @@
+#version 450
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(location = 0) in float16_t v1;
+layout(location = 1) in f16vec2 v2;
+layout(location = 2) in f16vec3 v3;
+layout(location = 3) in f16vec4 v4;
+
+layout(location = 0) out float o1;
+layout(location = 1) out vec2 o2;
+layout(location = 2) out vec3 o3;
+layout(location = 3) out vec4 o4;
+
+#if 0
+// Disabled half-precision matrix helpers: per the original note, these do not work on glslang yet.
+f16mat2 test_mat2(f16vec2 a, f16vec2 b, f16vec2 c, f16vec2 d) // product of two f16mat2 built from column pairs
+{
+	return f16mat2(a, b) * f16mat2(c, d);
+}
+
+f16mat3 test_mat3(f16vec3 a, f16vec3 b, f16vec3 c, f16vec3 d, f16vec3 e, f16vec3 f) // product of two f16mat3 built from column triples
+{
+	return f16mat3(a, b, c) * f16mat3(d, e, f);
+}
+#endif
+
+void test_constants() // Declares float16_t locals from a range of hf literals; none of them is read afterwards.
+{
+	float16_t a = 1.0hf;
+	float16_t b = 1.5hf;
+	float16_t c = -1.5hf; // Negatives
+	float16_t d = (0.0hf / 0.0hf); // NaN
+	float16_t e = (1.0hf / 0.0hf); // +Inf
+	float16_t f = (-1.0hf / 0.0hf); // -Inf
+	float16_t g = 1014.0hf; // Large.
+	float16_t h = 0.000001hf; // Denormal
+}
+
+float16_t test_result() // Returns the half-precision constant 1.0; called by test_conversions().
+{
+	return 1.0hf;
+}
+
+void test_conversions() // Converts a float16_t to int/uint/bool/float/double and each of those back to float16_t.
+{
+	float16_t one = test_result();
+	int a = int(one); // half -> other scalar types
+	uint b = uint(one);
+	bool c = bool(one);
+	float d = float(one);
+	double e = double(one);
+	float16_t a2 = float16_t(a); // other scalar types -> half
+	float16_t b2 = float16_t(b);
+	float16_t c2 = float16_t(c);
+	float16_t d2 = float16_t(d);
+	float16_t e2 = float16_t(e);
+}
+
+void test_builtins() // Calls built-in functions on the half-precision inputs v1/v3/v4; results are overwritten, never output.
+{
+	f16vec4 res;
+	res = radians(v4);
+	res = degrees(v4);
+	res = sin(v4);
+	res = cos(v4);
+	res = tan(v4);
+	res = asin(v4);
+	res = atan(v4, v3.xyzz); // two-argument atan; v3 is swizzled up to four components
+	res = atan(v4);
+	res = sinh(v4);
+	res = cosh(v4);
+	res = tanh(v4);
+	//res = asinh(v4);
+	//res = acosh(v4);
+	//res = atanh(v4);
+	res = pow(v4, v4);
+	res = exp(v4);
+	res = log(v4);
+	res = exp2(v4);
+	res = log2(v4);
+	res = sqrt(v4);
+	res = inversesqrt(v4);
+	res = abs(v4);
+	res = sign(v4);
+	res = floor(v4);
+	res = trunc(v4);
+	res = round(v4);
+	//res = roundEven(v4);
+	res = ceil(v4);
+	res = fract(v4);
+	res = mod(v4, v4);
+	f16vec4 tmp;
+	res = modf(v4, tmp); // tmp is passed as modf's second argument
+	res = min(v4, v4);
+	res = max(v4, v4);
+	res = clamp(v4, v4, v4);
+	res = mix(v4, v4, v4);
+	res = mix(v4, v4, lessThan(v4, v4)); // mix with a boolean-vector selector
+	res = step(v4, v4);
+	res = smoothstep(v4, v4, v4);
+
+	bvec4 btmp = isnan(v4);
+	btmp = isinf(v4);
+	res = fma(v4, v4, v4);
+
+	//ivec4 itmp;
+	//res = frexp(v4, itmp);
+	//res = ldexp(res, itmp);
+
+	uint pack0 = packFloat2x16(v4.xy); // pack two half pairs into uints, then unpack them back into res
+	uint pack1 = packFloat2x16(v4.zw);
+	res = f16vec4(unpackFloat2x16(pack0), unpackFloat2x16(pack1));
+
+	float16_t t0 = length(v4);
+	t0 = distance(v4, v4);
+	t0 = dot(v4, v4);
+	f16vec3 res3 = cross(v3, v3);
+	res = normalize(v4);
+	res = faceforward(v4, v4, v4);
+	res = reflect(v4, v4);
+	res = refract(v4, v4, v1); // scalar input v1 is the third argument
+
+	btmp = lessThan(v4, v4);
+	btmp = lessThanEqual(v4, v4);
+	btmp = greaterThan(v4, v4);
+	btmp = greaterThanEqual(v4, v4);
+	btmp = equal(v4, v4);
+	btmp = notEqual(v4, v4);
+
+	res = dFdx(v4);
+	res = dFdy(v4);
+	res = dFdxFine(v4);
+	res = dFdyFine(v4);
+	res = dFdxCoarse(v4);
+	res = dFdyCoarse(v4);
+	res = fwidth(v4);
+	res = fwidthFine(v4);
+	res = fwidthCoarse(v4);
+
+	//res = interpolateAtCentroid(v4);
+	//res = interpolateAtSample(v4, 0);
+	//res = interpolateAtOffset(v4, f16vec2(0.1hf));
+}
+
+void main() // Runs the three enabled test helpers; the matrix tests are compiled out.
+{
+	// Basic matrix tests.
+#if 0
+	f16mat2 m0 = test_mat2(v2, v2, v3.xy, v3.xy);
+	f16mat3 m1 = test_mat3(v3, v3, v3, v4.xyz, v4.xyz, v4.yzw);
+#endif
+
+	test_constants();
+	test_conversions();
+	test_builtins();
+}
diff --git a/shaders-hlsl/frag/row-major-layout-in-struct.frag b/shaders-hlsl/frag/row-major-layout-in-struct.frag
new file mode 100644
index 0000000000..3e93bb2b43
--- /dev/null
+++ b/shaders-hlsl/frag/row-major-layout-in-struct.frag
@@ -0,0 +1,29 @@
+#version 450
+
+struct Foo
+{
+	mat4 v;
+	mat4 w;
+};
+
+struct NonFoo
+{
+	mat4 v;
+	mat4 w;
+};
+
+layout(std140, binding = 0) uniform UBO
+{
+	layout(column_major) Foo foo;
+};
+
+layout(location = 0) out vec4 FragColor;
+layout(location = 0) in vec4 vUV;
+
+void main() // Copies the UBO matrices member by member into a local NonFoo, then transforms vUV with them.
+{
+	NonFoo f;
+	f.v = foo.v; // foo is declared layout(column_major) inside the std140 UBO
+	f.w = foo.w;
+	FragColor = f.v * (f.w * vUV); // applies w first, then v
+}
diff --git a/shaders-hlsl/frag/sampler-image-arrays.frag b/shaders-hlsl/frag/sampler-image-arrays.frag
new file mode 100644
index 0000000000..42370d9728
--- /dev/null
+++ b/shaders-hlsl/frag/sampler-image-arrays.frag
@@ -0,0 +1,33 @@
+#version 450
+
+layout(location = 0) out vec4 FragColor;
+layout(location = 0) flat in vec2 vTex;
+layout(location = 1) flat in int vIndex;
+layout(binding = 0) uniform sampler2D uSampler[4];
+layout(binding = 4) uniform sampler uSamplers[4];
+layout(binding = 8) uniform texture2D uTextures[4];
+
+vec4 sample_from_argument(sampler2D samplers[4]) // Samples from a sampler array passed as a parameter.
+{
+	return texture(samplers[vIndex], vTex + 0.2); // element chosen by the flat input vIndex
+}
+
+vec4 sample_single_from_argument(sampler2D samp) // Samples from a single sampler passed as a parameter.
+{
+	return texture(samp, vTex + 0.3);
+}
+
+vec4 sample_from_global() // Samples from the global uSampler array, indexed by vIndex.
+{
+	return texture(uSampler[vIndex], vTex + 0.1);
+}
+
+void main() // Sums samples taken through separate, combined, global and argument-passed samplers.
+{
+	FragColor = vec4(0.0);
+	FragColor += texture(sampler2D(uTextures[2], uSamplers[1]), vTex); // separate texture + sampler combined at the call site
+	FragColor += texture(uSampler[vIndex], vTex); // combined sampler array, dynamic index
+	FragColor += sample_from_global();
+	FragColor += sample_from_argument(uSampler); // whole array passed by argument
+	FragColor += sample_single_from_argument(uSampler[3]); // single element passed by argument
+}
diff --git a/shaders-hlsl/frag/tex-sampling-ms.frag b/shaders-hlsl/frag/tex-sampling-ms.frag
new file mode 100644
index 0000000000..7badbb1a35
--- /dev/null
+++ b/shaders-hlsl/frag/tex-sampling-ms.frag
@@ -0,0 +1,16 @@
+#version 450
+
+layout(location = 0) out vec4 FragColor;
+layout(binding = 0) uniform sampler2DMS uTex;
+
+void main() // Sums texelFetch results for sample indices 0..3 of uTex at the fragment's integer coordinate.
+{
+	FragColor =
+		texelFetch(uTex, ivec2(gl_FragCoord.xy), 0); // third argument is the sample index
+	FragColor +=
+		texelFetch(uTex, ivec2(gl_FragCoord.xy), 1);
+	FragColor +=
+		texelFetch(uTex, ivec2(gl_FragCoord.xy), 2);
+	FragColor +=
+		texelFetch(uTex, ivec2(gl_FragCoord.xy), 3);
+}
diff --git a/shaders-hlsl/frag/tex-sampling.frag b/shaders-hlsl/frag/tex-sampling.frag
index 4a386c0d33..762c60ac6e 100644
--- a/shaders-hlsl/frag/tex-sampling.frag
+++ b/shaders-hlsl/frag/tex-sampling.frag
@@ -1,22 +1,22 @@
 #version 450
 
-uniform sampler1D tex1d;
-uniform sampler2D tex2d;
-uniform sampler3D tex3d;
-uniform samplerCube texCube;
+layout(binding = 0) uniform sampler1D tex1d;
+layout(binding = 1) uniform sampler2D tex2d;
+layout(binding = 2) uniform sampler3D tex3d;
+layout(binding = 3) uniform samplerCube texCube;
 
-uniform sampler1DShadow tex1dShadow;
-uniform sampler2DShadow tex2dShadow;
-uniform samplerCubeShadow texCubeShadow;
+layout(binding = 4) uniform sampler1DShadow tex1dShadow;
+layout(binding = 5) uniform sampler2DShadow tex2dShadow;
+layout(binding = 6) uniform samplerCubeShadow texCubeShadow;
 
-uniform sampler1DArray tex1dArray;
-uniform sampler2DArray tex2dArray;
-uniform samplerCubeArray texCubeArray;
+layout(binding = 7) uniform sampler1DArray tex1dArray;
+layout(binding = 8) uniform sampler2DArray tex2dArray;
+layout(binding = 9) uniform samplerCubeArray texCubeArray;
 
-uniform samplerShadow samplerDepth;
-uniform sampler samplerNonDepth;
-uniform texture2D separateTex2d;
-uniform texture2D separateTex2dDepth;
+layout(binding = 10) uniform samplerShadow samplerDepth;
+layout(binding = 11) uniform sampler samplerNonDepth;
+layout(binding = 12) uniform texture2D separateTex2d;
+layout(binding = 13) uniform texture2D separateTex2dDepth;
 
 layout(location = 0) in float texCoord1d;
 layout(location = 1) in vec2 texCoord2d;
diff --git a/shaders-hlsl/vert/read-from-row-major-array.vert b/shaders-hlsl/vert/read-from-row-major-array.vert
new file mode 100644
index 0000000000..792fb8e36c
--- /dev/null
+++ b/shaders-hlsl/vert/read-from-row-major-array.vert
@@ -0,0 +1,20 @@
+#version 310 es
+layout(location = 0) in highp vec4 a_position;
+layout(location = 0) out mediump float v_vtxResult;
+
+layout(set = 0, binding = 0, std140, row_major) uniform Block
+{
+	highp mat2x3 var[3][4];
+};
+
+mediump float compare_float    (highp float a, highp float b)  { return abs(a - b) < 0.05 ? 1.0 : 0.0; } // 1.0 when |a - b| < 0.05, otherwise 0.0
+mediump float compare_vec3     (highp vec3 a, highp vec3 b)    { return compare_float(a.x, b.x)*compare_float(a.y, b.y)*compare_float(a.z, b.z); } // product of per-component compare_float results
+mediump float compare_mat2x3   (highp mat2x3 a, highp mat2x3 b){ return compare_vec3(a[0], b[0])*compare_vec3(a[1], b[1]); } // product of compare_vec3 over both columns
+
+void main (void) // Compares var[0][0] from the row_major UBO against a constant mat2x3 and outputs the result.
+{
+	gl_Position = a_position;
+	mediump float result = 1.0;
+	result *= compare_mat2x3(var[0][0], mat2x3(2.0, 6.0, -6.0, 0.0, 5.0, 5.0)); // stays 1.0 only if every component is within the compare_float tolerance
+	v_vtxResult = result;
+}
diff --git a/shaders-msl/asm/frag/inliner-dominator-inside-loop.asm.frag b/shaders-msl-no-opt/asm/frag/inliner-dominator-inside-loop.asm.frag
similarity index 100%
rename from shaders-msl/asm/frag/inliner-dominator-inside-loop.asm.frag
rename to shaders-msl-no-opt/asm/frag/inliner-dominator-inside-loop.asm.frag
diff --git a/shaders-msl/asm/vert/empty-struct-composite.asm.vert b/shaders-msl-no-opt/asm/vert/empty-struct-composite.asm.vert
similarity index 100%
rename from shaders-msl/asm/vert/empty-struct-composite.asm.vert
rename to shaders-msl-no-opt/asm/vert/empty-struct-composite.asm.vert
diff --git a/shaders-msl/comp/bitfield.noopt.comp b/shaders-msl-no-opt/comp/bitfield.comp
similarity index 100%
rename from shaders-msl/comp/bitfield.noopt.comp
rename to shaders-msl-no-opt/comp/bitfield.comp
diff --git a/shaders-msl/comp/loop.noopt.comp b/shaders-msl-no-opt/comp/loop.comp
similarity index 100%
rename from shaders-msl/comp/loop.noopt.comp
rename to shaders-msl-no-opt/comp/loop.comp
diff --git a/shaders-msl/comp/return.comp b/shaders-msl-no-opt/comp/return.comp
similarity index 100%
rename from shaders-msl/comp/return.comp
rename to shaders-msl-no-opt/comp/return.comp
diff --git a/shaders-msl/frag/in_block_assign.noopt.frag b/shaders-msl-no-opt/frag/in_block_assign.frag
similarity index 100%
rename from shaders-msl/frag/in_block_assign.noopt.frag
rename to shaders-msl-no-opt/frag/in_block_assign.frag
diff --git a/shaders-msl/asm/comp/storage-buffer-basic.asm.comp b/shaders-msl/asm/comp/storage-buffer-basic.invalid.asm.comp
similarity index 100%
rename from shaders-msl/asm/comp/storage-buffer-basic.asm.comp
rename to shaders-msl/asm/comp/storage-buffer-basic.invalid.asm.comp
diff --git a/shaders-msl/asm/frag/combined-sampler-reuse.asm.frag b/shaders-msl/asm/frag/combined-sampler-reuse.asm.frag
new file mode 100644
index 0000000000..ba2f95b234
--- /dev/null
+++ b/shaders-msl/asm/frag/combined-sampler-reuse.asm.frag
@@ -0,0 +1,57 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 6
+; Bound: 36
+; Schema: 0
+               OpCapability Shader
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main" %FragColor %vUV
+               OpExecutionMode %main OriginUpperLeft
+               OpSource GLSL 450
+               OpName %main "main"
+               OpName %FragColor "FragColor"
+               OpName %uTex "uTex"
+               OpName %uSampler "uSampler"
+               OpName %vUV "vUV"
+               OpDecorate %FragColor Location 0
+               OpDecorate %uTex DescriptorSet 0
+               OpDecorate %uTex Binding 1
+               OpDecorate %uSampler DescriptorSet 0
+               OpDecorate %uSampler Binding 0
+               OpDecorate %vUV Location 0
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+  %FragColor = OpVariable %_ptr_Output_v4float Output
+         %10 = OpTypeImage %float 2D 0 0 0 1 Unknown
+%_ptr_UniformConstant_10 = OpTypePointer UniformConstant %10
+       %uTex = OpVariable %_ptr_UniformConstant_10 UniformConstant
+         %14 = OpTypeSampler
+%_ptr_UniformConstant_14 = OpTypePointer UniformConstant %14
+   %uSampler = OpVariable %_ptr_UniformConstant_14 UniformConstant
+         %18 = OpTypeSampledImage %10
+    %v2float = OpTypeVector %float 2
+%_ptr_Input_v2float = OpTypePointer Input %v2float
+        %vUV = OpVariable %_ptr_Input_v2float Input
+        %int = OpTypeInt 32 1
+      %v2int = OpTypeVector %int 2
+      %int_1 = OpConstant %int 1
+         %32 = OpConstantComposite %v2int %int_1 %int_1
+       %main = OpFunction %void None %3 ; fragment entry point: samples twice through one OpSampledImage result
+          %5 = OpLabel
+         %13 = OpLoad %10 %uTex
+         %17 = OpLoad %14 %uSampler
+         %19 = OpSampledImage %18 %13 %17 ; combined image+sampler, used by both sample instructions below
+         %23 = OpLoad %v2float %vUV
+         %24 = OpImageSampleImplicitLod %v4float %19 %23
+               OpStore %FragColor %24
+         %28 = OpLoad %v2float %vUV
+         %33 = OpImageSampleImplicitLod %v4float %19 %28 ConstOffset %32 ; second use of %19; %32 is the constant offset (1, 1)
+         %34 = OpLoad %v4float %FragColor
+         %35 = OpFAdd %v4float %34 %33
+               OpStore %FragColor %35 ; FragColor = first sample + offset sample
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders-msl/asm/frag/empty-struct.asm.frag b/shaders-msl/asm/frag/empty-struct.asm.frag
new file mode 100644
index 0000000000..701f9f2a1e
--- /dev/null
+++ b/shaders-msl/asm/frag/empty-struct.asm.frag
@@ -0,0 +1,56 @@
+; SPIR-V
+; Version: 1.2
+; Generator: Khronos; 0
+; Bound: 43
+; Schema: 0
+               OpCapability Linkage
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %EntryPoint_Main "main"
+               OpExecutionMode %EntryPoint_Main OriginUpperLeft
+               OpSource Unknown 100
+               OpName %EmptyStructTest "EmptyStructTest"
+               OpName %GetValue "GetValue"
+               OpName %GetValue2 "GetValue"
+               OpName %self "self"
+               OpName %self2 "self"
+               OpName %emptyStruct "emptyStruct"
+               OpName %value "value"
+               OpName %EntryPoint_Main "EntryPoint_Main"
+
+%EmptyStructTest = OpTypeStruct
+%_ptr_Function_EmptyStructTest = OpTypePointer Function %EmptyStructTest
+      %float = OpTypeFloat 32
+%_ptr_Function_float = OpTypePointer Function %float
+          %5 = OpTypeFunction %float %_ptr_Function_EmptyStructTest
+          %6 = OpTypeFunction %float %EmptyStructTest
+       %void = OpTypeVoid
+%_ptr_Function_void = OpTypePointer Function %void
+          %8 = OpTypeFunction %void %_ptr_Function_EmptyStructTest
+          %9 = OpTypeFunction %void
+    %float_0 = OpConstant %float 0
+
+   %GetValue = OpFunction %float None %5 ; takes a Function-storage pointer to the empty struct
+       %self = OpFunctionParameter %_ptr_Function_EmptyStructTest
+         %13 = OpLabel
+               OpReturnValue %float_0 ; always returns 0.0; the parameter is not read
+               OpFunctionEnd
+
+   %GetValue2 = OpFunction %float None %6 ; takes the empty struct by value; shares the debug name "GetValue"
+       %self2 = OpFunctionParameter %EmptyStructTest
+         %14 = OpLabel
+               OpReturnValue %float_0 ; always returns 0.0; the parameter is not read
+               OpFunctionEnd
+
+%EntryPoint_Main = OpFunction %void None %9 ; fragment entry point: calls both GetValue variants with an empty struct
+         %37 = OpLabel
+     %emptyStruct = OpVariable %_ptr_Function_EmptyStructTest Function
+         %18 = OpVariable %_ptr_Function_EmptyStructTest Function
+      %value = OpVariable %_ptr_Function_float Function
+	  %value2 = OpCompositeConstruct %EmptyStructTest ; composite construct with zero constituents
+         %22 = OpFunctionCall %float %GetValue %emptyStruct ; by-pointer call
+         %23 = OpFunctionCall %float %GetValue2 %value2 ; by-value call
+               OpStore %value %22
+               OpStore %value %23 ; overwrites the previous store to %value
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders-msl/asm/frag/image-extract-reuse.asm.frag b/shaders-msl/asm/frag/image-extract-reuse.asm.frag
new file mode 100644
index 0000000000..63c8ab57a5
--- /dev/null
+++ b/shaders-msl/asm/frag/image-extract-reuse.asm.frag
@@ -0,0 +1,41 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 6
+; Bound: 19
+; Schema: 0
+               OpCapability Shader
+               OpCapability ImageQuery
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main" %Size
+               OpExecutionMode %main OriginUpperLeft
+               OpSource GLSL 450
+               OpName %main "main"
+               OpName %Size "Size"
+               OpName %uTexture "uTexture"
+               OpDecorate %Size Location 0
+               OpDecorate %uTexture DescriptorSet 0
+               OpDecorate %uTexture Binding 0
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+        %int = OpTypeInt 32 1
+      %v2int = OpTypeVector %int 2
+%_ptr_Output_v2int = OpTypePointer Output %v2int
+       %Size = OpVariable %_ptr_Output_v2int Output
+      %float = OpTypeFloat 32
+         %11 = OpTypeImage %float 2D 0 0 0 1 Unknown
+         %12 = OpTypeSampledImage %11
+%_ptr_UniformConstant_12 = OpTypePointer UniformConstant %12
+   %uTexture = OpVariable %_ptr_UniformConstant_12 UniformConstant
+      %int_0 = OpConstant %int 0
+      %int_1 = OpConstant %int 1
+       %main = OpFunction %void None %3 ; fragment entry point: queries two LOD sizes through one OpImage result
+          %5 = OpLabel
+         %15 = OpLoad %12 %uTexture
+         %17 = OpImage %11 %15 ; image extracted from the sampled image once, used by both queries below
+         %18 = OpImageQuerySizeLod %v2int %17 %int_0 ; size at LOD 0
+         %19 = OpImageQuerySizeLod %v2int %17 %int_1 ; size at LOD 1
+		 %20 = OpIAdd %v2int %18 %19
+               OpStore %Size %20 ; Size = sum of the two queried sizes
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders-msl/asm/frag/implicit-read-dep-phi.asm.frag b/shaders-msl/asm/frag/implicit-read-dep-phi.asm.frag
new file mode 100644
index 0000000000..ccdfeef58d
--- /dev/null
+++ b/shaders-msl/asm/frag/implicit-read-dep-phi.asm.frag
@@ -0,0 +1,81 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 3
+; Bound: 60
+; Schema: 0
+               OpCapability Shader
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main" %v0 %FragColor
+               OpExecutionMode %main OriginUpperLeft
+               OpSource GLSL 450
+               OpName %main "main"
+               OpName %phi "phi"
+               OpName %i "i"
+               OpName %v0 "v0"
+               OpName %FragColor "FragColor"
+               OpName %uImage "uImage"
+               OpDecorate %v0 Location 0
+               OpDecorate %FragColor Location 0
+               OpDecorate %uImage DescriptorSet 0
+               OpDecorate %uImage Binding 0
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+      %float = OpTypeFloat 32
+%_ptr_Function_float = OpTypePointer Function %float
+    %float_1 = OpConstant %float 1
+        %int = OpTypeInt 32 1
+%_ptr_Function_int = OpTypePointer Function %int
+      %int_0 = OpConstant %int 0
+      %int_4 = OpConstant %int 4
+       %bool = OpTypeBool
+    %v4float = OpTypeVector %float 4
+%_ptr_Input_v4float = OpTypePointer Input %v4float
+         %v0 = OpVariable %_ptr_Input_v4float Input
+%_ptr_Input_float = OpTypePointer Input %float
+    %float_0 = OpConstant %float 0
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+  %FragColor = OpVariable %_ptr_Output_v4float Output
+         %36 = OpTypeImage %float 2D 0 0 0 1 Unknown
+         %37 = OpTypeSampledImage %36
+%_ptr_UniformConstant_37 = OpTypePointer UniformConstant %37
+     %uImage = OpVariable %_ptr_UniformConstant_37 UniformConstant
+    %v2float = OpTypeVector %float 2
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+    %float_2 = OpConstant %float 2
+      %int_1 = OpConstant %int 1
+	  %float_1_vec = OpConstantComposite %v4float %float_1 %float_2 %float_1 %float_2
+       %main = OpFunction %void None %3 ; fragment entry point: loop whose header phis take values from the continue block
+          %5 = OpLabel
+          %i = OpVariable %_ptr_Function_int Function
+               OpStore %i %int_0
+               OpBranch %loop_header
+         %loop_header = OpLabel
+        %phi = OpPhi %float %float_1 %5 %phi_plus_2 %continue_block ; 1.0 on entry, previous value + 2.0 on later iterations
+		%tex_phi = OpPhi %v4float %float_1_vec %5 %texture_load_result %continue_block ; constant vector on entry, last texture sample afterwards
+               OpLoopMerge %merge_block %continue_block None
+               OpBranch %loop_body
+         %loop_body = OpLabel
+               OpStore %FragColor %tex_phi
+         %19 = OpLoad %int %i
+         %22 = OpSLessThan %bool %19 %int_4
+               OpBranchConditional %22 %15 %merge_block ; leave the loop once i >= 4
+         %15 = OpLabel
+         %26 = OpLoad %int %i
+         %28 = OpAccessChain %_ptr_Input_float %v0 %26 ; &v0[i]
+         %29 = OpLoad %float %28
+         %31 = OpFOrdGreaterThan %bool %29 %float_0
+               OpBranchConditional %31 %continue_block %merge_block ; also leave the loop unless v0[i] > 0.0
+         %continue_block = OpLabel
+         %40 = OpLoad %37 %uImage
+         %43 = OpCompositeConstruct %v2float %phi %phi ; coordinate (phi, phi)
+         %texture_load_result = OpImageSampleExplicitLod %v4float %40 %43 Lod %float_0 ; feeds %tex_phi on the next iteration
+         %phi_plus_2 = OpFAdd %float %phi %float_2 ; feeds %phi on the next iteration
+         %54 = OpLoad %int %i
+         %56 = OpIAdd %int %54 %int_1
+               OpStore %i %56 ; i = i + 1
+               OpBranch %loop_header
+         %merge_block = OpLabel
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders-msl/asm/frag/srem.asm.frag b/shaders-msl/asm/frag/srem.asm.frag
new file mode 100644
index 0000000000..c6f8e27cbd
--- /dev/null
+++ b/shaders-msl/asm/frag/srem.asm.frag
@@ -0,0 +1,43 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 2
+; Bound: 23
+; Schema: 0
+               OpCapability Shader
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main" %FragColor %vA %vB
+               OpExecutionMode %main OriginUpperLeft
+               OpSource ESSL 310
+               OpName %main "main"
+               OpName %FragColor "FragColor"
+               OpName %vA "vA"
+               OpName %vB "vB"
+               OpDecorate %FragColor RelaxedPrecision
+               OpDecorate %FragColor Location 0
+               OpDecorate %vA Flat
+               OpDecorate %vA Location 0
+               OpDecorate %vB Flat
+               OpDecorate %vB Location 1
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+  %FragColor = OpVariable %_ptr_Output_v4float Output
+        %int = OpTypeInt 32 1
+      %v4int = OpTypeVector %int 4
+%_ptr_Input_v4int = OpTypePointer Input %v4int
+         %vA = OpVariable %_ptr_Input_v4int Input
+         %vB = OpVariable %_ptr_Input_v4int Input
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+         %14 = OpLoad %v4int %vA
+         %16 = OpLoad %v4int %vB
+         %17 = OpLoad %v4int %vA
+         %18 = OpLoad %v4int %vB
+         %19 = OpSRem %v4int %17 %18
+         %20 = OpConvertSToF %v4float %19
+               OpStore %FragColor %20
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders-msl/asm/frag/texel-fetch-no-lod.asm.frag b/shaders-msl/asm/frag/texel-fetch-no-lod.asm.frag
new file mode 100644
index 0000000000..53dc63809c
--- /dev/null
+++ b/shaders-msl/asm/frag/texel-fetch-no-lod.asm.frag
@@ -0,0 +1,46 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 6
+; Bound: 26
+; Schema: 0
+               OpCapability Shader
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main" %FragColor %gl_FragCoord
+               OpExecutionMode %main OriginUpperLeft
+               OpSource GLSL 450
+               OpName %main "main"
+               OpName %FragColor "FragColor"
+               OpName %uTexture "uTexture"
+               OpName %gl_FragCoord "gl_FragCoord"
+               OpDecorate %FragColor Location 0
+               OpDecorate %uTexture DescriptorSet 0
+               OpDecorate %uTexture Binding 0
+               OpDecorate %gl_FragCoord BuiltIn FragCoord
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+  %FragColor = OpVariable %_ptr_Output_v4float Output
+         %10 = OpTypeImage %float 2D 0 0 0 1 Unknown
+         %11 = OpTypeSampledImage %10
+%_ptr_UniformConstant_11 = OpTypePointer UniformConstant %11
+   %uTexture = OpVariable %_ptr_UniformConstant_11 UniformConstant
+%_ptr_Input_v4float = OpTypePointer Input %v4float
+%gl_FragCoord = OpVariable %_ptr_Input_v4float Input
+    %v2float = OpTypeVector %float 2
+        %int = OpTypeInt 32 1
+      %v2int = OpTypeVector %int 2
+      %int_0 = OpConstant %int 0
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+         %14 = OpLoad %11 %uTexture
+         %18 = OpLoad %v4float %gl_FragCoord
+         %19 = OpVectorShuffle %v2float %18 %18 0 1
+         %22 = OpConvertFToS %v2int %19
+         %24 = OpImage %10 %14
+         %25 = OpImageFetch %v4float %24 %22
+               OpStore %FragColor %25
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders-msl/asm/frag/vector-shuffle-oom.asm.frag b/shaders-msl/asm/frag/vector-shuffle-oom.asm.frag
index d60c6f52d4..92652161cb 100644
--- a/shaders-msl/asm/frag/vector-shuffle-oom.asm.frag
+++ b/shaders-msl/asm/frag/vector-shuffle-oom.asm.frag
@@ -15,9 +15,9 @@
                OpDecorate %22044 DescriptorSet 0
                OpDecorate %22044 Binding 0
                OpDecorate %5785 DescriptorSet 0
-               OpDecorate %5785 Binding 140
+               OpDecorate %5785 Binding 14
                OpDecorate %5688 DescriptorSet 0
-               OpDecorate %5688 Binding 60
+               OpDecorate %5688 Binding 6
                OpMemberDecorate %_struct_994 0 Offset 0
                OpMemberDecorate %_struct_994 1 Offset 16
                OpMemberDecorate %_struct_994 2 Offset 28
@@ -49,13 +49,13 @@
                OpDecorate %12348 DescriptorSet 0
                OpDecorate %12348 Binding 2
                OpDecorate %3312 DescriptorSet 0
-               OpDecorate %3312 Binding 142
+               OpDecorate %3312 Binding 13
                OpDecorate %4646 DescriptorSet 0
-               OpDecorate %4646 Binding 62
+               OpDecorate %4646 Binding 5
                OpDecorate %4862 DescriptorSet 0
-               OpDecorate %4862 Binding 141
+               OpDecorate %4862 Binding 4
                OpDecorate %3594 DescriptorSet 0
-               OpDecorate %3594 Binding 61
+               OpDecorate %3594 Binding 3
                OpDecorate %_arr_mat4v4float_uint_2 ArrayStride 64
                OpDecorate %_arr_v4float_uint_2 ArrayStride 16
                OpMemberDecorate %_struct_408 0 RowMajor
diff --git a/shaders-msl/asm/vert/packing-test.asm.vert b/shaders-msl/asm/vert/packing-test.asm.vert
new file mode 100644
index 0000000000..8acdebc7d3
--- /dev/null
+++ b/shaders-msl/asm/vert/packing-test.asm.vert
@@ -0,0 +1,43 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 3
+; Bound: 18
+; Schema: 0
+               OpCapability Shader
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Vertex %main "main"
+               OpSource HLSL 500
+               OpName %main "main"
+               OpName %TestStruct "TestStruct"
+               OpMemberName %TestStruct 0 "transforms"
+               OpName %CB0 "CB0"
+               OpMemberName %CB0 0 "CB0"
+               OpName %_ ""
+               OpDecorate %_arr_mat4v4float_uint_6 ArrayStride 64
+               OpMemberDecorate %TestStruct 0 RowMajor
+               OpMemberDecorate %TestStruct 0 Offset 0
+               OpMemberDecorate %TestStruct 0 MatrixStride 16
+               OpDecorate %_arr_TestStruct_uint_16 ArrayStride 384
+               OpMemberDecorate %CB0 0 Offset 0
+               OpDecorate %CB0 Block
+               OpDecorate %_ DescriptorSet 0
+               OpDecorate %_ Binding 0
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat4v4float = OpTypeMatrix %v4float 4
+       %uint = OpTypeInt 32 0
+     %uint_6 = OpConstant %uint 6
+%_arr_mat4v4float_uint_6 = OpTypeArray %mat4v4float %uint_6
+ %TestStruct = OpTypeStruct %_arr_mat4v4float_uint_6
+    %uint_16 = OpConstant %uint 16
+%_arr_TestStruct_uint_16 = OpTypeArray %TestStruct %uint_16
+        %CB0 = OpTypeStruct %_arr_TestStruct_uint_16
+%_ptr_Uniform_CB0 = OpTypePointer Uniform %CB0
+          %_ = OpVariable %_ptr_Uniform_CB0 Uniform
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders-msl/asm/vert/spec-constant-op-composite.asm.vert b/shaders-msl/asm/vert/spec-constant-op-composite.asm.vert
new file mode 100644
index 0000000000..b566a3d1a0
--- /dev/null
+++ b/shaders-msl/asm/vert/spec-constant-op-composite.asm.vert
@@ -0,0 +1,98 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 1
+; Bound: 58
+; Schema: 0
+               OpCapability Shader
+               OpCapability ClipDistance
+               OpCapability CullDistance
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Vertex %4 "main" %52 %output
+               OpSource GLSL 450
+               OpName %4 "main"
+               OpName %9 "pos"
+               OpName %50 "gl_PerVertex"
+               OpMemberName %50 0 "gl_Position"
+               OpMemberName %50 1 "gl_PointSize"
+               OpMemberName %50 2 "gl_ClipDistance"
+               OpMemberName %50 3 "gl_CullDistance"
+               OpName %52 ""
+               OpDecorate %13 SpecId 201
+               OpDecorate %24 SpecId 202
+               OpMemberDecorate %50 0 BuiltIn Position
+               OpMemberDecorate %50 1 BuiltIn PointSize
+               OpMemberDecorate %50 2 BuiltIn ClipDistance
+               OpMemberDecorate %50 3 BuiltIn CullDistance
+               OpDecorate %50 Block
+               OpDecorate %57 SpecId 200
+			   OpDecorate %output Flat
+			   OpDecorate %output Location 0
+          %2 = OpTypeVoid
+          %3 = OpTypeFunction %2
+          %6 = OpTypeFloat 32
+          %7 = OpTypeVector %6 4
+          %8 = OpTypePointer Function %7
+         %10 = OpConstant %6 0
+         %11 = OpConstantComposite %7 %10 %10 %10 %10
+         %12 = OpTypeInt 32 1
+		 %int_ptr = OpTypePointer Output %12
+         %13 = OpSpecConstant %12 -10
+         %14 = OpConstant %12 2
+         %15 = OpSpecConstantOp %12 IAdd %13 %14
+         %17 = OpTypeInt 32 0
+         %18 = OpConstant %17 1
+         %19 = OpTypePointer Function %6
+         %24 = OpSpecConstant %17 100
+         %25 = OpConstant %17 5
+         %26 = OpSpecConstantOp %17 UMod %24 %25
+         %28 = OpConstant %17 2
+         %33 = OpConstant %12 20
+         %34 = OpConstant %12 30
+         %35 = OpTypeVector %12 4
+         %36 = OpSpecConstantComposite %35 %33 %34 %15 %15
+         %40 = OpTypeVector %12 2
+         %41 = OpSpecConstantOp %40 VectorShuffle %36 %36 1 0
+		 %foo = OpSpecConstantOp %12 CompositeExtract %36 1
+         %42 = OpTypeVector %6 2
+         %49 = OpTypeArray %6 %18
+         %50 = OpTypeStruct %7 %6 %49 %49
+         %51 = OpTypePointer Output %50
+         %52 = OpVariable %51 Output
+		 %output = OpVariable %int_ptr Output
+         %53 = OpConstant %12 0
+         %55 = OpTypePointer Output %7
+         %57 = OpSpecConstant %6 3.14159
+          %4 = OpFunction %2 None %3
+          %5 = OpLabel
+          %9 = OpVariable %8 Function
+               OpStore %9 %11
+         %16 = OpConvertSToF %6 %15
+         %20 = OpAccessChain %19 %9 %18
+         %21 = OpLoad %6 %20
+         %22 = OpFAdd %6 %21 %16
+         %23 = OpAccessChain %19 %9 %18
+               OpStore %23 %22
+         %27 = OpConvertUToF %6 %26
+         %29 = OpAccessChain %19 %9 %28
+         %30 = OpLoad %6 %29
+         %31 = OpFAdd %6 %30 %27
+         %32 = OpAccessChain %19 %9 %28
+               OpStore %32 %31
+         %37 = OpConvertSToF %7 %36
+         %38 = OpLoad %7 %9
+         %39 = OpFAdd %7 %38 %37
+               OpStore %9 %39
+         %43 = OpConvertSToF %42 %41
+         %44 = OpLoad %7 %9
+         %45 = OpVectorShuffle %42 %44 %44 0 1
+         %46 = OpFAdd %42 %45 %43
+         %47 = OpLoad %7 %9
+         %48 = OpVectorShuffle %7 %47 %46 4 5 2 3
+               OpStore %9 %48
+         %54 = OpLoad %7 %9
+         %56 = OpAccessChain %55 %52 %53
+               OpStore %56 %54
+			   OpStore %output %foo
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders-msl/comp/access-private-workgroup-in-function.comp b/shaders-msl/comp/access-private-workgroup-in-function.comp
new file mode 100644
index 0000000000..7cb1e6f131
--- /dev/null
+++ b/shaders-msl/comp/access-private-workgroup-in-function.comp
@@ -0,0 +1,31 @@
+#version 450
+layout(local_size_x = 1) in;
+
+int f;
+shared int u;
+
+void set_f()
+{
+	f = 40;
+}
+
+void set_shared_u()
+{
+	u = 50;
+}
+
+void main()
+{
+	set_f();
+	set_shared_u();
+	if (gl_LocalInvocationIndex == 0u)
+	{
+		f = 10;
+	}
+	else
+	{
+		f = 30;
+		u = 20;
+	}
+}
+
diff --git a/shaders-msl/comp/atomic.comp b/shaders-msl/comp/atomic.comp
index 417284d5de..e25c4f6d24 100644
--- a/shaders-msl/comp/atomic.comp
+++ b/shaders-msl/comp/atomic.comp
@@ -10,6 +10,9 @@ layout(binding = 2, std430) buffer SSBO
     int  i32;
 } ssbo;
 
+shared uint shared_u32;
+shared int shared_i32;
+
 void main()
 {
     atomicAdd(ssbo.u32, 1u);
@@ -29,5 +32,25 @@ void main()
     atomicMax(ssbo.i32, 1);
     atomicExchange(ssbo.i32, 1);
     atomicCompSwap(ssbo.i32, 10, 2);
+
+	shared_u32 = 10u;
+	shared_i32 = 10;
+    atomicAdd(shared_u32, 1u);
+    atomicOr(shared_u32, 1u);
+    atomicXor(shared_u32, 1u);
+    atomicAnd(shared_u32, 1u);
+    atomicMin(shared_u32, 1u);
+    atomicMax(shared_u32, 1u);
+    atomicExchange(shared_u32, 1u);
+    atomicCompSwap(shared_u32, 10u, 2u);
+
+    atomicAdd(shared_i32, 1);
+    atomicOr(shared_i32, 1);
+    atomicXor(shared_i32, 1);
+    atomicAnd(shared_i32, 1);
+    atomicMin(shared_i32, 1);
+    atomicMax(shared_i32, 1);
+    atomicExchange(shared_i32, 1);
+    atomicCompSwap(shared_i32, 10, 2);
 }
 
diff --git a/shaders-msl/comp/composite-construct.comp b/shaders-msl/comp/composite-construct.comp
new file mode 100644
index 0000000000..3054775325
--- /dev/null
+++ b/shaders-msl/comp/composite-construct.comp
@@ -0,0 +1,31 @@
+#version 310 es
+layout(local_size_x = 1) in;
+
+layout(std430, binding = 0) buffer SSBO0
+{
+   vec4 as[];
+};
+
+layout(std430, binding = 1) buffer SSBO1
+{
+   vec4 bs[];
+};
+
+struct Composite
+{
+   vec4 a;
+   vec4 b;
+};
+
+const vec4 const_values[2] = vec4[](vec4(20.0), vec4(40.0));
+
+void main()
+{
+   vec4 values[2] = vec4[](as[gl_GlobalInvocationID.x], bs[gl_GlobalInvocationID.x]);
+   vec4 copy_values[2];
+   copy_values = const_values;
+   Composite c = Composite(values[0], copy_values[1]);
+
+   as[0] = values[gl_LocalInvocationIndex];
+   bs[1] = c.b;
+}
diff --git a/shaders-msl/comp/image-cube-array-load-store.comp b/shaders-msl/comp/image-cube-array-load-store.comp
new file mode 100644
index 0000000000..36a9ffd8ef
--- /dev/null
+++ b/shaders-msl/comp/image-cube-array-load-store.comp
@@ -0,0 +1,13 @@
+#version 450
+layout(local_size_x = 1) in;
+
+layout(r32f, binding = 0) uniform readonly imageCubeArray uImageIn;
+layout(r32f, binding = 1) uniform writeonly imageCubeArray uImageOut;
+
+void main()
+{
+	ivec3 coord = ivec3(9, 7, 11);
+	vec4 indata = imageLoad(uImageIn, coord);
+	imageStore(uImageOut, coord, indata);
+}
+
diff --git a/shaders-msl/comp/packing-test-1.comp b/shaders-msl/comp/packing-test-1.comp
new file mode 100644
index 0000000000..1a8a39e219
--- /dev/null
+++ b/shaders-msl/comp/packing-test-1.comp
@@ -0,0 +1,18 @@
+#version 450
+struct T1
+{
+    vec3 a;
+    float b;
+};
+
+layout(std430, binding = 1) buffer Buffer0 { T1 buf0[]; };
+layout(std430, binding = 2) buffer Buffer1 { float buf1[]; };
+
+layout(local_size_x = 32, local_size_y = 1, local_size_z = 1) in;
+void main()
+{
+    // broken case in Metal!
+    T1 v = buf0[0];
+    float x = v.b;
+    buf1[gl_GlobalInvocationID.x] = x;
+}
diff --git a/shaders-msl/comp/packing-test-2.comp b/shaders-msl/comp/packing-test-2.comp
new file mode 100644
index 0000000000..73268beec4
--- /dev/null
+++ b/shaders-msl/comp/packing-test-2.comp
@@ -0,0 +1,16 @@
+#version 450
+struct T1
+{
+    vec3 a;
+    float b;
+};
+
+layout(std430, binding = 1) buffer Buffer0 { T1 buf0[]; };
+layout(std430, binding = 2) buffer Buffer1 { float buf1[]; };
+
+layout(local_size_x = 32, local_size_y = 1, local_size_z = 1) in;
+void main()
+{
+    float x = buf0[0].b;
+    buf1[gl_GlobalInvocationID.x] = x;
+}
diff --git a/shaders-msl/comp/rmw-matrix.comp b/shaders-msl/comp/rmw-matrix.comp
new file mode 100644
index 0000000000..c158ab4ddd
--- /dev/null
+++ b/shaders-msl/comp/rmw-matrix.comp
@@ -0,0 +1,20 @@
+#version 310 es
+layout(local_size_x = 1) in;
+
+layout(std430, binding = 0) buffer SSBO
+{
+	float a;
+	vec4 b;
+	mat4 c;
+
+	float a1;
+	vec4 b1;
+	mat4 c1;
+};
+
+void main()
+{
+	a *= a1;
+	b *= b1;
+	c *= c1;
+}
diff --git a/shaders-msl/comp/udiv.comp b/shaders-msl/comp/udiv.comp
index 33fe564f07..d4e1133bc1 100644
--- a/shaders-msl/comp/udiv.comp
+++ b/shaders-msl/comp/udiv.comp
@@ -6,7 +6,7 @@ layout(std430, binding = 0) buffer SSBO
     uint inputs[];
 };
 
-layout(std430, binding = 0) buffer SSBO2
+layout(std430, binding = 1) buffer SSBO2
 {
     uint outputs[];
 };
diff --git a/shaders-msl/flatten/types.flatten.frag b/shaders-msl/flatten/types.flatten.frag
index faab5b7e05..c1231445fc 100644
--- a/shaders-msl/flatten/types.flatten.frag
+++ b/shaders-msl/flatten/types.flatten.frag
@@ -7,13 +7,13 @@ layout(std140, binding = 0) uniform UBO0
    vec4 b;
 };
 
-layout(std140, binding = 0) uniform UBO1
+layout(std140, binding = 1) uniform UBO1
 {
    ivec4 c;
    ivec4 d;
 };
 
-layout(std140, binding = 0) uniform UBO2
+layout(std140, binding = 2) uniform UBO2
 {
    uvec4 e;
    uvec4 f;
diff --git a/shaders-msl/frag/array-lut-no-loop-variable.frag b/shaders-msl/frag/array-lut-no-loop-variable.frag
new file mode 100644
index 0000000000..3493e0cccc
--- /dev/null
+++ b/shaders-msl/frag/array-lut-no-loop-variable.frag
@@ -0,0 +1,13 @@
+#version 310 es
+precision mediump float;
+
+layout(location = 0) out vec4 FragColor;
+layout(location = 0) in vec4 v0;
+
+void main()
+{
+	float lut[5] = float[](1.0, 2.0, 3.0, 4.0, 5.0);
+	for (int i = 0; i < 4; i++, FragColor += lut[i])
+	{
+	}
+}
diff --git a/shaders-msl/frag/binary-func-unpack-pack-arguments.frag b/shaders-msl/frag/binary-func-unpack-pack-arguments.frag
new file mode 100644
index 0000000000..c0e5dabd2b
--- /dev/null
+++ b/shaders-msl/frag/binary-func-unpack-pack-arguments.frag
@@ -0,0 +1,15 @@
+#version 450
+layout(location = 0) out float FragColor;
+
+layout(binding = 0, std140) uniform UBO
+{
+	vec3 color;
+	float v;
+};
+
+layout(location = 0) in vec3 vIn;
+
+void main()
+{
+	FragColor = dot(vIn, color);
+}
diff --git a/shaders-msl/frag/binary-unpack-pack-arguments.frag b/shaders-msl/frag/binary-unpack-pack-arguments.frag
new file mode 100644
index 0000000000..be30f84df7
--- /dev/null
+++ b/shaders-msl/frag/binary-unpack-pack-arguments.frag
@@ -0,0 +1,15 @@
+#version 450
+layout(location = 0) out vec3 FragColor;
+
+layout(binding = 0, std140) uniform UBO
+{
+	vec3 color;
+	float v;
+};
+
+layout(location = 0) in vec3 vIn;
+
+void main()
+{
+	FragColor = cross(vIn, color - vIn);
+}
diff --git a/shaders-msl/frag/control-dependent-in-branch.desktop.frag b/shaders-msl/frag/control-dependent-in-branch.desktop.frag
new file mode 100644
index 0000000000..1f21bef8c8
--- /dev/null
+++ b/shaders-msl/frag/control-dependent-in-branch.desktop.frag
@@ -0,0 +1,34 @@
+#version 450
+
+layout(location = 0) out vec4 FragColor;
+layout(binding = 0) uniform sampler2D uSampler;
+layout(location = 0) in vec4 vInput;
+
+void main()
+{
+	FragColor = vInput;
+	vec4 t = texture(uSampler, vInput.xy);
+	vec4 d0 = dFdx(vInput);
+	vec4 d1 = dFdy(vInput);
+	vec4 d2 = fwidth(vInput);
+	vec4 d3 = dFdxCoarse(vInput);
+	vec4 d4 = dFdyCoarse(vInput);
+	vec4 d5 = fwidthCoarse(vInput);
+	vec4 d6 = dFdxFine(vInput);
+	vec4 d7 = dFdyFine(vInput);
+	vec4 d8 = fwidthFine(vInput);
+	if (vInput.y > 10.0)
+	{
+		FragColor += t;
+		FragColor += d0;
+		FragColor += d1;
+		FragColor += d2;
+		FragColor += d3;
+		FragColor += d4;
+		FragColor += d5;
+		FragColor += d6;
+		FragColor += d7;
+		FragColor += d8;
+	}
+}
+
diff --git a/shaders-msl/frag/dual-source-blending.frag b/shaders-msl/frag/dual-source-blending.frag
new file mode 100644
index 0000000000..f322cf4c38
--- /dev/null
+++ b/shaders-msl/frag/dual-source-blending.frag
@@ -0,0 +1,10 @@
+#version 450
+
+layout(location = 0, index = 0) out vec4 FragColor0;
+layout(location = 0, index = 1) out vec4 FragColor1;
+
+void main()
+{
+	FragColor0 = vec4(1.0);
+	FragColor1 = vec4(2.0);
+}
diff --git a/shaders-msl/frag/fp16-packing.frag b/shaders-msl/frag/fp16-packing.frag
new file mode 100644
index 0000000000..98ca24e2f8
--- /dev/null
+++ b/shaders-msl/frag/fp16-packing.frag
@@ -0,0 +1,12 @@
+#version 450
+
+layout(location = 0) flat in uint FP16;
+layout(location = 1) flat in vec2 FP32;
+layout(location = 0) out vec2 FP32Out;
+layout(location = 1) out uint FP16Out;
+
+void main()
+{
+	FP32Out = unpackHalf2x16(FP16);
+	FP16Out = packHalf2x16(FP32);
+}
diff --git a/shaders-msl/frag/fp16.desktop.frag b/shaders-msl/frag/fp16.desktop.frag
new file mode 100644
index 0000000000..1e4026eb2b
--- /dev/null
+++ b/shaders-msl/frag/fp16.desktop.frag
@@ -0,0 +1,151 @@
+#version 450
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(location = 0) in float16_t v1;
+layout(location = 1) in f16vec2 v2;
+layout(location = 2) in f16vec3 v3;
+layout(location = 3) in f16vec4 v4;
+
+layout(location = 0) out float o1;
+layout(location = 1) out vec2 o2;
+layout(location = 2) out vec3 o3;
+layout(location = 3) out vec4 o4;
+
+f16mat2 test_mat2(f16vec2 a, f16vec2 b, f16vec2 c, f16vec2 d)
+{
+	return f16mat2(a, b) * f16mat2(c, d);
+}
+
+f16mat3 test_mat3(f16vec3 a, f16vec3 b, f16vec3 c, f16vec3 d, f16vec3 e, f16vec3 f)
+{
+	return f16mat3(a, b, c) * f16mat3(d, e, f);
+}
+
+void test_constants()
+{
+	float16_t a = 1.0hf;
+	float16_t b = 1.5hf;
+	float16_t c = -1.5hf; // Negatives
+	float16_t d = (0.0hf / 0.0hf); // NaN
+	float16_t e = (1.0hf / 0.0hf); // +Inf
+	float16_t f = (-1.0hf / 0.0hf); // -Inf
+	float16_t g = 1014.0hf; // Large.
+	float16_t h = 0.000001hf; // Denormal
+}
+
+float16_t test_result()
+{
+	return 1.0hf;
+}
+
+void test_conversions()
+{
+	float16_t one = test_result();
+	int a = int(one);
+	uint b = uint(one);
+	bool c = bool(one);
+	float d = float(one);
+	//double e = double(one);
+	float16_t a2 = float16_t(a);
+	float16_t b2 = float16_t(b);
+	float16_t c2 = float16_t(c);
+	float16_t d2 = float16_t(d);
+	//float16_t e2 = float16_t(e);
+}
+
+void test_builtins()
+{
+	f16vec4 res;
+	res = radians(v4);
+	res = degrees(v4);
+	res = sin(v4);
+	res = cos(v4);
+	res = tan(v4);
+	res = asin(v4);
+	res = atan(v4, v3.xyzz);
+	res = atan(v4);
+	res = sinh(v4);
+	res = cosh(v4);
+	res = tanh(v4);
+	res = asinh(v4);
+	res = acosh(v4);
+	res = atanh(v4);
+	res = pow(v4, v4);
+	res = exp(v4);
+	res = log(v4);
+	res = exp2(v4);
+	res = log2(v4);
+	res = sqrt(v4);
+	res = inversesqrt(v4);
+	res = abs(v4);
+	res = sign(v4);
+	res = floor(v4);
+	res = trunc(v4);
+	res = round(v4);
+	res = roundEven(v4);
+	res = ceil(v4);
+	res = fract(v4);
+	res = mod(v4, v4);
+	f16vec4 tmp;
+	res = modf(v4, tmp);
+	res = min(v4, v4);
+	res = max(v4, v4);
+	res = clamp(v4, v4, v4);
+	res = mix(v4, v4, v4);
+	res = mix(v4, v4, lessThan(v4, v4));
+	res = step(v4, v4);
+	res = smoothstep(v4, v4, v4);
+
+	bvec4 btmp = isnan(v4);
+	btmp = isinf(v4);
+	res = fma(v4, v4, v4);
+
+	ivec4 itmp;
+	res = frexp(v4, itmp);
+	res = ldexp(res, itmp);
+
+	uint pack0 = packFloat2x16(v4.xy);
+	uint pack1 = packFloat2x16(v4.zw);
+	res = f16vec4(unpackFloat2x16(pack0), unpackFloat2x16(pack1));
+
+	float16_t t0 = length(v4);
+	t0 = distance(v4, v4);
+	t0 = dot(v4, v4);
+	f16vec3 res3 = cross(v3, v3);
+	res = normalize(v4);
+	res = faceforward(v4, v4, v4);
+	res = reflect(v4, v4);
+	res = refract(v4, v4, v1);
+
+	btmp = lessThan(v4, v4);
+	btmp = lessThanEqual(v4, v4);
+	btmp = greaterThan(v4, v4);
+	btmp = greaterThanEqual(v4, v4);
+	btmp = equal(v4, v4);
+	btmp = notEqual(v4, v4);
+
+	res = dFdx(v4);
+	res = dFdy(v4);
+	res = dFdxFine(v4);
+	res = dFdyFine(v4);
+	res = dFdxCoarse(v4);
+	res = dFdyCoarse(v4);
+	res = fwidth(v4);
+	res = fwidthFine(v4);
+	res = fwidthCoarse(v4);
+
+	//res = interpolateAtCentroid(v4);
+	//res = interpolateAtSample(v4, 0);
+	//res = interpolateAtOffset(v4, f16vec2(0.1hf));
+}
+
+void main()
+{
+	// Basic matrix tests.
+	f16mat2 m0 = test_mat2(v2, v2, v3.xy, v3.xy);
+	f16mat3 m1 = test_mat3(v3, v3, v3, v4.xyz, v4.xyz, v4.yzw);
+
+	test_constants();
+	test_conversions();
+	test_builtins();
+}
diff --git a/shaders-msl/frag/gather-dref.frag b/shaders-msl/frag/gather-dref.frag
new file mode 100644
index 0000000000..a8aac56cb5
--- /dev/null
+++ b/shaders-msl/frag/gather-dref.frag
@@ -0,0 +1,11 @@
+#version 310 es
+precision mediump float;
+
+layout(binding = 0) uniform mediump sampler2DShadow uT;
+layout(location = 0) in vec3 vUV;
+layout(location = 0) out vec4 FragColor;
+
+void main()
+{
+	FragColor = textureGather(uT, vUV.xy, vUV.z);
+}
diff --git a/shaders-msl/frag/gather-offset.frag b/shaders-msl/frag/gather-offset.frag
new file mode 100644
index 0000000000..409317ab56
--- /dev/null
+++ b/shaders-msl/frag/gather-offset.frag
@@ -0,0 +1,9 @@
+#version 450
+
+layout(binding = 0) uniform sampler2D uT;
+layout(location = 0) out vec4 FragColor;
+
+void main()
+{
+    FragColor = textureGather(uT, vec2(0.5), 3);
+}
diff --git a/shaders-msl/frag/illegal-name-test-0.frag b/shaders-msl/frag/illegal-name-test-0.frag
new file mode 100644
index 0000000000..8e6c11d1d3
--- /dev/null
+++ b/shaders-msl/frag/illegal-name-test-0.frag
@@ -0,0 +1,12 @@
+#version 450
+
+layout(location = 0) out vec4 FragColor;
+
+void main()
+{
+	vec4 fragment = vec4(10.0);
+	vec4 compute = vec4(10.0);
+	vec4 kernel = vec4(10.0);
+	vec4 vertex = vec4(10.0);
+	FragColor = fragment + compute + kernel + vertex;
+}
diff --git a/shaders-msl/frag/in_mat.frag b/shaders-msl/frag/in_mat.frag
new file mode 100644
index 0000000000..dd0b5d0350
--- /dev/null
+++ b/shaders-msl/frag/in_mat.frag
@@ -0,0 +1,19 @@
+#version 450
+
+layout(binding = 1) uniform samplerCube samplerColor;
+
+layout(location = 0) in vec3 inPos;
+layout(location = 1) in vec3 inNormal;
+layout(location = 2) in mat4 inInvModelView;
+layout(location = 6) in float inLodBias;
+layout(location = 0) out vec4 outFragColor;
+
+void main()
+{
+	vec3 cI = normalize(inPos);
+	vec3 cR = reflect(cI, normalize(inNormal));
+	cR = vec3((inInvModelView * vec4(cR, 0.0)).xyz);
+	cR.x *= (-1.0);
+	outFragColor = texture(samplerColor, cR, inLodBias);
+}
+
diff --git a/shaders-msl/frag/mrt-array.frag b/shaders-msl/frag/mrt-array.frag
new file mode 100644
index 0000000000..0460c72ab9
--- /dev/null
+++ b/shaders-msl/frag/mrt-array.frag
@@ -0,0 +1,24 @@
+#version 310 es
+precision mediump float;
+
+layout(location = 0) out vec4 FragColor[4];
+layout(location = 0) in vec4 vA;
+layout(location = 1) in vec4 vB;
+
+void write_deeper_in_function()
+{
+	FragColor[3] = vA * vB;
+}
+
+void write_in_function()
+{
+	FragColor[2] = vA - vB;
+	write_deeper_in_function();
+}
+
+void main()
+{
+	FragColor[0] = mod(vA, vB);
+	FragColor[1] = vA + vB;
+	write_in_function();
+}
diff --git a/shaders-msl/frag/packed-expression-vector-shuffle.frag b/shaders-msl/frag/packed-expression-vector-shuffle.frag
new file mode 100644
index 0000000000..9958443813
--- /dev/null
+++ b/shaders-msl/frag/packed-expression-vector-shuffle.frag
@@ -0,0 +1,15 @@
+#version 450
+layout(location = 0) out vec4 FragColor;
+
+layout(binding = 0, std140) uniform UBO
+{
+	vec3 color;
+	float v;
+};
+
+void main()
+{
+	vec4 f = vec4(1.0);
+	f.rgb = color;
+	FragColor = f;
+}
diff --git a/shaders-msl/frag/packing-test-3.frag b/shaders-msl/frag/packing-test-3.frag
new file mode 100644
index 0000000000..56ad6f5f1b
--- /dev/null
+++ b/shaders-msl/frag/packing-test-3.frag
@@ -0,0 +1,36 @@
+#version 450
+		
+struct VertexOutput
+{
+    vec4 HPosition;
+};
+
+struct TestStruct
+{
+    vec3 position;
+    float radius;
+};
+
+layout(binding = 0, std140) uniform CB0
+{
+    TestStruct CB0[16];
+} _24;
+
+layout(location = 0) out vec4 _entryPointOutput;
+
+vec4 _main(VertexOutput IN)
+{
+    TestStruct st;
+    st.position = _24.CB0[1].position;
+    st.radius = _24.CB0[1].radius;
+    vec4 col = vec4(st.position, st.radius);
+    return col;
+}
+
+void main()
+{
+    VertexOutput IN;
+    IN.HPosition = gl_FragCoord;
+    VertexOutput param = IN;
+    _entryPointOutput = _main(param);
+}
diff --git a/shaders-msl/frag/readonly-ssbo.frag b/shaders-msl/frag/readonly-ssbo.frag
new file mode 100644
index 0000000000..9d7cff66fd
--- /dev/null
+++ b/shaders-msl/frag/readonly-ssbo.frag
@@ -0,0 +1,16 @@
+#version 450
+layout(location = 0) out vec4 FragColor;
+layout(binding = 0, std430) readonly buffer SSBO
+{
+	vec4 v;
+};
+
+vec4 read_from_function()
+{
+	return v;
+}
+
+void main()
+{
+	FragColor = v + read_from_function();
+}
diff --git a/shaders-msl/frag/sampler-1d-lod.frag b/shaders-msl/frag/sampler-1d-lod.frag
new file mode 100644
index 0000000000..f4526f39d0
--- /dev/null
+++ b/shaders-msl/frag/sampler-1d-lod.frag
@@ -0,0 +1,12 @@
+#version 450
+
+layout(location = 0) out vec4 FragColor;
+layout(location = 0) flat in float vTex;
+layout(binding = 0) uniform sampler1D uSampler;
+
+void main()
+{
+	FragColor += texture(uSampler, vTex, 2.0) +
+		textureLod(uSampler, vTex, 3.0) +
+		textureGrad(uSampler, vTex, 5.0, 8.0);
+}
diff --git a/shaders-msl/frag/sampler-image-arrays.msl2.frag b/shaders-msl/frag/sampler-image-arrays.msl2.frag
new file mode 100644
index 0000000000..42370d9728
--- /dev/null
+++ b/shaders-msl/frag/sampler-image-arrays.msl2.frag
@@ -0,0 +1,33 @@
+#version 450
+
+layout(location = 0) out vec4 FragColor;
+layout(location = 0) flat in vec2 vTex;
+layout(location = 1) flat in int vIndex;
+layout(binding = 0) uniform sampler2D uSampler[4];
+layout(binding = 4) uniform sampler uSamplers[4];
+layout(binding = 8) uniform texture2D uTextures[4];
+
+vec4 sample_from_argument(sampler2D samplers[4])
+{
+	return texture(samplers[vIndex], vTex + 0.2);
+}
+
+vec4 sample_single_from_argument(sampler2D samp)
+{
+	return texture(samp, vTex + 0.3);
+}
+
+vec4 sample_from_global()
+{
+	return texture(uSampler[vIndex], vTex + 0.1);
+}
+
+void main()
+{
+	FragColor = vec4(0.0);
+	FragColor += texture(sampler2D(uTextures[2], uSamplers[1]), vTex);
+	FragColor += texture(uSampler[vIndex], vTex);
+	FragColor += sample_from_global();
+	FragColor += sample_from_argument(uSampler);
+	FragColor += sample_single_from_argument(uSampler[3]);
+}
diff --git a/shaders-msl/frag/shadow-compare-global-alias.frag b/shaders-msl/frag/shadow-compare-global-alias.frag
new file mode 100644
index 0000000000..d885a78471
--- /dev/null
+++ b/shaders-msl/frag/shadow-compare-global-alias.frag
@@ -0,0 +1,38 @@
+#version 450
+
+layout(location = 0) out float FragColor;
+layout(binding = 0) uniform sampler2DShadow uSampler;
+layout(location = 0) in vec3 vUV;
+
+layout(binding = 1) uniform texture2D uTex;
+layout(binding = 2) uniform samplerShadow uSamp;
+
+float Samp(vec3 uv)
+{
+	return texture(sampler2DShadow(uTex, uSamp), uv);
+}
+
+float Samp2(vec3 uv)
+{
+	return texture(uSampler, vUV);
+}
+
+float Samp3(texture2D uT, samplerShadow uS, vec3 uv)
+{
+	return texture(sampler2DShadow(uT, uS), vUV);
+}
+
+float Samp4(sampler2DShadow uS, vec3 uv)
+{
+	return texture(uS, vUV);
+}
+
+void main()
+{
+	FragColor = texture(uSampler, vUV);
+	FragColor += texture(sampler2DShadow(uTex, uSamp), vUV);
+	FragColor += Samp(vUV);
+	FragColor += Samp2(vUV);
+	FragColor += Samp3(uTex, uSamp, vUV);
+	FragColor += Samp4(uSampler, vUV);
+}
diff --git a/shaders-msl/frag/swizzle.frag b/shaders-msl/frag/swizzle.frag
index 271ba6cb64..af22dd655d 100644
--- a/shaders-msl/frag/swizzle.frag
+++ b/shaders-msl/frag/swizzle.frag
@@ -1,7 +1,7 @@
 #version 310 es
 precision mediump float;
 
-layout(location = 0) uniform sampler2D samp;
+layout(binding = 0) uniform sampler2D samp;
 layout(location = 0) out vec4 FragColor;
 layout(location = 1) in vec3 vNormal;
 layout(location = 2) in vec2 vUV;
diff --git a/shaders-msl/vert/in_out_array_mat.vert b/shaders-msl/vert/in_out_array_mat.vert
new file mode 100644
index 0000000000..bdff3d2802
--- /dev/null
+++ b/shaders-msl/vert/in_out_array_mat.vert
@@ -0,0 +1,41 @@
+#version 450
+
+layout(binding = 0, std140) uniform UBO
+{
+	mat4 projection;
+	mat4 model;
+	float lodBias;
+} ubo;
+
+layout(location = 0) in vec3 inPos;
+layout(location = 1) in vec4 colors[3];
+layout(location = 4) in vec3 inNormal;
+layout(location = 5) in mat4 inViewMat;
+layout(location = 0) out vec3 outPos;
+layout(location = 1) out vec3 outNormal;
+layout(location = 2) out mat4 outTransModel;
+layout(location = 6) out float outLodBias;
+layout(location = 7) out vec4 color;
+
+void write_deeper_in_function() // Writes stage outputs from a function reached via main() -> write_in_function().
+{
+	outTransModel[1][1] = ubo.lodBias; // Single-component write into the mat4 output.
+	color = colors[2]; // Reads the last element of the colors[3] input array.
+}
+
+void write_in_function() // Writes column 2 of the mat4 output, then calls write_deeper_in_function().
+{
+	outTransModel[2] = vec4(inNormal, 1.0);
+	write_deeper_in_function();
+}
+
+void main() // Vertex entry point: writes gl_Position, outPos, outNormal, outLodBias and outTransModel directly; color is written by the helper call chain.
+{
+	gl_Position = (ubo.projection * ubo.model) * vec4(inPos, 1.0);
+	outPos = vec3((ubo.model * vec4(inPos, 1.0)).xyz);
+	outNormal = mat3(vec3(ubo.model[0].x, ubo.model[0].y, ubo.model[0].z), vec3(ubo.model[1].x, ubo.model[1].y, ubo.model[1].z), vec3(ubo.model[2].x, ubo.model[2].y, ubo.model[2].z)) * inNormal; // mat3 built from the x/y/z components of the first three columns of ubo.model.
+	outLodBias = ubo.lodBias;
+	outTransModel = transpose(ubo.model) * inViewMat; // Whole-matrix write; column 2 and element [1][1] are overwritten by the call below.
+	write_in_function();
+}
+
diff --git a/shaders-msl/vert/read-from-row-major-array.vert b/shaders-msl/vert/read-from-row-major-array.vert
new file mode 100644
index 0000000000..792fb8e36c
--- /dev/null
+++ b/shaders-msl/vert/read-from-row-major-array.vert
@@ -0,0 +1,20 @@
+#version 310 es
+layout(location = 0) in highp vec4 a_position;
+layout(location = 0) out mediump float v_vtxResult;
+
+layout(set = 0, binding = 0, std140, row_major) uniform Block
+{
+	highp mat2x3 var[3][4];
+};
+
+mediump float compare_float    (highp float a, highp float b)  { return abs(a - b) < 0.05 ? 1.0 : 0.0; } // 1.0 when a and b differ by less than 0.05, otherwise 0.0.
+mediump float compare_vec3     (highp vec3 a, highp vec3 b)    { return compare_float(a.x, b.x)*compare_float(a.y, b.y)*compare_float(a.z, b.z); } // Product of the three per-component compare_float results.
+mediump float compare_mat2x3   (highp mat2x3 a, highp mat2x3 b){ return compare_vec3(a[0], b[0])*compare_vec3(a[1], b[1]); } // Product of compare_vec3 over both columns.
+
+void main (void) // Compares one matrix read from the row_major uniform block against a constant and outputs the comparison result.
+{
+	gl_Position = a_position;
+	mediump float result = 1.0;
+	result *= compare_mat2x3(var[0][0], mat2x3(2.0, 6.0, -6.0, 0.0, 5.0, 5.0)); // Only element [0][0] of the var[3][4] array is read.
+	v_vtxResult = result;
+}
diff --git a/shaders/asm/frag/inliner-dominator-inside-loop.asm.frag b/shaders-no-opt/asm/frag/inliner-dominator-inside-loop.asm.frag
similarity index 100%
rename from shaders/asm/frag/inliner-dominator-inside-loop.asm.frag
rename to shaders-no-opt/asm/frag/inliner-dominator-inside-loop.asm.frag
diff --git a/shaders/asm/vert/empty-struct-composite.asm.vert b/shaders-no-opt/asm/vert/empty-struct-composite.asm.vert
similarity index 100%
rename from shaders/asm/vert/empty-struct-composite.asm.vert
rename to shaders-no-opt/asm/vert/empty-struct-composite.asm.vert
diff --git a/shaders-no-opt/asm/vert/semantic-decoration.asm.vert b/shaders-no-opt/asm/vert/semantic-decoration.asm.vert
new file mode 100644
index 0000000000..76007c30a3
--- /dev/null
+++ b/shaders-no-opt/asm/vert/semantic-decoration.asm.vert
@@ -0,0 +1,68 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 6
+; Bound: 36
+; Schema: 0
+               OpCapability Shader
+               OpExtension "SPV_GOOGLE_decorate_string"
+               OpExtension "SPV_GOOGLE_hlsl_functionality1"
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Vertex %main "main" %_entryPointOutput_p %_entryPointOutput_c
+               OpSource HLSL 500
+               OpName %main "main"
+               OpName %VOut "VOut"
+               OpMemberName %VOut 0 "p"
+               OpMemberName %VOut 1 "c"
+               OpName %_main_ "@main("
+               OpName %v "v"
+               OpName %flattenTemp "flattenTemp"
+               OpName %_entryPointOutput_p "@entryPointOutput.p"
+               OpName %_entryPointOutput_c "@entryPointOutput.c"
+               OpMemberDecorateStringGOOGLE %VOut 0 HlslSemanticGOOGLE "SV_POSITION"
+               OpMemberDecorateStringGOOGLE %VOut 1 HlslSemanticGOOGLE "COLOR"
+               OpDecorate %_entryPointOutput_p BuiltIn Position
+               OpDecorateStringGOOGLE %_entryPointOutput_p HlslSemanticGOOGLE "SV_POSITION"
+               OpDecorate %_entryPointOutput_c Location 0
+               OpDecorateStringGOOGLE %_entryPointOutput_c HlslSemanticGOOGLE "COLOR"
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+       %VOut = OpTypeStruct %v4float %v4float
+          %9 = OpTypeFunction %VOut
+%_ptr_Function_VOut = OpTypePointer Function %VOut
+        %int = OpTypeInt 32 1
+      %int_0 = OpConstant %int 0
+    %float_1 = OpConstant %float 1
+         %17 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1
+%_ptr_Function_v4float = OpTypePointer Function %v4float
+      %int_1 = OpConstant %int 1
+    %float_2 = OpConstant %float 2
+         %22 = OpConstantComposite %v4float %float_2 %float_2 %float_2 %float_2
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+%_entryPointOutput_p = OpVariable %_ptr_Output_v4float Output
+%_entryPointOutput_c = OpVariable %_ptr_Output_v4float Output
+       %main = OpFunction %void None %3 ; Entry point: calls %_main_ and copies the two members of the returned VOut to the two Output variables.
+          %5 = OpLabel
+%flattenTemp = OpVariable %_ptr_Function_VOut Function
+         %28 = OpFunctionCall %VOut %_main_
+               OpStore %flattenTemp %28
+         %31 = OpAccessChain %_ptr_Function_v4float %flattenTemp %int_0 ; Member 0 ("p").
+         %32 = OpLoad %v4float %31
+               OpStore %_entryPointOutput_p %32
+         %34 = OpAccessChain %_ptr_Function_v4float %flattenTemp %int_1 ; Member 1 ("c").
+         %35 = OpLoad %v4float %34
+               OpStore %_entryPointOutput_c %35
+               OpReturn
+               OpFunctionEnd
+     %_main_ = OpFunction %VOut None %9 ; Fills a local VOut with the constants %17 and %22 and returns it by value.
+         %11 = OpLabel
+          %v = OpVariable %_ptr_Function_VOut Function
+         %19 = OpAccessChain %_ptr_Function_v4float %v %int_0
+               OpStore %19 %17 ; Member 0 = %17, the all-%float_1 vec4 constant.
+         %23 = OpAccessChain %_ptr_Function_v4float %v %int_1
+               OpStore %23 %22 ; Member 1 = %22, the all-%float_2 vec4 constant.
+         %24 = OpLoad %VOut %v
+               OpReturnValue %24
+               OpFunctionEnd
diff --git a/shaders/comp/bitfield.noopt.comp b/shaders-no-opt/comp/bitfield.comp
similarity index 100%
rename from shaders/comp/bitfield.noopt.comp
rename to shaders-no-opt/comp/bitfield.comp
diff --git a/shaders/comp/loop.noopt.comp b/shaders-no-opt/comp/loop.comp
similarity index 100%
rename from shaders/comp/loop.noopt.comp
rename to shaders-no-opt/comp/loop.comp
diff --git a/shaders/comp/return.comp b/shaders-no-opt/comp/return.comp
similarity index 100%
rename from shaders/comp/return.comp
rename to shaders-no-opt/comp/return.comp
diff --git a/shaders/vulkan/frag/spec-constant.vk.frag b/shaders-no-opt/vulkan/frag/spec-constant.vk.frag
similarity index 100%
rename from shaders/vulkan/frag/spec-constant.vk.frag
rename to shaders-no-opt/vulkan/frag/spec-constant.vk.frag
diff --git a/shaders/amd/fs.invalid.frag b/shaders/amd/fs.invalid.frag
index 8cbd73e336..1ff82de06e 100644
--- a/shaders/amd/fs.invalid.frag
+++ b/shaders/amd/fs.invalid.frag
@@ -2,8 +2,8 @@
 #extension GL_AMD_shader_fragment_mask : require
 #extension GL_AMD_shader_explicit_vertex_parameter : require
 
-uniform sampler2DMS texture1;
-layout(location = 0) in vec4 vary;
+layout(binding = 0) uniform sampler2DMS texture1;
+layout(location = 0) __explicitInterpAMD in vec4 vary;
 
 void main()
 {
diff --git a/shaders/asm/comp/hlsl-functionality.asm.comp b/shaders/asm/comp/hlsl-functionality.asm.comp
new file mode 100644
index 0000000000..dfdcb45402
--- /dev/null
+++ b/shaders/asm/comp/hlsl-functionality.asm.comp
@@ -0,0 +1,63 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 6
+; Bound: 31
+; Schema: 0
+               OpCapability Shader
+			   OpExtension "SPV_GOOGLE_hlsl_functionality1"
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpSource HLSL 500
+               OpName %main "main"
+               OpName %_main_ "@main("
+               OpName %Buf "Buf"
+               OpMemberName %Buf 0 "@data"
+               OpName %Buf_0 "Buf"
+               OpName %Buf_count "Buf@count"
+               OpMemberName %Buf_count 0 "@count"
+               OpName %Buf_count_0 "Buf@count"
+               OpDecorate %_runtimearr_v4float ArrayStride 16
+               OpMemberDecorate %Buf 0 Offset 0
+               OpDecorate %Buf BufferBlock
+               OpDecorate %Buf_0 DescriptorSet 0
+               OpDecorate %Buf_0 Binding 0
+               OpMemberDecorate %Buf_count 0 Offset 0
+               OpDecorate %Buf_count BufferBlock
+               OpDecorate %Buf_count_0 DescriptorSet 0
+               OpDecorateId %Buf_0 HlslCounterBufferGOOGLE %Buf_count_0
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%_runtimearr_v4float = OpTypeRuntimeArray %v4float
+        %Buf = OpTypeStruct %_runtimearr_v4float
+%_ptr_Uniform_Buf = OpTypePointer Uniform %Buf
+      %Buf_0 = OpVariable %_ptr_Uniform_Buf Uniform
+        %int = OpTypeInt 32 1
+      %int_0 = OpConstant %int 0
+  %Buf_count = OpTypeStruct %int
+%_ptr_Uniform_Buf_count = OpTypePointer Uniform %Buf_count
+%Buf_count_0 = OpVariable %_ptr_Uniform_Buf_count Uniform
+%_ptr_Uniform_int = OpTypePointer Uniform %int
+      %int_1 = OpConstant %int 1
+       %uint = OpTypeInt 32 0
+     %uint_1 = OpConstant %uint 1
+     %uint_0 = OpConstant %uint 0
+    %float_1 = OpConstant %float 1
+         %27 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+       %main = OpFunction %void None %3 ; Entry point: only forwards to %_main_.
+          %5 = OpLabel
+         %30 = OpFunctionCall %void %_main_
+               OpReturn
+               OpFunctionEnd
+     %_main_ = OpFunction %void None %3 ; Atomically adds 1 to the counter buffer and stores %27 into Buf at the index the atomic returns.
+          %7 = OpLabel
+         %20 = OpAccessChain %_ptr_Uniform_int %Buf_count_0 %int_0 ; Pointer to member 0 ("@count") of the counter buffer.
+         %25 = OpAtomicIAdd %int %20 %uint_1 %uint_0 %int_1 ; Scope operand %uint_1, semantics operand %uint_0, addend %int_1; per the SPIR-V spec the result is the value before the add.
+         %29 = OpAccessChain %_ptr_Uniform_v4float %Buf_0 %int_0 %25 ; Element %25 of the runtime array in member 0 ("@data").
+               OpStore %29 %27 ; %27 is the all-%float_1 vec4 constant.
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders/asm/comp/name-alias.asm.invalid.comp b/shaders/asm/comp/name-alias.asm.invalid.comp
deleted file mode 100644
index f9bc6dbb67..0000000000
--- a/shaders/asm/comp/name-alias.asm.invalid.comp
+++ /dev/null
@@ -1,124 +0,0 @@
-; SPIR-V
-; Version: 1.0
-; Generator: Khronos Glslang Reference Front End; 1
-; Bound: 48
-; Schema: 0
-               OpCapability Shader
-          %1 = OpExtInstImport "GLSL.std.450"
-               OpMemoryModel Logical GLSL450
-               OpEntryPoint GLCompute %4 "main"
-               OpExecutionMode %4 LocalSize 1 1 1
-               OpSource ESSL 310
-               OpName %4 "alias"
-               OpName %15 "alias"
-               OpMemberName %15 0 "alias"
-               OpName %18 "alias"
-               OpMemberName %18 0 "alias"
-               OpMemberName %18 1 "alias"
-               OpMemberName %18 2 "alias"
-               OpName %19 "alias"
-               OpMemberName %19 0 "alias"
-               OpMemberName %19 1 "alias"
-               OpName %21 "alias"
-               OpName %24 "alias"
-               OpMemberName %24 0 "alias"
-               OpName %26 "alias"
-               OpMemberName %26 0 "alias"
-               OpMemberName %26 1 "alias"
-               OpMemberName %26 2 "alias"
-               OpName %27 "alias"
-               OpMemberName %27 0 "alias"
-               OpMemberName %27 1 "alias"
-               OpName %28 "alias"
-               OpMemberName %28 0 "alias"
-               OpName %30 "alias"
-               OpName %38 "alias"
-               OpMemberName %38 0 "alias"
-               OpName %40 "alias"
-               OpMemberName %40 0 "alias"
-               OpMemberName %40 1 "alias"
-               OpMemberName %40 2 "alias"
-               OpName %41 "alias"
-               OpMemberName %41 0 "alias"
-               OpMemberName %41 1 "alias"
-               OpName %42 "alias"
-               OpMemberName %42 0 "alias"
-               OpName %44 "alias"
-               OpDecorate %22 ArrayStride 8
-               OpDecorate %23 ArrayStride 16
-               OpMemberDecorate %24 0 Offset 0
-               OpDecorate %25 ArrayStride 1600
-               OpMemberDecorate %26 0 Offset 0
-               OpMemberDecorate %26 1 Offset 16
-               OpMemberDecorate %26 2 Offset 96
-               OpMemberDecorate %27 0 Offset 0
-               OpMemberDecorate %27 1 Offset 16
-               OpMemberDecorate %28 0 Offset 0
-               OpDecorate %28 BufferBlock
-               OpDecorate %30 DescriptorSet 0
-               OpDecorate %30 Binding 0
-               OpDecorate %36 ArrayStride 16
-               OpDecorate %37 ArrayStride 16
-               OpMemberDecorate %38 0 Offset 0
-               OpDecorate %39 ArrayStride 1600
-               OpMemberDecorate %40 0 Offset 0
-               OpMemberDecorate %40 1 Offset 16
-               OpMemberDecorate %40 2 Offset 176
-               OpMemberDecorate %41 0 Offset 0
-               OpMemberDecorate %41 1 Offset 16
-               OpMemberDecorate %42 0 Offset 0
-               OpDecorate %42 BufferBlock
-               OpDecorate %44 DescriptorSet 0
-               OpDecorate %44 Binding 1
-          %2 = OpTypeVoid
-          %3 = OpTypeFunction %2
-          %6 = OpTypeFloat 32
-          %7 = OpTypeVector %6 4
-          %8 = OpTypeVector %6 2
-          %9 = OpTypeInt 32 0
-         %10 = OpConstant %9 10
-         %11 = OpTypeArray %8 %10
-         %12 = OpTypeVector %6 3
-         %13 = OpConstant %9 100
-         %14 = OpTypeArray %12 %13
-         %15 = OpTypeStruct %14
-         %16 = OpConstant %9 2
-         %17 = OpTypeArray %15 %16
-         %18 = OpTypeStruct %7 %11 %17
-         %19 = OpTypeStruct %7 %18
-         %20 = OpTypePointer Function %19
-         %22 = OpTypeArray %8 %10
-         %23 = OpTypeArray %12 %13
-         %24 = OpTypeStruct %23
-         %25 = OpTypeArray %24 %16
-         %26 = OpTypeStruct %7 %22 %25
-         %27 = OpTypeStruct %7 %26
-         %28 = OpTypeStruct %27
-         %29 = OpTypePointer Uniform %28
-         %30 = OpVariable %29 Uniform
-         %31 = OpTypeInt 32 1
-         %32 = OpConstant %31 0
-         %33 = OpTypePointer Uniform %27
-         %36 = OpTypeArray %8 %10
-         %37 = OpTypeArray %12 %13
-         %38 = OpTypeStruct %37
-         %39 = OpTypeArray %38 %16
-         %40 = OpTypeStruct %7 %36 %39
-         %41 = OpTypeStruct %7 %40
-         %42 = OpTypeStruct %41
-         %43 = OpTypePointer Uniform %42
-         %44 = OpVariable %43 Uniform
-         %46 = OpTypePointer Uniform %41
-          %4 = OpFunction %2 None %3
-          %5 = OpLabel
-         %21 = OpVariable %20 Function
-         %34 = OpAccessChain %33 %30 %32
-         %35 = OpLoad %27 %34
-; This shader has an illegal aliased store for testing purposes. spirv-val is not run for this shader.
-               OpStore %21 %35
-         %45 = OpLoad %19 %21
-         %47 = OpAccessChain %46 %44 %32
-; This shader has an illegal aliased store for testing purposes. spirv-val is not run for this shader.
-               OpStore %47 %45
-               OpReturn
-               OpFunctionEnd
diff --git a/shaders/asm/comp/storage-buffer-basic.asm.comp b/shaders/asm/comp/storage-buffer-basic.invalid.asm.comp
similarity index 100%
rename from shaders/asm/comp/storage-buffer-basic.asm.comp
rename to shaders/asm/comp/storage-buffer-basic.invalid.asm.comp
diff --git a/shaders/asm/frag/combined-sampler-reuse.vk.asm.frag b/shaders/asm/frag/combined-sampler-reuse.vk.asm.frag
new file mode 100644
index 0000000000..ba2f95b234
--- /dev/null
+++ b/shaders/asm/frag/combined-sampler-reuse.vk.asm.frag
@@ -0,0 +1,57 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 6
+; Bound: 36
+; Schema: 0
+               OpCapability Shader
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main" %FragColor %vUV
+               OpExecutionMode %main OriginUpperLeft
+               OpSource GLSL 450
+               OpName %main "main"
+               OpName %FragColor "FragColor"
+               OpName %uTex "uTex"
+               OpName %uSampler "uSampler"
+               OpName %vUV "vUV"
+               OpDecorate %FragColor Location 0
+               OpDecorate %uTex DescriptorSet 0
+               OpDecorate %uTex Binding 1
+               OpDecorate %uSampler DescriptorSet 0
+               OpDecorate %uSampler Binding 0
+               OpDecorate %vUV Location 0
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+  %FragColor = OpVariable %_ptr_Output_v4float Output
+         %10 = OpTypeImage %float 2D 0 0 0 1 Unknown
+%_ptr_UniformConstant_10 = OpTypePointer UniformConstant %10
+       %uTex = OpVariable %_ptr_UniformConstant_10 UniformConstant
+         %14 = OpTypeSampler
+%_ptr_UniformConstant_14 = OpTypePointer UniformConstant %14
+   %uSampler = OpVariable %_ptr_UniformConstant_14 UniformConstant
+         %18 = OpTypeSampledImage %10
+    %v2float = OpTypeVector %float 2
+%_ptr_Input_v2float = OpTypePointer Input %v2float
+        %vUV = OpVariable %_ptr_Input_v2float Input
+        %int = OpTypeInt 32 1
+      %v2int = OpTypeVector %int 2
+      %int_1 = OpConstant %int 1
+         %32 = OpConstantComposite %v2int %int_1 %int_1
+       %main = OpFunction %void None %3 ; Samples twice through one OpSampledImage result (%19) and stores the sum of both samples in FragColor.
+          %5 = OpLabel
+         %13 = OpLoad %10 %uTex
+         %17 = OpLoad %14 %uSampler
+         %19 = OpSampledImage %18 %13 %17 ; Combined once; used by both sample instructions below.
+         %23 = OpLoad %v2float %vUV
+         %24 = OpImageSampleImplicitLod %v4float %19 %23
+               OpStore %FragColor %24
+         %28 = OpLoad %v2float %vUV
+         %33 = OpImageSampleImplicitLod %v4float %19 %28 ConstOffset %32 ; Second use of %19; %32 is the (%int_1, %int_1) constant.
+         %34 = OpLoad %v4float %FragColor
+         %35 = OpFAdd %v4float %34 %33
+               OpStore %FragColor %35
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders/asm/frag/complex-name-workarounds.asm.frag b/shaders/asm/frag/complex-name-workarounds.asm.frag
new file mode 100644
index 0000000000..59a67730a6
--- /dev/null
+++ b/shaders/asm/frag/complex-name-workarounds.asm.frag
@@ -0,0 +1,81 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 6
+; Bound: 47
+; Schema: 0
+               OpCapability Shader
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main" %a %b %FragColor
+               OpExecutionMode %main OriginUpperLeft
+               OpSource GLSL 450
+               OpName %main "main"
+               OpName %func__vf4_ "fu__nc_"
+               OpName %a_ "a_"
+               OpName %func_2_vf4_ "fu__nc_"
+               OpName %a_2 "___"
+               OpName %c0 "___"
+               OpName %a "__"
+               OpName %b "a"
+               OpName %param "b"
+               OpName %c1 "b"
+               OpName %param_0 "b"
+               OpName %FragColor "b"
+               OpDecorate %a Location 0
+               OpDecorate %b Location 1
+               OpDecorate %FragColor Location 0
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%_ptr_Function_v4float = OpTypePointer Function %v4float
+          %9 = OpTypeFunction %v4float %_ptr_Function_v4float
+%_ptr_Input_v4float = OpTypePointer Input %v4float
+          %a = OpVariable %_ptr_Input_v4float Input
+          %b = OpVariable %_ptr_Input_v4float Input
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+  %FragColor = OpVariable %_ptr_Output_v4float Output
+       %main = OpFunction %void None %3 ; Computes %c0 = %a + %b + func(%a) and %c1 = %a - %b + func_2(%b), then stores c0, c1, c0, c1 to %FragColor in that order.
+          %5 = OpLabel
+         %c0 = OpVariable %_ptr_Function_v4float Function
+      %param = OpVariable %_ptr_Function_v4float Function
+         %c1 = OpVariable %_ptr_Function_v4float Function
+    %param_0 = OpVariable %_ptr_Function_v4float Function
+         %25 = OpLoad %v4float %a
+         %27 = OpLoad %v4float %b
+         %28 = OpFAdd %v4float %25 %27
+         %30 = OpLoad %v4float %a
+               OpStore %param %30 ; Argument for the first call is a copy of input %a.
+         %31 = OpFunctionCall %v4float %func__vf4_ %param
+         %32 = OpFAdd %v4float %28 %31
+               OpStore %c0 %32
+         %34 = OpLoad %v4float %a
+         %35 = OpLoad %v4float %b
+         %36 = OpFSub %v4float %34 %35
+         %38 = OpLoad %v4float %b
+               OpStore %param_0 %38 ; Argument for the second call is a copy of input %b.
+         %39 = OpFunctionCall %v4float %func_2_vf4_ %param_0
+         %40 = OpFAdd %v4float %36 %39
+               OpStore %c1 %40
+         %43 = OpLoad %v4float %c0
+               OpStore %FragColor %43
+         %44 = OpLoad %v4float %c1
+               OpStore %FragColor %44
+         %45 = OpLoad %v4float %c0
+               OpStore %FragColor %45
+         %46 = OpLoad %v4float %c1
+               OpStore %FragColor %46 ; Last store to %FragColor in this function.
+               OpReturn
+               OpFunctionEnd
+ %func__vf4_ = OpFunction %v4float None %9 ; Returns the vec4 loaded from its pointer parameter %a_.
+         %a_ = OpFunctionParameter %_ptr_Function_v4float
+         %12 = OpLabel
+         %16 = OpLoad %v4float %a_
+               OpReturnValue %16
+               OpFunctionEnd
+%func_2_vf4_ = OpFunction %v4float None %9 ; Returns the vec4 loaded from its pointer parameter %a_2.
+        %a_2 = OpFunctionParameter %_ptr_Function_v4float
+         %15 = OpLabel
+         %19 = OpLoad %v4float %a_2
+               OpReturnValue %19
+               OpFunctionEnd
diff --git a/shaders/asm/frag/empty-struct.asm.frag b/shaders/asm/frag/empty-struct.asm.frag
new file mode 100644
index 0000000000..701f9f2a1e
--- /dev/null
+++ b/shaders/asm/frag/empty-struct.asm.frag
@@ -0,0 +1,56 @@
+; SPIR-V
+; Version: 1.2
+; Generator: Khronos; 0
+; Bound: 43
+; Schema: 0
+               OpCapability Linkage
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %EntryPoint_Main "main"
+               OpExecutionMode %EntryPoint_Main OriginUpperLeft
+               OpSource Unknown 100
+               OpName %EmptyStructTest "EmptyStructTest"
+               OpName %GetValue "GetValue"
+               OpName %GetValue2 "GetValue"
+               OpName %self "self"
+               OpName %self2 "self"
+               OpName %emptyStruct "emptyStruct"
+               OpName %value "value"
+               OpName %EntryPoint_Main "EntryPoint_Main"
+
+%EmptyStructTest = OpTypeStruct
+%_ptr_Function_EmptyStructTest = OpTypePointer Function %EmptyStructTest
+      %float = OpTypeFloat 32
+%_ptr_Function_float = OpTypePointer Function %float
+          %5 = OpTypeFunction %float %_ptr_Function_EmptyStructTest
+          %6 = OpTypeFunction %float %EmptyStructTest
+       %void = OpTypeVoid
+%_ptr_Function_void = OpTypePointer Function %void
+          %8 = OpTypeFunction %void %_ptr_Function_EmptyStructTest
+          %9 = OpTypeFunction %void
+    %float_0 = OpConstant %float 0
+
+   %GetValue = OpFunction %float None %5 ; Takes a Function-storage pointer to the empty struct; always returns %float_0.
+       %self = OpFunctionParameter %_ptr_Function_EmptyStructTest
+         %13 = OpLabel
+               OpReturnValue %float_0
+               OpFunctionEnd
+
+   %GetValue2 = OpFunction %float None %6 ; Takes the empty struct by value; always returns %float_0.
+       %self2 = OpFunctionParameter %EmptyStructTest
+         %14 = OpLabel
+               OpReturnValue %float_0
+               OpFunctionEnd
+
+%EntryPoint_Main = OpFunction %void None %9 ; Calls %GetValue with a pointer to an empty-struct variable and %GetValue2 with a constructed empty-struct value, storing each result to %value.
+         %37 = OpLabel
+     %emptyStruct = OpVariable %_ptr_Function_EmptyStructTest Function
+         %18 = OpVariable %_ptr_Function_EmptyStructTest Function
+      %value = OpVariable %_ptr_Function_float Function
+	  %value2 = OpCompositeConstruct %EmptyStructTest ; Composite construct with zero constituents.
+         %22 = OpFunctionCall %float %GetValue %emptyStruct
+         %23 = OpFunctionCall %float %GetValue2 %value2
+               OpStore %value %22
+               OpStore %value %23 ; Overwrites the previous store to %value.
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders/asm/frag/hlsl-sample-cmp-level-zero-cube.asm.frag b/shaders/asm/frag/hlsl-sample-cmp-level-zero-cube.asm.frag
index 2be18cfeeb..75ce80bfd4 100644
--- a/shaders/asm/frag/hlsl-sample-cmp-level-zero-cube.asm.frag
+++ b/shaders/asm/frag/hlsl-sample-cmp-level-zero-cube.asm.frag
@@ -16,6 +16,8 @@
                OpName %_entryPointOutput "@entryPointOutput"
                OpDecorate %pointLightShadowMap DescriptorSet 0
                OpDecorate %shadowSamplerPCF DescriptorSet 0
+               OpDecorate %pointLightShadowMap Binding 0
+               OpDecorate %shadowSamplerPCF Binding 1
                OpDecorate %_entryPointOutput Location 0
        %void = OpTypeVoid
           %3 = OpTypeFunction %void
diff --git a/shaders/asm/frag/hlsl-sample-cmp-level-zero.asm.frag b/shaders/asm/frag/hlsl-sample-cmp-level-zero.asm.frag
index 34fb6e834b..bb0a1ea313 100644
--- a/shaders/asm/frag/hlsl-sample-cmp-level-zero.asm.frag
+++ b/shaders/asm/frag/hlsl-sample-cmp-level-zero.asm.frag
@@ -29,6 +29,8 @@
                OpName %param_1 "param"
                OpDecorate %ShadowMap DescriptorSet 0
                OpDecorate %ShadowSamplerPCF DescriptorSet 0
+               OpDecorate %ShadowMap Binding 0
+               OpDecorate %ShadowSamplerPCF Binding 1
                OpDecorate %texCoords_1 Location 0
                OpDecorate %cascadeIndex_1 Location 1
                OpDecorate %fragDepth_1 Location 2
diff --git a/shaders/asm/frag/image-extract-reuse.asm.frag b/shaders/asm/frag/image-extract-reuse.asm.frag
new file mode 100644
index 0000000000..63c8ab57a5
--- /dev/null
+++ b/shaders/asm/frag/image-extract-reuse.asm.frag
@@ -0,0 +1,41 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 6
+; Bound: 19
+; Schema: 0
+               OpCapability Shader
+               OpCapability ImageQuery
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main" %Size
+               OpExecutionMode %main OriginUpperLeft
+               OpSource GLSL 450
+               OpName %main "main"
+               OpName %Size "Size"
+               OpName %uTexture "uTexture"
+               OpDecorate %Size Location 0
+               OpDecorate %uTexture DescriptorSet 0
+               OpDecorate %uTexture Binding 0
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+        %int = OpTypeInt 32 1
+      %v2int = OpTypeVector %int 2
+%_ptr_Output_v2int = OpTypePointer Output %v2int
+       %Size = OpVariable %_ptr_Output_v2int Output
+      %float = OpTypeFloat 32
+         %11 = OpTypeImage %float 2D 0 0 0 1 Unknown
+         %12 = OpTypeSampledImage %11
+%_ptr_UniformConstant_12 = OpTypePointer UniformConstant %12
+   %uTexture = OpVariable %_ptr_UniformConstant_12 UniformConstant
+      %int_0 = OpConstant %int 0
+      %int_1 = OpConstant %int 1
+       %main = OpFunction %void None %3 ; Extracts the image from the loaded sampled image once and uses that single result for two size queries.
+          %5 = OpLabel
+         %15 = OpLoad %12 %uTexture
+         %17 = OpImage %11 %15 ; Single OpImage result shared by both queries below.
+         %18 = OpImageQuerySizeLod %v2int %17 %int_0
+         %19 = OpImageQuerySizeLod %v2int %17 %int_1
+		 %20 = OpIAdd %v2int %18 %19 ; Sum of the two queried sizes. NOTE(review): %19 and %20 are not below the "; Bound: 19" header comment of this file - confirm the assembler recomputes the bound.
+               OpStore %Size %20
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders/asm/frag/image-query-no-sampler.vk.asm.frag b/shaders/asm/frag/image-query-no-sampler.vk.asm.frag
new file mode 100644
index 0000000000..a232bd4898
--- /dev/null
+++ b/shaders/asm/frag/image-query-no-sampler.vk.asm.frag
@@ -0,0 +1,57 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 6
+; Bound: 36
+; Schema: 0
+               OpCapability Shader
+               OpCapability ImageQuery
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main"
+               OpExecutionMode %main OriginUpperLeft
+               OpSource GLSL 450
+               OpName %main "main"
+               OpName %b "b"
+               OpName %uSampler2D "uSampler2D"
+               OpName %c "c"
+               OpName %uSampler2DMS "uSampler2DMS"
+               OpName %l1 "l1"
+               OpName %s0 "s0"
+               OpDecorate %uSampler2D DescriptorSet 0
+               OpDecorate %uSampler2D Binding 0
+               OpDecorate %uSampler2DMS DescriptorSet 0
+               OpDecorate %uSampler2DMS Binding 0
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+        %int = OpTypeInt 32 1
+      %v2int = OpTypeVector %int 2
+%_ptr_Function_v2int = OpTypePointer Function %v2int
+      %float = OpTypeFloat 32
+         %11 = OpTypeImage %float 2D 0 0 0 1 Unknown
+%_ptr_UniformConstant_12 = OpTypePointer UniformConstant %11
+ %uSampler2D = OpVariable %_ptr_UniformConstant_12 UniformConstant
+      %int_0 = OpConstant %int 0
+         %20 = OpTypeImage %float 2D 0 0 1 1 Unknown
+%_ptr_UniformConstant_21 = OpTypePointer UniformConstant %20
+%uSampler2DMS = OpVariable %_ptr_UniformConstant_21 UniformConstant
+%_ptr_Function_int = OpTypePointer Function %int
+       %main = OpFunction %void None %3 ; Runs size, level-count and sample-count queries directly on values of the OpTypeImage types %11 and %20; no sampled-image value is created.
+          %5 = OpLabel
+          %b = OpVariable %_ptr_Function_v2int Function
+          %c = OpVariable %_ptr_Function_v2int Function
+         %l1 = OpVariable %_ptr_Function_int Function
+         %s0 = OpVariable %_ptr_Function_int Function
+         %15 = OpLoad %11 %uSampler2D
+         %18 = OpImageQuerySizeLod %v2int %15 %int_0 ; Size query at LOD %int_0 on the %11 image.
+               OpStore %b %18
+         %24 = OpLoad %20 %uSampler2DMS
+         %26 = OpImageQuerySize %v2int %24 ; Size query without a LOD operand on the %20 image.
+               OpStore %c %26
+         %29 = OpLoad %11 %uSampler2D
+         %31 = OpImageQueryLevels %int %29
+               OpStore %l1 %31
+         %33 = OpLoad %20 %uSampler2DMS
+         %35 = OpImageQuerySamples %int %33
+               OpStore %s0 %35
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders/asm/frag/implicit-read-dep-phi.asm.frag b/shaders/asm/frag/implicit-read-dep-phi.asm.frag
new file mode 100644
index 0000000000..ccdfeef58d
--- /dev/null
+++ b/shaders/asm/frag/implicit-read-dep-phi.asm.frag
@@ -0,0 +1,81 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 3
+; Bound: 60
+; Schema: 0
+               OpCapability Shader
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main" %v0 %FragColor
+               OpExecutionMode %main OriginUpperLeft
+               OpSource GLSL 450
+               OpName %main "main"
+               OpName %phi "phi"
+               OpName %i "i"
+               OpName %v0 "v0"
+               OpName %FragColor "FragColor"
+               OpName %uImage "uImage"
+               OpDecorate %v0 Location 0
+               OpDecorate %FragColor Location 0
+               OpDecorate %uImage DescriptorSet 0
+               OpDecorate %uImage Binding 0
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+      %float = OpTypeFloat 32
+%_ptr_Function_float = OpTypePointer Function %float
+    %float_1 = OpConstant %float 1
+        %int = OpTypeInt 32 1
+%_ptr_Function_int = OpTypePointer Function %int
+      %int_0 = OpConstant %int 0
+      %int_4 = OpConstant %int 4
+       %bool = OpTypeBool
+    %v4float = OpTypeVector %float 4
+%_ptr_Input_v4float = OpTypePointer Input %v4float
+         %v0 = OpVariable %_ptr_Input_v4float Input
+%_ptr_Input_float = OpTypePointer Input %float
+    %float_0 = OpConstant %float 0
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+  %FragColor = OpVariable %_ptr_Output_v4float Output
+         %36 = OpTypeImage %float 2D 0 0 0 1 Unknown
+         %37 = OpTypeSampledImage %36
+%_ptr_UniformConstant_37 = OpTypePointer UniformConstant %37
+     %uImage = OpVariable %_ptr_UniformConstant_37 UniformConstant
+    %v2float = OpTypeVector %float 2
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+    %float_2 = OpConstant %float 2
+      %int_1 = OpConstant %int 1
+	  %float_1_vec = OpConstantComposite %v4float %float_1 %float_2 %float_1 %float_2
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+          %i = OpVariable %_ptr_Function_int Function
+               OpStore %i %int_0
+               OpBranch %loop_header
+         %loop_header = OpLabel
+        %phi = OpPhi %float %float_1 %5 %phi_plus_2 %continue_block
+		%tex_phi = OpPhi %v4float %float_1_vec %5 %texture_load_result %continue_block
+               OpLoopMerge %merge_block %continue_block None
+               OpBranch %loop_body
+         %loop_body = OpLabel
+               OpStore %FragColor %tex_phi
+         %19 = OpLoad %int %i
+         %22 = OpSLessThan %bool %19 %int_4
+               OpBranchConditional %22 %15 %merge_block
+         %15 = OpLabel
+         %26 = OpLoad %int %i
+         %28 = OpAccessChain %_ptr_Input_float %v0 %26
+         %29 = OpLoad %float %28
+         %31 = OpFOrdGreaterThan %bool %29 %float_0
+               OpBranchConditional %31 %continue_block %merge_block
+         %continue_block = OpLabel
+         %40 = OpLoad %37 %uImage
+         %43 = OpCompositeConstruct %v2float %phi %phi
+         %texture_load_result = OpImageSampleExplicitLod %v4float %40 %43 Lod %float_0
+         %phi_plus_2 = OpFAdd %float %phi %float_2
+         %54 = OpLoad %int %i
+         %56 = OpIAdd %int %54 %int_1
+               OpStore %i %56
+               OpBranch %loop_header
+         %merge_block = OpLabel
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders/asm/frag/loop-merge-to-continue.asm.frag b/shaders/asm/frag/loop-merge-to-continue.asm.frag
new file mode 100644
index 0000000000..f2acc43604
--- /dev/null
+++ b/shaders/asm/frag/loop-merge-to-continue.asm.frag
@@ -0,0 +1,85 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 3
+; Bound: 51
+; Schema: 0
+               OpCapability Shader
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main" %FragColor %v0
+               OpExecutionMode %main OriginUpperLeft
+               OpSource GLSL 450
+               OpName %main "main"
+               OpName %FragColor "FragColor"
+               OpName %i "i"
+               OpName %j "j"
+               OpName %v0 "v0"
+               OpDecorate %FragColor Location 0
+               OpDecorate %v0 Location 0
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+  %FragColor = OpVariable %_ptr_Output_v4float Output
+    %float_1 = OpConstant %float 1
+         %11 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1
+        %int = OpTypeInt 32 1
+%_ptr_Function_int = OpTypePointer Function %int
+      %int_0 = OpConstant %int 0
+      %int_4 = OpConstant %int 4
+       %bool = OpTypeBool
+%_ptr_Input_v4float = OpTypePointer Input %v4float
+         %v0 = OpVariable %_ptr_Input_v4float Input
+      %int_3 = OpConstant %int 3
+%_ptr_Input_float = OpTypePointer Input %float
+      %int_1 = OpConstant %int 1
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+          %i = OpVariable %_ptr_Function_int Function
+          %j = OpVariable %_ptr_Function_int Function
+               OpStore %FragColor %11
+               OpStore %i %int_0
+               OpBranch %16
+         %16 = OpLabel
+               OpLoopMerge %18 %19 None
+               OpBranch %20
+         %20 = OpLabel
+         %21 = OpLoad %int %i
+         %24 = OpSLessThan %bool %21 %int_4
+               OpBranchConditional %24 %17 %18
+         %17 = OpLabel
+               OpStore %j %int_0
+               OpBranch %26
+         %26 = OpLabel
+               OpLoopMerge %19 %29 None
+               OpBranch %30
+         %30 = OpLabel
+         %31 = OpLoad %int %j
+         %32 = OpSLessThan %bool %31 %int_4
+               OpBranchConditional %32 %27 %19
+         %27 = OpLabel
+         %35 = OpLoad %int %i
+         %36 = OpLoad %int %j
+         %37 = OpIAdd %int %35 %36
+         %39 = OpBitwiseAnd %int %37 %int_3
+         %41 = OpAccessChain %_ptr_Input_float %v0 %39
+         %42 = OpLoad %float %41
+         %43 = OpLoad %v4float %FragColor
+         %44 = OpCompositeConstruct %v4float %42 %42 %42 %42
+         %45 = OpFAdd %v4float %43 %44
+               OpStore %FragColor %45
+               OpBranch %29
+         %29 = OpLabel
+         %46 = OpLoad %int %j
+         %48 = OpIAdd %int %46 %int_1
+               OpStore %j %48
+               OpBranch %26
+         %19 = OpLabel
+         %49 = OpLoad %int %i
+         %50 = OpIAdd %int %49 %int_1
+               OpStore %i %50
+               OpBranch %16
+         %18 = OpLabel
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders/asm/frag/sampler-buffer-array-without-sampler.asm.frag b/shaders/asm/frag/sampler-buffer-array-without-sampler.asm.frag
new file mode 100644
index 0000000000..0c3833e7ec
--- /dev/null
+++ b/shaders/asm/frag/sampler-buffer-array-without-sampler.asm.frag
@@ -0,0 +1,86 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 6
+; Bound: 63
+; Schema: 0
+               OpCapability Shader
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main" %FragColor
+               OpExecutionMode %main OriginUpperLeft
+               OpSource GLSL 450
+               OpName %main "main"
+               OpName %sample_from_func_s21_4__ "sample_from_func(s21[4];"
+               OpName %uSampler "uSampler"
+               OpName %sample_one_from_func_s21_ "sample_one_from_func(s21;"
+               OpName %uSampler_0 "uSampler"
+               OpName %Registers "Registers"
+               OpMemberName %Registers 0 "index"
+               OpName %registers "registers"
+               OpName %FragColor "FragColor"
+               OpName %uSampler_1 "uSampler"
+               OpMemberDecorate %Registers 0 Offset 0
+               OpDecorate %Registers Block
+               OpDecorate %FragColor Location 0
+               OpDecorate %uSampler_1 DescriptorSet 0
+               OpDecorate %uSampler_1 Binding 0
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+      %float = OpTypeFloat 32
+          %7 = OpTypeImage %float 2D 0 0 0 1 Unknown
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_8_uint_4 = OpTypeArray %7 %uint_4
+%_ptr_UniformConstant__arr_8_uint_4 = OpTypePointer UniformConstant %_arr_8_uint_4
+    %v4float = OpTypeVector %float 4
+         %14 = OpTypeFunction %v4float %_ptr_UniformConstant__arr_8_uint_4
+%_ptr_UniformConstant_8 = OpTypePointer UniformConstant %7
+         %19 = OpTypeFunction %v4float %_ptr_UniformConstant_8
+        %int = OpTypeInt 32 1
+  %Registers = OpTypeStruct %int
+%_ptr_PushConstant_Registers = OpTypePointer PushConstant %Registers
+  %registers = OpVariable %_ptr_PushConstant_Registers PushConstant
+      %int_0 = OpConstant %int 0
+%_ptr_PushConstant_int = OpTypePointer PushConstant %int
+      %v2int = OpTypeVector %int 2
+      %int_4 = OpConstant %int 4
+         %35 = OpConstantComposite %v2int %int_4 %int_4
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+  %FragColor = OpVariable %_ptr_Output_v4float Output
+ %uSampler_1 = OpVariable %_ptr_UniformConstant__arr_8_uint_4 UniformConstant
+     %int_10 = OpConstant %int 10
+         %53 = OpConstantComposite %v2int %int_10 %int_10
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+         %48 = OpAccessChain %_ptr_PushConstant_int %registers %int_0
+         %49 = OpLoad %int %48
+         %50 = OpAccessChain %_ptr_UniformConstant_8 %uSampler_1 %49
+         %51 = OpLoad %7 %50
+         %55 = OpImageFetch %v4float %51 %53 Lod %int_0
+         %56 = OpFunctionCall %v4float %sample_from_func_s21_4__ %uSampler_1
+         %57 = OpFAdd %v4float %55 %56
+         %58 = OpAccessChain %_ptr_PushConstant_int %registers %int_0
+         %59 = OpLoad %int %58
+         %60 = OpAccessChain %_ptr_UniformConstant_8 %uSampler_1 %59
+         %61 = OpFunctionCall %v4float %sample_one_from_func_s21_ %60
+         %62 = OpFAdd %v4float %57 %61
+               OpStore %FragColor %62
+               OpReturn
+               OpFunctionEnd
+%sample_from_func_s21_4__ = OpFunction %v4float None %14
+   %uSampler = OpFunctionParameter %_ptr_UniformConstant__arr_8_uint_4
+         %17 = OpLabel
+         %29 = OpAccessChain %_ptr_PushConstant_int %registers %int_0
+         %30 = OpLoad %int %29
+         %31 = OpAccessChain %_ptr_UniformConstant_8 %uSampler %30
+         %32 = OpLoad %7 %31
+         %37 = OpImageFetch %v4float %32 %35 Lod %int_0
+               OpReturnValue %37
+               OpFunctionEnd
+%sample_one_from_func_s21_ = OpFunction %v4float None %19
+ %uSampler_0 = OpFunctionParameter %_ptr_UniformConstant_8
+         %22 = OpLabel
+         %40 = OpLoad %7 %uSampler_0
+         %42 = OpImageFetch %v4float %40 %35 Lod %int_0
+               OpReturnValue %42
+               OpFunctionEnd
diff --git a/shaders/asm/frag/sampler-buffer-without-sampler.asm.frag b/shaders/asm/frag/sampler-buffer-without-sampler.asm.frag
index 9c08fc2830..e6776eaf5e 100644
--- a/shaders/asm/frag/sampler-buffer-without-sampler.asm.frag
+++ b/shaders/asm/frag/sampler-buffer-without-sampler.asm.frag
@@ -5,6 +5,7 @@
 ; Schema: 0
                OpCapability Shader
                OpCapability SampledBuffer
+               OpCapability ImageBuffer
           %1 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical GLSL450
                OpEntryPoint Fragment %main "main" %_entryPointOutput
@@ -17,6 +18,8 @@
                OpName %_entryPointOutput "@entryPointOutput"
                OpDecorate %RWTex DescriptorSet 0
                OpDecorate %Tex DescriptorSet 0
+               OpDecorate %RWTex Binding 0
+               OpDecorate %Tex Binding 1
                OpDecorate %_entryPointOutput Location 0
        %void = OpTypeVoid
           %3 = OpTypeFunction %void
diff --git a/shaders/asm/frag/selection-merge-to-continue.asm.frag b/shaders/asm/frag/selection-merge-to-continue.asm.frag
new file mode 100644
index 0000000000..ecc491594f
--- /dev/null
+++ b/shaders/asm/frag/selection-merge-to-continue.asm.frag
@@ -0,0 +1,85 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 3
+; Bound: 55
+; Schema: 0
+               OpCapability Shader
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main" %FragColor %v0
+               OpExecutionMode %main OriginUpperLeft
+               OpSource GLSL 450
+               OpName %main "main"
+               OpName %FragColor "FragColor"
+               OpName %i "i"
+               OpName %v0 "v0"
+               OpDecorate %FragColor Location 0
+               OpDecorate %v0 Location 0
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+  %FragColor = OpVariable %_ptr_Output_v4float Output
+    %float_1 = OpConstant %float 1
+         %11 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1
+        %int = OpTypeInt 32 1
+%_ptr_Function_int = OpTypePointer Function %int
+      %int_0 = OpConstant %int 0
+      %int_4 = OpConstant %int 4
+       %bool = OpTypeBool
+%_ptr_Input_v4float = OpTypePointer Input %v4float
+         %v0 = OpVariable %_ptr_Input_v4float Input
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Input_float = OpTypePointer Input %float
+   %float_20 = OpConstant %float 20
+      %int_3 = OpConstant %int 3
+      %int_1 = OpConstant %int 1
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+          %i = OpVariable %_ptr_Function_int Function
+               OpStore %FragColor %11
+               OpStore %i %int_0
+               OpBranch %16
+         %16 = OpLabel
+               OpLoopMerge %18 %19 None
+               OpBranch %20
+         %20 = OpLabel
+         %21 = OpLoad %int %i
+         %24 = OpSLessThan %bool %21 %int_4
+               OpBranchConditional %24 %17 %18
+         %17 = OpLabel
+         %30 = OpAccessChain %_ptr_Input_float %v0 %uint_0
+         %31 = OpLoad %float %30
+         %33 = OpFOrdEqual %bool %31 %float_20
+               OpSelectionMerge %19 None
+               OpBranchConditional %33 %34 %44
+         %34 = OpLabel
+         %36 = OpLoad %int %i
+         %38 = OpBitwiseAnd %int %36 %int_3
+         %39 = OpAccessChain %_ptr_Input_float %v0 %38
+         %40 = OpLoad %float %39
+         %41 = OpLoad %v4float %FragColor
+         %42 = OpCompositeConstruct %v4float %40 %40 %40 %40
+         %43 = OpFAdd %v4float %41 %42
+               OpStore %FragColor %43
+               OpBranch %19
+         %44 = OpLabel
+         %45 = OpLoad %int %i
+         %47 = OpBitwiseAnd %int %45 %int_1
+         %48 = OpAccessChain %_ptr_Input_float %v0 %47
+         %49 = OpLoad %float %48
+         %50 = OpLoad %v4float %FragColor
+         %51 = OpCompositeConstruct %v4float %49 %49 %49 %49
+         %52 = OpFAdd %v4float %50 %51
+               OpStore %FragColor %52
+               OpBranch %19
+         %19 = OpLabel
+         %53 = OpLoad %int %i
+         %54 = OpIAdd %int %53 %int_1
+               OpStore %i %54
+               OpBranch %16
+         %18 = OpLabel
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders/asm/frag/srem.asm.frag b/shaders/asm/frag/srem.asm.frag
new file mode 100644
index 0000000000..c6f8e27cbd
--- /dev/null
+++ b/shaders/asm/frag/srem.asm.frag
@@ -0,0 +1,43 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 2
+; Bound: 23
+; Schema: 0
+               OpCapability Shader
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main" %FragColor %vA %vB
+               OpExecutionMode %main OriginUpperLeft
+               OpSource ESSL 310
+               OpName %main "main"
+               OpName %FragColor "FragColor"
+               OpName %vA "vA"
+               OpName %vB "vB"
+               OpDecorate %FragColor RelaxedPrecision
+               OpDecorate %FragColor Location 0
+               OpDecorate %vA Flat
+               OpDecorate %vA Location 0
+               OpDecorate %vB Flat
+               OpDecorate %vB Location 1
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+  %FragColor = OpVariable %_ptr_Output_v4float Output
+        %int = OpTypeInt 32 1
+      %v4int = OpTypeVector %int 4
+%_ptr_Input_v4int = OpTypePointer Input %v4int
+         %vA = OpVariable %_ptr_Input_v4int Input
+         %vB = OpVariable %_ptr_Input_v4int Input
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+         %14 = OpLoad %v4int %vA
+         %16 = OpLoad %v4int %vB
+         %17 = OpLoad %v4int %vA
+         %18 = OpLoad %v4int %vB
+         %19 = OpSRem %v4int %17 %18
+         %20 = OpConvertSToF %v4float %19
+               OpStore %FragColor %20
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders/asm/frag/switch-merge-to-continue.asm.frag b/shaders/asm/frag/switch-merge-to-continue.asm.frag
new file mode 100644
index 0000000000..94ef5f538f
--- /dev/null
+++ b/shaders/asm/frag/switch-merge-to-continue.asm.frag
@@ -0,0 +1,85 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 3
+; Bound: 57
+; Schema: 0
+               OpCapability Shader
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main" %FragColor %v0
+               OpExecutionMode %main OriginUpperLeft
+               OpSource GLSL 450
+               OpName %main "main"
+               OpName %FragColor "FragColor"
+               OpName %i "i"
+               OpName %v0 "v0"
+               OpDecorate %FragColor Location 0
+               OpDecorate %v0 Location 0
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+  %FragColor = OpVariable %_ptr_Output_v4float Output
+    %float_1 = OpConstant %float 1
+         %11 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1
+        %int = OpTypeInt 32 1
+%_ptr_Function_int = OpTypePointer Function %int
+      %int_0 = OpConstant %int 0
+      %int_4 = OpConstant %int 4
+       %bool = OpTypeBool
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Output_float = OpTypePointer Output %float
+    %float_3 = OpConstant %float 3
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+      %int_1 = OpConstant %int 1
+%_ptr_Input_v4float = OpTypePointer Input %v4float
+         %v0 = OpVariable %_ptr_Input_v4float Input
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+          %i = OpVariable %_ptr_Function_int Function
+               OpStore %FragColor %11
+               OpStore %i %int_0
+               OpBranch %16
+         %16 = OpLabel
+               OpLoopMerge %18 %19 None
+               OpBranch %20
+         %20 = OpLabel
+         %21 = OpLoad %int %i
+         %24 = OpSLessThan %bool %21 %int_4
+               OpBranchConditional %24 %17 %18
+         %17 = OpLabel
+         %25 = OpLoad %int %i
+               OpSelectionMerge %19 None
+               OpSwitch %25 %28 0 %26 1 %27
+         %28 = OpLabel
+         %46 = OpAccessChain %_ptr_Output_float %FragColor %uint_2
+         %47 = OpLoad %float %46
+         %48 = OpFAdd %float %47 %float_3
+         %49 = OpAccessChain %_ptr_Output_float %FragColor %uint_2
+               OpStore %49 %48
+               OpBranch %19
+         %26 = OpLabel
+         %33 = OpAccessChain %_ptr_Output_float %FragColor %uint_0
+         %34 = OpLoad %float %33
+         %35 = OpFAdd %float %34 %float_1
+         %36 = OpAccessChain %_ptr_Output_float %FragColor %uint_0
+               OpStore %36 %35
+               OpBranch %19
+         %27 = OpLabel
+         %40 = OpAccessChain %_ptr_Output_float %FragColor %uint_1
+         %41 = OpLoad %float %40
+         %42 = OpFAdd %float %41 %float_3
+         %43 = OpAccessChain %_ptr_Output_float %FragColor %uint_1
+               OpStore %43 %42
+               OpBranch %19
+         %19 = OpLabel
+         %52 = OpLoad %int %i
+         %54 = OpIAdd %int %52 %int_1
+               OpStore %i %54
+               OpBranch %16
+         %18 = OpLabel
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders/asm/frag/temporary-name-alias.asm.frag b/shaders/asm/frag/temporary-name-alias.asm.frag
new file mode 100644
index 0000000000..fc3c0c362b
--- /dev/null
+++ b/shaders/asm/frag/temporary-name-alias.asm.frag
@@ -0,0 +1,48 @@
+; SPIR-V
+; Version: 1.2
+; Generator: Khronos; 0
+; Bound: 51
+; Schema: 0
+               OpCapability Linkage
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %EntryPoint_Main "main"
+               OpExecutionMode %EntryPoint_Main OriginUpperLeft
+               OpSource Unknown 100
+               OpName %mat3 "mat3"
+               OpName %constituent "constituent"
+               OpName %constituent_0 "constituent"
+               OpName %constituent_1 "constituent"
+               OpName %constituent_2 "constituent"
+               OpName %constituent_3 "constituent"
+               OpName %constituent_4 "constituent"
+               OpName %constituent_5 "constituent"
+               OpName %constituent_6 "constituent"
+               OpName %EntryPoint_Main "EntryPoint_Main"
+       %void = OpTypeVoid
+%_ptr_Function_void = OpTypePointer Function %void
+      %float = OpTypeFloat 32
+        %int = OpTypeInt 32 1
+    %v3float = OpTypeVector %float 3
+%mat3v3float = OpTypeMatrix %v3float 3
+%_ptr_Function_mat3v3float = OpTypePointer Function %mat3v3float
+         %14 = OpTypeFunction %void
+      %int_0 = OpConstant %int 0
+      %int_1 = OpConstant %int 1
+%EntryPoint_Main = OpFunction %void None %14
+         %45 = OpLabel
+     %mat3 = OpVariable %_ptr_Function_mat3v3float Function
+%constituent = OpConvertSToF %float %int_0
+%constituent_0 = OpCompositeConstruct %v3float %constituent %constituent %constituent
+%constituent_1 = OpCompositeConstruct %v3float %constituent %constituent %constituent
+%constituent_2 = OpCompositeConstruct %v3float %constituent %constituent %constituent
+         %25 = OpCompositeConstruct %mat3v3float %constituent_0 %constituent_1 %constituent_2
+               OpStore %mat3 %25
+%constituent_3 = OpConvertSToF %float %int_1
+%constituent_4 = OpCompositeConstruct %v3float %constituent_3 %constituent_3 %constituent_3
+%constituent_5 = OpCompositeConstruct %v3float %constituent_3 %constituent_3 %constituent_3
+%constituent_6 = OpCompositeConstruct %v3float %constituent_3 %constituent_3 %constituent_3
+         %30 = OpCompositeConstruct %mat3v3float %constituent_4 %constituent_5 %constituent_6
+               OpStore %mat3 %30
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders/asm/frag/texel-fetch-no-lod.asm.frag b/shaders/asm/frag/texel-fetch-no-lod.asm.frag
new file mode 100644
index 0000000000..53dc63809c
--- /dev/null
+++ b/shaders/asm/frag/texel-fetch-no-lod.asm.frag
@@ -0,0 +1,46 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 6
+; Bound: 26
+; Schema: 0
+               OpCapability Shader
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main" %FragColor %gl_FragCoord
+               OpExecutionMode %main OriginUpperLeft
+               OpSource GLSL 450
+               OpName %main "main"
+               OpName %FragColor "FragColor"
+               OpName %uTexture "uTexture"
+               OpName %gl_FragCoord "gl_FragCoord"
+               OpDecorate %FragColor Location 0
+               OpDecorate %uTexture DescriptorSet 0
+               OpDecorate %uTexture Binding 0
+               OpDecorate %gl_FragCoord BuiltIn FragCoord
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+  %FragColor = OpVariable %_ptr_Output_v4float Output
+         %10 = OpTypeImage %float 2D 0 0 0 1 Unknown
+         %11 = OpTypeSampledImage %10
+%_ptr_UniformConstant_11 = OpTypePointer UniformConstant %11
+   %uTexture = OpVariable %_ptr_UniformConstant_11 UniformConstant
+%_ptr_Input_v4float = OpTypePointer Input %v4float
+%gl_FragCoord = OpVariable %_ptr_Input_v4float Input
+    %v2float = OpTypeVector %float 2
+        %int = OpTypeInt 32 1
+      %v2int = OpTypeVector %int 2
+      %int_0 = OpConstant %int 0
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+         %14 = OpLoad %11 %uTexture
+         %18 = OpLoad %v4float %gl_FragCoord
+         %19 = OpVectorShuffle %v2float %18 %18 0 1
+         %22 = OpConvertFToS %v2int %19
+         %24 = OpImage %10 %14
+         %25 = OpImageFetch %v4float %24 %22
+               OpStore %FragColor %25
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders/asm/geom/inout-split-access-chain-handle.asm.geom b/shaders/asm/geom/inout-split-access-chain-handle.asm.geom
new file mode 100644
index 0000000000..d011cc6967
--- /dev/null
+++ b/shaders/asm/geom/inout-split-access-chain-handle.asm.geom
@@ -0,0 +1,90 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 3
+; Bound: 42
+; Schema: 0
+               OpCapability Geometry
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Geometry %main "main" %gl_in %_
+               OpExecutionMode %main Triangles
+               OpExecutionMode %main Invocations 1
+               OpExecutionMode %main OutputTriangleStrip
+               OpExecutionMode %main OutputVertices 5
+               OpSource GLSL 440
+               OpName %main "main"
+               OpName %Data "Data"
+               OpMemberName %Data 0 "ApiPerspectivePosition"
+               OpName %Copy_struct_Data_vf41_3__ "Copy(struct-Data-vf41[3];"
+               OpName %inputStream "inputStream"
+               OpName %gl_PerVertex "gl_PerVertex"
+               OpMemberName %gl_PerVertex 0 "gl_Position"
+               OpMemberName %gl_PerVertex 1 "gl_PointSize"
+               OpMemberName %gl_PerVertex 2 "gl_ClipDistance"
+               OpName %gl_in "gl_in"
+               OpName %inputStream_0 "inputStream"
+               OpName %param "param"
+               OpName %gl_PerVertex_0 "gl_PerVertex"
+               OpMemberName %gl_PerVertex_0 0 "gl_Position"
+               OpMemberName %gl_PerVertex_0 1 "gl_PointSize"
+               OpMemberName %gl_PerVertex_0 2 "gl_ClipDistance"
+               OpName %_ ""
+               OpMemberDecorate %gl_PerVertex 0 BuiltIn Position
+               OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize
+               OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance
+               OpDecorate %gl_PerVertex Block
+               OpMemberDecorate %gl_PerVertex_0 0 BuiltIn Position
+               OpMemberDecorate %gl_PerVertex_0 1 BuiltIn PointSize
+               OpMemberDecorate %gl_PerVertex_0 2 BuiltIn ClipDistance
+               OpDecorate %gl_PerVertex_0 Block
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+       %Data = OpTypeStruct %v4float
+       %uint = OpTypeInt 32 0
+     %uint_3 = OpConstant %uint 3
+%_arr_Data_uint_3 = OpTypeArray %Data %uint_3
+%_ptr_Function__Data = OpTypePointer Function %Data
+%_ptr_Function__arr_Data_uint_3 = OpTypePointer Function %_arr_Data_uint_3
+         %13 = OpTypeFunction %void %_ptr_Function__arr_Data_uint_3
+        %int = OpTypeInt 32 1
+      %int_0 = OpConstant %int 0
+     %uint_1 = OpConstant %uint 1
+%_arr_float_uint_1 = OpTypeArray %float %uint_1
+%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1
+%_arr_gl_PerVertex_uint_3 = OpTypeArray %gl_PerVertex %uint_3
+%_ptr_Input__arr_gl_PerVertex_uint_3 = OpTypePointer Input %_arr_gl_PerVertex_uint_3
+      %gl_in = OpVariable %_ptr_Input__arr_gl_PerVertex_uint_3 Input
+%_ptr_Input_v4float = OpTypePointer Input %v4float
+%_ptr_Function_v4float = OpTypePointer Function %v4float
+%gl_PerVertex_0 = OpTypeStruct %v4float %float %_arr_float_uint_1
+%_ptr_Output_gl_PerVertex_0 = OpTypePointer Output %gl_PerVertex_0
+          %_ = OpVariable %_ptr_Output_gl_PerVertex_0 Output
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+%inputStream_0 = OpVariable %_ptr_Function__arr_Data_uint_3 Function
+      %param = OpVariable %_ptr_Function__arr_Data_uint_3 Function
+         %32 = OpLoad %_arr_Data_uint_3 %inputStream_0
+               OpStore %param %32
+         %33 = OpFunctionCall %void %Copy_struct_Data_vf41_3__ %param
+         %34 = OpLoad %_arr_Data_uint_3 %param
+               OpStore %inputStream_0 %34
+         %59 = OpAccessChain %_ptr_Function__Data %inputStream_0 %int_0
+         %38 = OpAccessChain %_ptr_Function_v4float %59 %int_0
+         %39 = OpLoad %v4float %38
+         %41 = OpAccessChain %_ptr_Output_v4float %_ %int_0
+               OpStore %41 %39
+               OpReturn
+               OpFunctionEnd
+%Copy_struct_Data_vf41_3__ = OpFunction %void None %13
+%inputStream = OpFunctionParameter %_ptr_Function__arr_Data_uint_3
+         %16 = OpLabel
+         %26 = OpAccessChain %_ptr_Input_v4float %gl_in %int_0 %int_0
+         %27 = OpLoad %v4float %26
+         %28 = OpAccessChain %_ptr_Function__Data %inputStream %int_0
+         %29 = OpAccessChain %_ptr_Function_v4float %28 %int_0
+               OpStore %29 %27
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders/asm/geom/split-access-chain-input.asm.geom b/shaders/asm/geom/split-access-chain-input.asm.geom
new file mode 100644
index 0000000000..1612c892c1
--- /dev/null
+++ b/shaders/asm/geom/split-access-chain-input.asm.geom
@@ -0,0 +1,52 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 3
+; Bound: 23
+; Schema: 0
+               OpCapability Geometry
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Geometry %main "main" %gl_in
+               OpExecutionMode %main Triangles
+               OpExecutionMode %main Invocations 1
+               OpExecutionMode %main OutputTriangleStrip
+               OpExecutionMode %main OutputVertices 3
+               OpSource GLSL 440
+               OpName %main "main"
+               OpName %position "position"
+               OpName %gl_PerVertex "gl_PerVertex"
+               OpMemberName %gl_PerVertex 0 "gl_Position"
+               OpMemberName %gl_PerVertex 1 "gl_PointSize"
+               OpMemberName %gl_PerVertex 2 "gl_ClipDistance"
+               OpName %gl_in "gl_in"
+               OpMemberDecorate %gl_PerVertex 0 BuiltIn Position
+               OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize
+               OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance
+               OpDecorate %gl_PerVertex Block
+			   OpDecorate %position BuiltIn Position
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%_ptr_Function_v4float = OpTypePointer Output %v4float
+       %uint = OpTypeInt 32 0
+     %uint_1 = OpConstant %uint 1
+%_arr_float_uint_1 = OpTypeArray %float %uint_1
+%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1
+     %uint_3 = OpConstant %uint 3
+%_arr_gl_PerVertex_uint_3 = OpTypeArray %gl_PerVertex %uint_3
+%ptr_Input_gl_PerVertex = OpTypePointer Input %gl_PerVertex
+%_ptr_Input__arr_gl_PerVertex_uint_3 = OpTypePointer Input %_arr_gl_PerVertex_uint_3
+      %gl_in = OpVariable %_ptr_Input__arr_gl_PerVertex_uint_3 Input
+   %position = OpVariable %_ptr_Function_v4float Output
+        %int = OpTypeInt 32 1
+      %int_0 = OpConstant %int 0
+%_ptr_Input_v4float = OpTypePointer Input %v4float
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+         %21 = OpAccessChain %ptr_Input_gl_PerVertex %gl_in %int_0
+         %22 = OpAccessChain %_ptr_Input_v4float %21 %int_0
+         %23 = OpLoad %v4float %22
+               OpStore %position %23
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders/asm/tesc/tess-fixed-input-array-builtin-array.asm.tesc b/shaders/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc
similarity index 100%
rename from shaders/asm/tesc/tess-fixed-input-array-builtin-array.asm.tesc
rename to shaders/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc
diff --git a/shaders/asm/vert/spec-constant-op-composite.asm.vk.vert b/shaders/asm/vert/spec-constant-op-composite.asm.vk.vert
new file mode 100644
index 0000000000..b566a3d1a0
--- /dev/null
+++ b/shaders/asm/vert/spec-constant-op-composite.asm.vk.vert
@@ -0,0 +1,98 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 1
+; Bound: 58
+; Schema: 0
+               OpCapability Shader
+               OpCapability ClipDistance
+               OpCapability CullDistance
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Vertex %4 "main" %52 %output
+               OpSource GLSL 450
+               OpName %4 "main"
+               OpName %9 "pos"
+               OpName %50 "gl_PerVertex"
+               OpMemberName %50 0 "gl_Position"
+               OpMemberName %50 1 "gl_PointSize"
+               OpMemberName %50 2 "gl_ClipDistance"
+               OpMemberName %50 3 "gl_CullDistance"
+               OpName %52 ""
+               OpDecorate %13 SpecId 201
+               OpDecorate %24 SpecId 202
+               OpMemberDecorate %50 0 BuiltIn Position
+               OpMemberDecorate %50 1 BuiltIn PointSize
+               OpMemberDecorate %50 2 BuiltIn ClipDistance
+               OpMemberDecorate %50 3 BuiltIn CullDistance
+               OpDecorate %50 Block
+               OpDecorate %57 SpecId 200
+			   OpDecorate %output Flat
+			   OpDecorate %output Location 0
+          %2 = OpTypeVoid
+          %3 = OpTypeFunction %2
+          %6 = OpTypeFloat 32
+          %7 = OpTypeVector %6 4
+          %8 = OpTypePointer Function %7
+         %10 = OpConstant %6 0
+         %11 = OpConstantComposite %7 %10 %10 %10 %10
+         %12 = OpTypeInt 32 1
+		 %int_ptr = OpTypePointer Output %12
+         %13 = OpSpecConstant %12 -10
+         %14 = OpConstant %12 2
+         %15 = OpSpecConstantOp %12 IAdd %13 %14
+         %17 = OpTypeInt 32 0
+         %18 = OpConstant %17 1
+         %19 = OpTypePointer Function %6
+         %24 = OpSpecConstant %17 100
+         %25 = OpConstant %17 5
+         %26 = OpSpecConstantOp %17 UMod %24 %25
+         %28 = OpConstant %17 2
+         %33 = OpConstant %12 20
+         %34 = OpConstant %12 30
+         %35 = OpTypeVector %12 4
+         %36 = OpSpecConstantComposite %35 %33 %34 %15 %15
+         %40 = OpTypeVector %12 2
+         %41 = OpSpecConstantOp %40 VectorShuffle %36 %36 1 0
+		 %foo = OpSpecConstantOp %12 CompositeExtract %36 1
+         %42 = OpTypeVector %6 2
+         %49 = OpTypeArray %6 %18
+         %50 = OpTypeStruct %7 %6 %49 %49
+         %51 = OpTypePointer Output %50
+         %52 = OpVariable %51 Output
+		 %output = OpVariable %int_ptr Output
+         %53 = OpConstant %12 0
+         %55 = OpTypePointer Output %7
+         %57 = OpSpecConstant %6 3.14159
+          %4 = OpFunction %2 None %3
+          %5 = OpLabel
+          %9 = OpVariable %8 Function
+               OpStore %9 %11
+         %16 = OpConvertSToF %6 %15
+         %20 = OpAccessChain %19 %9 %18
+         %21 = OpLoad %6 %20
+         %22 = OpFAdd %6 %21 %16
+         %23 = OpAccessChain %19 %9 %18
+               OpStore %23 %22
+         %27 = OpConvertUToF %6 %26
+         %29 = OpAccessChain %19 %9 %28
+         %30 = OpLoad %6 %29
+         %31 = OpFAdd %6 %30 %27
+         %32 = OpAccessChain %19 %9 %28
+               OpStore %32 %31
+         %37 = OpConvertSToF %7 %36
+         %38 = OpLoad %7 %9
+         %39 = OpFAdd %7 %38 %37
+               OpStore %9 %39
+         %43 = OpConvertSToF %42 %41
+         %44 = OpLoad %7 %9
+         %45 = OpVectorShuffle %42 %44 %44 0 1
+         %46 = OpFAdd %42 %45 %43
+         %47 = OpLoad %7 %9
+         %48 = OpVectorShuffle %7 %47 %46 4 5 2 3
+               OpStore %9 %48
+         %54 = OpLoad %7 %9
+         %56 = OpAccessChain %55 %52 %53
+               OpStore %56 %54
+			   OpStore %output %foo
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders/comp/rmw-matrix.comp b/shaders/comp/rmw-matrix.comp
new file mode 100644
index 0000000000..c158ab4ddd
--- /dev/null
+++ b/shaders/comp/rmw-matrix.comp
@@ -0,0 +1,20 @@
+#version 310 es
+layout(local_size_x = 1) in;
+
+layout(std430, binding = 0) buffer SSBO // Destinations (a, b, c) followed by the operands they are multiplied by (a1, b1, c1).
+{
+	float a;
+	vec4 b;
+	mat4 c;
+
+	float a1;
+	vec4 b1;
+	mat4 c1;
+};
+
+void main() // Read-modify-write of one scalar, one vector and one matrix SSBO member via compound multiply.
+{
+	a *= a1; // scalar * scalar
+	b *= b1; // component-wise vec4 multiply
+	c *= c1; // mat4 * mat4: linear-algebraic product, not component-wise
+}
diff --git a/shaders/comp/struct-packing.comp b/shaders/comp/struct-packing.comp
index 53a54e4927..7a1be0478d 100644
--- a/shaders/comp/struct-packing.comp
+++ b/shaders/comp/struct-packing.comp
@@ -44,7 +44,7 @@ struct Content
 	S4 m3s[8];
 };
 
-layout(binding = 1, std430) buffer SSBO1
+layout(binding = 1, std430) restrict buffer SSBO1
 {
     Content content;
     Content content1[2];
@@ -61,7 +61,7 @@ layout(binding = 1, std430) buffer SSBO1
     float array[];
 } ssbo_430;
 
-layout(binding = 0, std140) buffer SSBO0
+layout(binding = 0, std140) restrict buffer SSBO0
 {
     Content content;
     Content content1[2];
diff --git a/shaders/desktop-only/comp/fp64.desktop.comp b/shaders/desktop-only/comp/fp64.desktop.comp
index dd488a3077..2c2d5018d1 100644
--- a/shaders/desktop-only/comp/fp64.desktop.comp
+++ b/shaders/desktop-only/comp/fp64.desktop.comp
@@ -72,7 +72,7 @@ void main()
 	dvec3 e = cross(a.xyz, a.yzw);
 	a = faceforward(a, a, a);
 	a = reflect(a, a);
-	a = refract(a, a, a.x);
+	//a = refract(a, a, 1.45);
 
 	dmat4 l = matrixCompMult(amat, amat);
 	l = outerProduct(a, a);
diff --git a/shaders/desktop-only/frag/control-dependent-in-branch.desktop.frag b/shaders/desktop-only/frag/control-dependent-in-branch.desktop.frag
new file mode 100644
index 0000000000..7c75ffe1bd
--- /dev/null
+++ b/shaders/desktop-only/frag/control-dependent-in-branch.desktop.frag
@@ -0,0 +1,36 @@
+#version 450
+
+layout(location = 0) out vec4 FragColor;
+layout(binding = 0) uniform sampler2D uSampler;
+layout(location = 0) in vec4 vInput;
+
+void main() // Evaluates the sample, all derivative variants and the LOD query unconditionally, then reads them only inside the branch.
+{
+	FragColor = vInput;
+	vec4 t = texture(uSampler, vInput.xy); // sampled before the branch
+	vec4 d0 = dFdx(vInput);
+	vec4 d1 = dFdy(vInput);
+	vec4 d2 = fwidth(vInput);
+	vec4 d3 = dFdxCoarse(vInput);
+	vec4 d4 = dFdyCoarse(vInput);
+	vec4 d5 = fwidthCoarse(vInput);
+	vec4 d6 = dFdxFine(vInput);
+	vec4 d7 = dFdyFine(vInput);
+	vec4 d8 = fwidthFine(vInput);
+	vec2 lod = textureQueryLod(uSampler, vInput.zw);
+	if (vInput.y > 10.0) // every value computed above is consumed only in here
+	{
+		FragColor += t;
+		FragColor += d0;
+		FragColor += d1;
+		FragColor += d2;
+		FragColor += d3;
+		FragColor += d4;
+		FragColor += d5;
+		FragColor += d6;
+		FragColor += d7;
+		FragColor += d8;
+		FragColor += lod.xyxy; // swizzle widens the vec2 LOD result to vec4
+	}
+}
+
diff --git a/shaders/desktop-only/frag/dual-source-blending.desktop.frag b/shaders/desktop-only/frag/dual-source-blending.desktop.frag
new file mode 100644
index 0000000000..f322cf4c38
--- /dev/null
+++ b/shaders/desktop-only/frag/dual-source-blending.desktop.frag
@@ -0,0 +1,10 @@
+#version 450
+
+layout(location = 0, index = 0) out vec4 FragColor0; // both outputs share location 0 and differ only in index
+layout(location = 0, index = 1) out vec4 FragColor1;
+
+void main() // Writes a distinct constant to each of the two index-qualified outputs.
+{
+	FragColor0 = vec4(1.0);
+	FragColor1 = vec4(2.0);
+}
diff --git a/shaders/desktop-only/frag/fp16.desktop.frag b/shaders/desktop-only/frag/fp16.desktop.frag
new file mode 100644
index 0000000000..f3517a92f8
--- /dev/null
+++ b/shaders/desktop-only/frag/fp16.desktop.frag
@@ -0,0 +1,151 @@
+#version 450
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(location = 0) in float16_t v1;
+layout(location = 1) in f16vec2 v2;
+layout(location = 2) in f16vec3 v3;
+layout(location = 3) in f16vec4 v4;
+
+layout(location = 0) out float o1;
+layout(location = 1) out vec2 o2;
+layout(location = 2) out vec3 o3;
+layout(location = 3) out vec4 o4;
+
+f16mat2 test_mat2(f16vec2 a, f16vec2 b, f16vec2 c, f16vec2 d)
+{
+	return f16mat2(a, b) * f16mat2(c, d);
+}
+
+f16mat3 test_mat3(f16vec3 a, f16vec3 b, f16vec3 c, f16vec3 d, f16vec3 e, f16vec3 f)
+{
+	return f16mat3(a, b, c) * f16mat3(d, e, f);
+}
+
+void test_constants() // Declares half-precision literals (hf suffix) of several value classes; none of the locals is read.
+{
+	float16_t a = 1.0hf;
+	float16_t b = 1.5hf;
+	float16_t c = -1.5hf; // Negative value
+	float16_t d = (0.0hf / 0.0hf); // NaN
+	float16_t e = (1.0hf / 0.0hf); // +Inf
+	float16_t f = (-1.0hf / 0.0hf); // -Inf
+	float16_t g = 1014.0hf; // Large.
+	float16_t h = 0.000001hf; // Denormal: below the smallest normal half value (~6.1e-5)
+}
+
+float16_t test_result()
+{
+	return 1.0hf;
+}
+
+void test_conversions() // Converts a float16_t to int, uint, bool, float and double, then converts each result back to float16_t.
+{
+	float16_t one = test_result(); // obtained through a call rather than written as a literal
+	int a = int(one);
+	uint b = uint(one);
+	bool c = bool(one);
+	float d = float(one);
+	double e = double(one);
+	float16_t a2 = float16_t(a);
+	float16_t b2 = float16_t(b);
+	float16_t c2 = float16_t(c);
+	float16_t d2 = float16_t(d);
+	float16_t e2 = float16_t(e);
+}
+
+void test_builtins()
+{
+	f16vec4 res;
+	res = radians(v4);
+	res = degrees(v4);
+	res = sin(v4);
+	res = cos(v4);
+	res = tan(v4);
+	res = asin(v4);
+	res = atan(v4, v3.xyzz);
+	res = atan(v4);
+	res = sinh(v4);
+	res = cosh(v4);
+	res = tanh(v4);
+	res = asinh(v4);
+	res = acosh(v4);
+	res = atanh(v4);
+	res = pow(v4, v4);
+	res = exp(v4);
+	res = log(v4);
+	res = exp2(v4);
+	res = log2(v4);
+	res = sqrt(v4);
+	res = inversesqrt(v4);
+	res = abs(v4);
+	res = sign(v4);
+	res = floor(v4);
+	res = trunc(v4);
+	res = round(v4);
+	res = roundEven(v4);
+	res = ceil(v4);
+	res = fract(v4);
+	res = mod(v4, v4);
+	f16vec4 tmp;
+	res = modf(v4, tmp);
+	res = min(v4, v4);
+	res = max(v4, v4);
+	res = clamp(v4, v4, v4);
+	res = mix(v4, v4, v4);
+	res = mix(v4, v4, lessThan(v4, v4));
+	res = step(v4, v4);
+	res = smoothstep(v4, v4, v4);
+
+	bvec4 btmp = isnan(v4);
+	btmp = isinf(v4);
+	res = fma(v4, v4, v4);
+
+	ivec4 itmp;
+	res = frexp(v4, itmp);
+	res = ldexp(res, itmp);
+
+	uint pack0 = packFloat2x16(v4.xy);
+	uint pack1 = packFloat2x16(v4.zw);
+	res = f16vec4(unpackFloat2x16(pack0), unpackFloat2x16(pack1));
+
+	float16_t t0 = length(v4);
+	t0 = distance(v4, v4);
+	t0 = dot(v4, v4);
+	f16vec3 res3 = cross(v3, v3);
+	res = normalize(v4);
+	res = faceforward(v4, v4, v4);
+	res = reflect(v4, v4);
+	res = refract(v4, v4, v1);
+
+	btmp = lessThan(v4, v4);
+	btmp = lessThanEqual(v4, v4);
+	btmp = greaterThan(v4, v4);
+	btmp = greaterThanEqual(v4, v4);
+	btmp = equal(v4, v4);
+	btmp = notEqual(v4, v4);
+
+	res = dFdx(v4);
+	res = dFdy(v4);
+	res = dFdxFine(v4);
+	res = dFdyFine(v4);
+	res = dFdxCoarse(v4);
+	res = dFdyCoarse(v4);
+	res = fwidth(v4);
+	res = fwidthFine(v4);
+	res = fwidthCoarse(v4);
+
+	//res = interpolateAtCentroid(v4);
+	//res = interpolateAtSample(v4, 0);
+	//res = interpolateAtOffset(v4, f16vec2(0.1hf));
+}
+
+void main()
+{
+	// Basic matrix tests.
+	f16mat2 m0 = test_mat2(v2, v2, v3.xy, v3.xy);
+	f16mat3 m1 = test_mat3(v3, v3, v3, v4.xyz, v4.xyz, v4.yzw);
+
+	test_constants();
+	test_conversions();
+	test_builtins();
+}
diff --git a/shaders/frag/array-lut-no-loop-variable.frag b/shaders/frag/array-lut-no-loop-variable.frag
new file mode 100644
index 0000000000..3493e0cccc
--- /dev/null
+++ b/shaders/frag/array-lut-no-loop-variable.frag
@@ -0,0 +1,13 @@
+#version 310 es
+precision mediump float;
+
+layout(location = 0) out vec4 FragColor;
+layout(location = 0) in vec4 v0;
+
+void main() // Accumulates LUT entries from the for-loop's increment expression; the loop body is empty.
+{
+	float lut[5] = float[](1.0, 2.0, 3.0, 4.0, 5.0);
+	for (int i = 0; i < 4; i++, FragColor += lut[i]) // i++ is sequenced first, so lut[1]..lut[4] are added; NOTE(review): FragColor is not assigned before the += here
+	{
+	}
+}
diff --git a/shaders/frag/gather-dref.frag b/shaders/frag/gather-dref.frag
new file mode 100644
index 0000000000..a8aac56cb5
--- /dev/null
+++ b/shaders/frag/gather-dref.frag
@@ -0,0 +1,11 @@
+#version 310 es
+precision mediump float;
+
+layout(binding = 0) uniform mediump sampler2DShadow uT;
+layout(location = 0) in vec3 vUV;
+layout(location = 0) out vec4 FragColor;
+
+void main() // Gathers from a shadow sampler and writes the four comparison results.
+{
+	FragColor = textureGather(uT, vUV.xy, vUV.z); // vUV.xy is the coordinate, vUV.z the depth-compare reference
+}
diff --git a/shaders/frag/swizzle.frag b/shaders/frag/swizzle.frag
index 271ba6cb64..af22dd655d 100644
--- a/shaders/frag/swizzle.frag
+++ b/shaders/frag/swizzle.frag
@@ -1,7 +1,7 @@
 #version 310 es
 precision mediump float;
 
-layout(location = 0) uniform sampler2D samp;
+layout(binding = 0) uniform sampler2D samp;
 layout(location = 0) out vec4 FragColor;
 layout(location = 1) in vec3 vNormal;
 layout(location = 2) in vec2 vUV;
diff --git a/shaders/legacy/fragment/explicit-lod.legacy.frag b/shaders/legacy/fragment/explicit-lod.legacy.frag
index 5a2eeb9913..abe1ef2c30 100644
--- a/shaders/legacy/fragment/explicit-lod.legacy.frag
+++ b/shaders/legacy/fragment/explicit-lod.legacy.frag
@@ -2,7 +2,7 @@
 
 precision mediump float;
 
-uniform sampler2D tex;
+layout(binding = 0) uniform sampler2D tex;
 
 layout(location = 0) out vec4 FragColor;
 
diff --git a/shaders/legacy/vert/implicit-lod.legacy.vert b/shaders/legacy/vert/implicit-lod.legacy.vert
index 1f32faebdc..6065694274 100644
--- a/shaders/legacy/vert/implicit-lod.legacy.vert
+++ b/shaders/legacy/vert/implicit-lod.legacy.vert
@@ -1,6 +1,6 @@
 #version 310 es
 
-uniform sampler2D tex;
+layout(binding = 0) uniform sampler2D tex;
 
 void main()
 {
diff --git a/shaders/vert/read-from-row-major-array.vert b/shaders/vert/read-from-row-major-array.vert
new file mode 100644
index 0000000000..792fb8e36c
--- /dev/null
+++ b/shaders/vert/read-from-row-major-array.vert
@@ -0,0 +1,20 @@
+#version 310 es
+layout(location = 0) in highp vec4 a_position;
+layout(location = 0) out mediump float v_vtxResult;
+
+layout(set = 0, binding = 0, std140, row_major) uniform Block // row_major is declared on the block, not on the member
+{
+	highp mat2x3 var[3][4]; // two-dimensional array of matrices with 2 columns and 3 rows each
+};
+
+mediump float compare_float    (highp float a, highp float b)  { return abs(a - b) < 0.05 ? 1.0 : 0.0; }
+mediump float compare_vec3     (highp vec3 a, highp vec3 b)    { return compare_float(a.x, b.x)*compare_float(a.y, b.y)*compare_float(a.z, b.z); }
+mediump float compare_mat2x3   (highp mat2x3 a, highp mat2x3 b){ return compare_vec3(a[0], b[0])*compare_vec3(a[1], b[1]); }
+
+void main (void) // Passes the position through and outputs 1.0 only if var[0][0] matches the expected constant within 0.05 per component.
+{
+	gl_Position = a_position;
+	mediump float result = 1.0;
+	result *= compare_mat2x3(var[0][0], mat2x3(2.0, 6.0, -6.0, 0.0, 5.0, 5.0)); // only element [0][0] of the array is read
+	v_vtxResult = result;
+}
+}
diff --git a/shaders/vulkan/comp/subgroups.nocompat.invalid.vk.comp b/shaders/vulkan/comp/subgroups.nocompat.invalid.vk.comp
new file mode 100644
index 0000000000..68fc74f910
--- /dev/null
+++ b/shaders/vulkan/comp/subgroups.nocompat.invalid.vk.comp
@@ -0,0 +1,125 @@
+#version 450
+#extension GL_KHR_shader_subgroup_basic : require
+#extension GL_KHR_shader_subgroup_ballot : require
+#extension GL_KHR_shader_subgroup_vote : require
+#extension GL_KHR_shader_subgroup_shuffle : require
+#extension GL_KHR_shader_subgroup_shuffle_relative : require
+#extension GL_KHR_shader_subgroup_arithmetic : require
+#extension GL_KHR_shader_subgroup_clustered : require
+#extension GL_KHR_shader_subgroup_quad : require
+layout(local_size_x = 1) in;
+
+layout(std430, binding = 0) buffer SSBO
+{
+	float FragColor;
+};
+
+void main() // Calls the built-ins of each required GL_KHR_shader_subgroup_* extension group in turn; most results are never read.
+{
+	// basic
+	FragColor = float(gl_NumSubgroups);
+	FragColor = float(gl_SubgroupID);
+	FragColor = float(gl_SubgroupSize);
+	FragColor = float(gl_SubgroupInvocationID);
+	subgroupBarrier();
+	subgroupMemoryBarrier();
+	subgroupMemoryBarrierBuffer();
+	subgroupMemoryBarrierShared();
+	subgroupMemoryBarrierImage();
+	bool elected = subgroupElect();
+
+	// ballot
+	FragColor = float(gl_SubgroupEqMask);
+	FragColor = float(gl_SubgroupGeMask);
+	FragColor = float(gl_SubgroupGtMask);
+	FragColor = float(gl_SubgroupLeMask);
+	FragColor = float(gl_SubgroupLtMask);
+	vec4 broadcasted = subgroupBroadcast(vec4(10.0), 8u);
+	vec3 first = subgroupBroadcastFirst(vec3(20.0));
+	uvec4 ballot_value = subgroupBallot(true); // reused below as input to the ballot queries and the bitwise reductions
+	bool inverse_ballot_value = subgroupInverseBallot(ballot_value);
+	bool bit_extracted = subgroupBallotBitExtract(uvec4(10u), 8u);
+	uint bit_count = subgroupBallotBitCount(ballot_value);
+	uint inclusive_bit_count = subgroupBallotInclusiveBitCount(ballot_value);
+	uint exclusive_bit_count = subgroupBallotExclusiveBitCount(ballot_value);
+	uint lsb = subgroupBallotFindLSB(ballot_value);
+	uint msb = subgroupBallotFindMSB(ballot_value);
+
+	// shuffle
+	uint shuffled = subgroupShuffle(10u, 8u);
+	uint shuffled_xor = subgroupShuffleXor(30u, 8u);
+
+	// shuffle relative
+	uint shuffled_up = subgroupShuffleUp(20u, 4u);
+	uint shuffled_down = subgroupShuffleDown(20u, 4u);
+
+	// vote
+	bool has_all = subgroupAll(true);
+	bool has_any = subgroupAny(true);
+	bool has_equal = subgroupAllEqual(true);
+
+	// arithmetic
+	vec4 added = subgroupAdd(vec4(20.0));
+	ivec4 iadded = subgroupAdd(ivec4(20));
+	vec4 multiplied = subgroupMul(vec4(20.0));
+	ivec4 imultiplied = subgroupMul(ivec4(20));
+	vec4 lo = subgroupMin(vec4(20.0));
+	vec4 hi = subgroupMax(vec4(20.0));
+	ivec4 slo = subgroupMin(ivec4(20));
+	ivec4 shi = subgroupMax(ivec4(20));
+	uvec4 ulo = subgroupMin(uvec4(20));
+	uvec4 uhi = subgroupMax(uvec4(20));
+	uvec4 anded = subgroupAnd(ballot_value);
+	uvec4 ored = subgroupOr(ballot_value);
+	uvec4 xored = subgroupXor(ballot_value);
+
+	added = subgroupInclusiveAdd(added);
+	iadded = subgroupInclusiveAdd(iadded);
+	multiplied = subgroupInclusiveMul(multiplied);
+	imultiplied = subgroupInclusiveMul(imultiplied);
+	lo = subgroupInclusiveMin(lo);
+	hi = subgroupInclusiveMax(hi);
+	slo = subgroupInclusiveMin(slo);
+	shi = subgroupInclusiveMax(shi);
+	ulo = subgroupInclusiveMin(ulo);
+	uhi = subgroupInclusiveMax(uhi);
+	anded = subgroupInclusiveAnd(anded);
+	ored = subgroupInclusiveOr(ored);
+	xored = subgroupInclusiveXor(ored); // NOTE(review): scans ored rather than xored - confirm this is intended
+	added = subgroupExclusiveAdd(lo); // NOTE(review): overwritten by the next assignment to added before being read
+
+	added = subgroupExclusiveAdd(multiplied);
+	multiplied = subgroupExclusiveMul(multiplied);
+	iadded = subgroupExclusiveAdd(imultiplied);
+	imultiplied = subgroupExclusiveMul(imultiplied);
+	lo = subgroupExclusiveMin(lo);
+	hi = subgroupExclusiveMax(hi);
+	ulo = subgroupExclusiveMin(ulo);
+	uhi = subgroupExclusiveMax(uhi);
+	slo = subgroupExclusiveMin(slo);
+	shi = subgroupExclusiveMax(shi);
+	anded = subgroupExclusiveAnd(anded);
+	ored = subgroupExclusiveOr(ored);
+	xored = subgroupExclusiveXor(ored); // NOTE(review): scans ored rather than xored - confirm this is intended
+
+	// clustered
+	added = subgroupClusteredAdd(added, 4u);
+	multiplied = subgroupClusteredMul(multiplied, 4u);
+	iadded = subgroupClusteredAdd(iadded, 4u);
+	imultiplied = subgroupClusteredMul(imultiplied, 4u);
+	lo = subgroupClusteredMin(lo, 4u);
+	hi = subgroupClusteredMax(hi, 4u);
+	ulo = subgroupClusteredMin(ulo, 4u);
+	uhi = subgroupClusteredMax(uhi, 4u);
+	slo = subgroupClusteredMin(slo, 4u);
+	shi = subgroupClusteredMax(shi, 4u);
+	anded = subgroupClusteredAnd(anded, 4u);
+	ored = subgroupClusteredOr(ored, 4u);
+	xored = subgroupClusteredXor(xored, 4u);
+
+	// quad
+	vec4 swap_horiz = subgroupQuadSwapHorizontal(vec4(20.0));
+	vec4 swap_vertical = subgroupQuadSwapVertical(vec4(20.0));
+	vec4 swap_diagonal = subgroupQuadSwapDiagonal(vec4(20.0));
+	vec4 quad_broadcast = subgroupQuadBroadcast(vec4(20.0), 3u);
+}
diff --git a/shaders/vulkan/frag/separate-combined-fake-overload.vk.frag b/shaders/vulkan/frag/separate-combined-fake-overload.vk.frag
new file mode 100644
index 0000000000..22d18a26a4
--- /dev/null
+++ b/shaders/vulkan/frag/separate-combined-fake-overload.vk.frag
@@ -0,0 +1,21 @@
+#version 450
+
+layout(location = 0) out vec4 FragColor;
+layout(binding = 0) uniform sampler2D uSamp;
+layout(binding = 1) uniform texture2D uT;
+layout(binding = 2) uniform sampler uS;
+
+vec4 samp(sampler2D uSamp) // Overload taking a combined image sampler; the parameter shadows the global of the same name.
+{
+	return texture(uSamp, vec2(0.5));
+}
+
+vec4 samp(texture2D T, sampler S) // Overload taking a separate texture and sampler, combined at the texture() call.
+{
+	return texture(sampler2D(T, S), vec2(0.5));
+}
+
+void main() // Calls both samp() overloads and sums their results.
+{
+	FragColor = samp(uSamp) + samp(uT, uS);
+}
diff --git a/shaders/vulkan/frag/separate-sampler-texture-array.vk.frag b/shaders/vulkan/frag/separate-sampler-texture-array.vk.frag
new file mode 100644
index 0000000000..b3501c1d8d
--- /dev/null
+++ b/shaders/vulkan/frag/separate-sampler-texture-array.vk.frag
@@ -0,0 +1,42 @@
+#version 310 es
+precision mediump float;
+
+layout(set = 0, binding = 0) uniform mediump sampler uSampler; // the single sampler paired with every texture below
+layout(set = 0, binding = 1) uniform mediump texture2D uTexture[4];
+layout(set = 0, binding = 2) uniform mediump texture3D uTexture3D[4];
+layout(set = 0, binding = 3) uniform mediump textureCube uTextureCube[4];
+layout(set = 0, binding = 4) uniform mediump texture2DArray uTextureArray[4];
+
+layout(location = 0) out vec4 FragColor;
+layout(location = 0) in vec2 vTex;
+layout(location = 1) in vec3 vTex3;
+
+vec4 sample_func(mediump sampler samp, vec2 uv) // Pairs the passed-in sampler with the global uTexture[2].
+{
+    return texture(sampler2D(uTexture[2], samp), uv);
+}
+
+vec4 sample_func_dual(mediump sampler samp, mediump texture2D tex, vec2 uv) // Both the sampler and the texture are passed in.
+{
+    return texture(sampler2D(tex, samp), uv);
+}
+
+vec4 sample_func_dual_array(mediump sampler samp, mediump texture2D tex[4], vec2 uv) // Receives the whole texture array and samples element 1.
+{
+    return texture(sampler2D(tex[1], samp), uv);
+}
+
+void main() // Samples 2D, 2D-array, cube and 3D textures from the arrays using constant indices and sums the results.
+{
+    vec2 off = 1.0 / vec2(textureSize(sampler2D(uTexture[1], uSampler), 0)); // reciprocal of the level-0 size of uTexture[1]
+    vec2 off2 = 1.0 / vec2(textureSize(sampler2D(uTexture[2], uSampler), 1)); // reciprocal of the level-1 size of uTexture[2]
+
+    vec4 c0 = sample_func(uSampler, vTex + off + off2);
+    vec4 c1 = sample_func_dual(uSampler, uTexture[1], vTex + off + off2); // passes a single array element
+    vec4 c2 = sample_func_dual_array(uSampler, uTexture, vTex + off + off2); // passes the entire array
+    vec4 c3 = texture(sampler2DArray(uTextureArray[3], uSampler), vTex3);
+    vec4 c4 = texture(samplerCube(uTextureCube[1], uSampler), vTex3);
+    vec4 c5 = texture(sampler3D(uTexture3D[2], uSampler), vTex3);
+
+    FragColor = c0 + c1 + c2 + c3 + c4 + c5;
+}
diff --git a/spirv.hpp b/spirv.hpp
index efa1dbdf0e..1fc24fb288 100644
--- a/spirv.hpp
+++ b/spirv.hpp
@@ -1,4 +1,4 @@
-// Copyright (c) 2014-2017 The Khronos Group Inc.
+// Copyright (c) 2014-2018 The Khronos Group Inc.
 // 
 // Permission is hereby granted, free of charge, to any person obtaining a copy
 // of this software and/or associated documentation files (the "Materials"),
@@ -46,12 +46,12 @@ namespace spv {
 
 typedef unsigned int Id;
 
-#define SPV_VERSION 0x10000
-#define SPV_REVISION 10
+#define SPV_VERSION 0x10300
+#define SPV_REVISION 1
 
 static const unsigned int MagicNumber = 0x07230203;
-static const unsigned int Version = 0x00010000;
-static const unsigned int Revision = 10;
+static const unsigned int Version = 0x00010300;
+static const unsigned int Revision = 1;
 static const unsigned int OpCodeMask = 0xffff;
 static const unsigned int WordCountShift = 16;
 
@@ -122,6 +122,15 @@ enum ExecutionMode {
     ExecutionModeOutputTriangleStrip = 29,
     ExecutionModeVecTypeHint = 30,
     ExecutionModeContractionOff = 31,
+    ExecutionModeInitializer = 33,
+    ExecutionModeFinalizer = 34,
+    ExecutionModeSubgroupSize = 35,
+    ExecutionModeSubgroupsPerWorkgroup = 36,
+    ExecutionModeSubgroupsPerWorkgroupId = 37,
+    ExecutionModeLocalSizeId = 38,
+    ExecutionModeLocalSizeHintId = 39,
+    ExecutionModePostDepthCoverage = 4446,
+    ExecutionModeStencilRefReplacingEXT = 5027,
     ExecutionModeMax = 0x7fffffff,
 };
 
@@ -376,10 +385,16 @@ enum Decoration {
     DecorationNoContraction = 42,
     DecorationInputAttachmentIndex = 43,
     DecorationAlignment = 44,
+    DecorationMaxByteOffset = 45,
+    DecorationAlignmentId = 46,
+    DecorationMaxByteOffsetId = 47,
+    DecorationExplicitInterpAMD = 4999,
     DecorationOverrideCoverageNV = 5248,
     DecorationPassthroughNV = 5250,
     DecorationViewportRelativeNV = 5252,
     DecorationSecondaryViewportRelativeNV = 5256,
+    DecorationHlslCounterBufferGOOGLE = 5634,
+    DecorationHlslSemanticGOOGLE = 5635,
     DecorationMax = 0x7fffffff,
 };
 
@@ -425,21 +440,35 @@ enum BuiltIn {
     BuiltInSubgroupLocalInvocationId = 41,
     BuiltInVertexIndex = 42,
     BuiltInInstanceIndex = 43,
+    BuiltInSubgroupEqMask = 4416,
     BuiltInSubgroupEqMaskKHR = 4416,
+    BuiltInSubgroupGeMask = 4417,
     BuiltInSubgroupGeMaskKHR = 4417,
+    BuiltInSubgroupGtMask = 4418,
     BuiltInSubgroupGtMaskKHR = 4418,
+    BuiltInSubgroupLeMask = 4419,
     BuiltInSubgroupLeMaskKHR = 4419,
+    BuiltInSubgroupLtMask = 4420,
     BuiltInSubgroupLtMaskKHR = 4420,
     BuiltInBaseVertex = 4424,
     BuiltInBaseInstance = 4425,
     BuiltInDrawIndex = 4426,
     BuiltInDeviceIndex = 4438,
     BuiltInViewIndex = 4440,
+    BuiltInBaryCoordNoPerspAMD = 4992,
+    BuiltInBaryCoordNoPerspCentroidAMD = 4993,
+    BuiltInBaryCoordNoPerspSampleAMD = 4994,
+    BuiltInBaryCoordSmoothAMD = 4995,
+    BuiltInBaryCoordSmoothCentroidAMD = 4996,
+    BuiltInBaryCoordSmoothSampleAMD = 4997,
+    BuiltInBaryCoordPullModelAMD = 4998,
+    BuiltInFragStencilRefEXT = 5014,
     BuiltInViewportMaskNV = 5253,
     BuiltInSecondaryPositionNV = 5257,
     BuiltInSecondaryViewportMaskNV = 5258,
     BuiltInPositionPerViewNV = 5261,
     BuiltInViewportMaskPerViewNV = 5262,
+    BuiltInFullyCoveredEXT = 5264,
     BuiltInMax = 0x7fffffff,
 };
 
@@ -458,6 +487,8 @@ enum SelectionControlMask {
 enum LoopControlShift {
     LoopControlUnrollShift = 0,
     LoopControlDontUnrollShift = 1,
+    LoopControlDependencyInfiniteShift = 2,
+    LoopControlDependencyLengthShift = 3,
     LoopControlMax = 0x7fffffff,
 };
 
@@ -465,6 +496,8 @@ enum LoopControlMask {
     LoopControlMaskNone = 0,
     LoopControlUnrollMask = 0x00000001,
     LoopControlDontUnrollMask = 0x00000002,
+    LoopControlDependencyInfiniteMask = 0x00000004,
+    LoopControlDependencyLengthMask = 0x00000008,
 };
 
 enum FunctionControlShift {
@@ -538,6 +571,7 @@ enum GroupOperation {
     GroupOperationReduce = 0,
     GroupOperationInclusiveScan = 1,
     GroupOperationExclusiveScan = 2,
+    GroupOperationClusteredReduce = 3,
     GroupOperationMax = 0x7fffffff,
 };
 
@@ -615,6 +649,17 @@ enum Capability {
     CapabilityStorageImageReadWithoutFormat = 55,
     CapabilityStorageImageWriteWithoutFormat = 56,
     CapabilityMultiViewport = 57,
+    CapabilitySubgroupDispatch = 58,
+    CapabilityNamedBarrier = 59,
+    CapabilityPipeStorage = 60,
+    CapabilityGroupNonUniform = 61,
+    CapabilityGroupNonUniformVote = 62,
+    CapabilityGroupNonUniformArithmetic = 63,
+    CapabilityGroupNonUniformBallot = 64,
+    CapabilityGroupNonUniformShuffle = 65,
+    CapabilityGroupNonUniformShuffleRelative = 66,
+    CapabilityGroupNonUniformClustered = 67,
+    CapabilityGroupNonUniformQuad = 68,
     CapabilitySubgroupBallotKHR = 4423,
     CapabilityDrawParameters = 4427,
     CapabilitySubgroupVoteKHR = 4431,
@@ -628,12 +673,24 @@ enum Capability {
     CapabilityMultiView = 4439,
     CapabilityVariablePointersStorageBuffer = 4441,
     CapabilityVariablePointers = 4442,
+    CapabilityAtomicStorageOps = 4445,
+    CapabilitySampleMaskPostDepthCoverage = 4447,
+    CapabilityFloat16ImageAMD = 5008,
+    CapabilityImageGatherBiasLodAMD = 5009,
+    CapabilityFragmentMaskAMD = 5010,
+    CapabilityStencilExportEXT = 5013,
+    CapabilityImageReadWriteLodAMD = 5015,
     CapabilitySampleMaskOverrideCoverageNV = 5249,
     CapabilityGeometryShaderPassthroughNV = 5251,
+    CapabilityShaderViewportIndexLayerEXT = 5254,
     CapabilityShaderViewportIndexLayerNV = 5254,
     CapabilityShaderViewportMaskNV = 5255,
     CapabilityShaderStereoViewNV = 5259,
     CapabilityPerViewAttributesNV = 5260,
+    CapabilityFragmentFullyCoveredEXT = 5265,
+    CapabilitySubgroupShuffleINTEL = 5568,
+    CapabilitySubgroupBufferBlockIOINTEL = 5569,
+    CapabilitySubgroupImageBlockIOINTEL = 5570,
     CapabilityMax = 0x7fffffff,
 };
 
@@ -932,6 +989,52 @@ enum Op {
     OpAtomicFlagTestAndSet = 318,
     OpAtomicFlagClear = 319,
     OpImageSparseRead = 320,
+    OpSizeOf = 321,
+    OpTypePipeStorage = 322,
+    OpConstantPipeStorage = 323,
+    OpCreatePipeFromPipeStorage = 324,
+    OpGetKernelLocalSizeForSubgroupCount = 325,
+    OpGetKernelMaxNumSubgroups = 326,
+    OpTypeNamedBarrier = 327,
+    OpNamedBarrierInitialize = 328,
+    OpMemoryNamedBarrier = 329,
+    OpModuleProcessed = 330,
+    OpExecutionModeId = 331,
+    OpDecorateId = 332,
+    OpGroupNonUniformElect = 333,
+    OpGroupNonUniformAll = 334,
+    OpGroupNonUniformAny = 335,
+    OpGroupNonUniformAllEqual = 336,
+    OpGroupNonUniformBroadcast = 337,
+    OpGroupNonUniformBroadcastFirst = 338,
+    OpGroupNonUniformBallot = 339,
+    OpGroupNonUniformInverseBallot = 340,
+    OpGroupNonUniformBallotBitExtract = 341,
+    OpGroupNonUniformBallotBitCount = 342,
+    OpGroupNonUniformBallotFindLSB = 343,
+    OpGroupNonUniformBallotFindMSB = 344,
+    OpGroupNonUniformShuffle = 345,
+    OpGroupNonUniformShuffleXor = 346,
+    OpGroupNonUniformShuffleUp = 347,
+    OpGroupNonUniformShuffleDown = 348,
+    OpGroupNonUniformIAdd = 349,
+    OpGroupNonUniformFAdd = 350,
+    OpGroupNonUniformIMul = 351,
+    OpGroupNonUniformFMul = 352,
+    OpGroupNonUniformSMin = 353,
+    OpGroupNonUniformUMin = 354,
+    OpGroupNonUniformFMin = 355,
+    OpGroupNonUniformSMax = 356,
+    OpGroupNonUniformUMax = 357,
+    OpGroupNonUniformFMax = 358,
+    OpGroupNonUniformBitwiseAnd = 359,
+    OpGroupNonUniformBitwiseOr = 360,
+    OpGroupNonUniformBitwiseXor = 361,
+    OpGroupNonUniformLogicalAnd = 362,
+    OpGroupNonUniformLogicalOr = 363,
+    OpGroupNonUniformLogicalXor = 364,
+    OpGroupNonUniformQuadBroadcast = 365,
+    OpGroupNonUniformQuadSwap = 366,
     OpSubgroupBallotKHR = 4421,
     OpSubgroupFirstInvocationKHR = 4422,
     OpSubgroupAllKHR = 4428,
@@ -948,6 +1051,16 @@ enum Op {
     OpGroupSMaxNonUniformAMD = 5007,
     OpFragmentMaskFetchAMD = 5011,
     OpFragmentFetchAMD = 5012,
+    OpSubgroupShuffleINTEL = 5571,
+    OpSubgroupShuffleDownINTEL = 5572,
+    OpSubgroupShuffleUpINTEL = 5573,
+    OpSubgroupShuffleXorINTEL = 5574,
+    OpSubgroupBlockReadINTEL = 5575,
+    OpSubgroupBlockWriteINTEL = 5576,
+    OpSubgroupImageBlockReadINTEL = 5577,
+    OpSubgroupImageBlockWriteINTEL = 5578,
+    OpDecorateStringGOOGLE = 5632,
+    OpMemberDecorateStringGOOGLE = 5633,
     OpMax = 0x7fffffff,
 };
 
diff --git a/spirv_cfg.cpp b/spirv_cfg.cpp
index c76698a740..aae31cae0c 100644
--- a/spirv_cfg.cpp
+++ b/spirv_cfg.cpp
@@ -226,4 +226,4 @@ void DominatorBuilder::lift_continue_block_dominator()
 	if (back_edge_dominator)
 		dominator = cfg.get_function().entry_block;
 }
-}
+} // namespace spirv_cross
diff --git a/spirv_cfg.hpp b/spirv_cfg.hpp
index 104e494c22..0df88e1034 100644
--- a/spirv_cfg.hpp
+++ b/spirv_cfg.hpp
@@ -114,6 +114,6 @@ private:
 	const CFG &cfg;
 	uint32_t dominator = 0;
 };
-}
+} // namespace spirv_cross
 
 #endif
diff --git a/spirv_common.hpp b/spirv_common.hpp
index b4d6ef1dca..c4716a2388 100644
--- a/spirv_common.hpp
+++ b/spirv_common.hpp
@@ -19,6 +19,7 @@
 
 #include "spirv.hpp"
 
+#include <algorithm>
 #include <cstdio>
 #include <cstring>
 #include <functional>
@@ -27,6 +28,7 @@
 #include <sstream>
 #include <stack>
 #include <stdexcept>
+#include <stdint.h>
 #include <string>
 #include <unordered_map>
 #include <unordered_set>
@@ -90,7 +92,126 @@ void join_helper(std::ostringstream &stream, T &&t, Ts &&... ts)
 	stream << std::forward<T>(t);
 	join_helper(stream, std::forward<Ts>(ts)...);
 }
-}
+} // namespace inner
+
+class Bitset // Set of bit indices: bits 0-63 are kept in the `lower` mask, bits >= 64 spill into the `higher` hash set.
+{
+public:
+	Bitset() = default;
+	explicit inline Bitset(uint64_t lower_) // Seeds bits 0-63 from a plain 64-bit mask; no spilled bits are set.
+	    : lower(lower_)
+	{
+	}
+
+	inline bool get(uint32_t bit) const // Returns true if `bit` is set.
+	{
+		if (bit < 64)
+			return (lower & (1ull << bit)) != 0;
+		else
+			return higher.count(bit) != 0;
+	}
+
+	inline void set(uint32_t bit) // Sets `bit`; setting a bit that is already set has no effect.
+	{
+		if (bit < 64)
+			lower |= 1ull << bit;
+		else
+			higher.insert(bit);
+	}
+
+	inline void clear(uint32_t bit) // Clears `bit`; clearing a bit that is not set has no effect.
+	{
+		if (bit < 64)
+			lower &= ~(1ull << bit);
+		else
+			higher.erase(bit);
+	}
+
+	inline uint64_t get_lower() const // Returns bits 0-63 only; bits held in `higher` are not represented in the result.
+	{
+		return lower;
+	}
+
+	inline void reset() // Clears every bit, including the spilled ones.
+	{
+		lower = 0;
+		higher.clear();
+	}
+
+	inline void merge_and(const Bitset &other) // In-place intersection with `other`.
+	{
+		lower &= other.lower;
+		std::unordered_set<uint32_t> tmp_set; // collects the spilled bits present in both sets
+		for (auto &v : higher)
+			if (other.higher.count(v) != 0)
+				tmp_set.insert(v);
+		higher = std::move(tmp_set);
+	}
+
+	inline void merge_or(const Bitset &other) // In-place union with `other`.
+	{
+		lower |= other.lower;
+		for (auto &v : other.higher)
+			higher.insert(v);
+	}
+
+	inline bool operator==(const Bitset &other) const // Equal when the low masks match and both spill sets hold the same bits.
+	{
+		if (lower != other.lower)
+			return false;
+
+		if (higher.size() != other.higher.size())
+			return false;
+
+		for (auto &v : higher) // sizes already match, so one-way containment is enough
+			if (other.higher.count(v) == 0)
+				return false;
+
+		return true;
+	}
+
+	inline bool operator!=(const Bitset &other) const
+	{
+		return !(*this == other);
+	}
+
+	template <typename Op>
+	void for_each_bit(const Op &op) const // Calls op(bit) once per set bit: bits 0-63 in ascending order, then the spilled bits sorted ascending.
+	{
+		// TODO: Add ctz-based iteration.
+		for (uint32_t i = 0; i < 64; i++)
+		{
+			if (lower & (1ull << i))
+				op(i);
+		}
+
+		if (higher.empty())
+			return;
+
+		// Need to enforce an order here for reproducible results,
+		// but hitting this path should happen extremely rarely, so having this slow path is fine.
+		std::vector<uint32_t> bits;
+		bits.reserve(higher.size());
+		for (auto &v : higher)
+			bits.push_back(v);
+		std::sort(std::begin(bits), std::end(bits));
+
+		for (auto &v : bits)
+			op(v);
+	}
+
+	inline bool empty() const // Returns true when no bit is set in either storage.
+	{
+		return lower == 0 && higher.empty();
+	}
+
+private:
+	// The most common bits to set are all lower than 64,
+	// so optimize for this case. Bits spilling outside 64 go into a slower data structure.
+	// In almost all cases, higher data structure will not be used.
+	uint64_t lower = 0;
+	std::unordered_set<uint32_t> higher;
+};
 
 // Helper template to avoid lots of nasty string temporary munging.
 template <typename... Ts>
@@ -263,6 +384,7 @@ struct SPIRType : IVariant
 		Int64,
 		UInt64,
 		AtomicCounter,
+		Half,
 		Float,
 		Double,
 		Struct,
@@ -361,7 +483,7 @@ struct SPIREntryPoint
 	std::string orig_name;
 	std::vector<uint32_t> interface_variables;
 
-	uint64_t flags = 0;
+	Bitset flags;
 	struct
 	{
 		uint32_t x = 0, y = 0, z = 0;
@@ -459,7 +581,8 @@ struct SPIRBlock : IVariant
 	enum Method
 	{
 		MergeToSelectForLoop,
-		MergeToDirectForLoop
+		MergeToDirectForLoop,
+		MergeToSelectContinueForLoop
 	};
 
 	enum ContinueBlockType
@@ -513,6 +636,10 @@ struct SPIRBlock : IVariant
 	// Used for handling complex continue blocks which have side effects.
 	std::vector<std::pair<uint32_t, uint32_t>> declare_temporary;
 
+	// Declare these temporaries, but only conditionally if this block turns out to be
+	// a complex loop header.
+	std::vector<std::pair<uint32_t, uint32_t>> potential_declare_temporary;
+
 	struct Case
 	{
 		uint32_t value;
@@ -539,6 +666,11 @@ struct SPIRBlock : IVariant
 	// fail to use a classic for-loop,
 	// we remove these variables, and fall back to regular variables outside the loop.
 	std::vector<uint32_t> loop_variables;
+
+	// Some expressions are control-flow dependent, i.e. any instruction which relies on derivatives or
+	// sub-group-like operations.
+	// Make sure that we only use these expressions in the original block.
+	std::vector<uint32_t> invalidate_expressions;
 };
 
 struct SPIRFunction : IVariant
@@ -611,6 +743,14 @@ struct SPIRFunction : IVariant
 		arguments.push_back({ parameter_type, id, 0u, 0u, alias_global_variable });
 	}
 
+	// Statements to be emitted when the function returns.
+	// Mostly used for lowering internal data structures onto flattened structures.
+	std::vector<std::string> fixup_statements_out;
+
+	// Statements to be emitted when the function begins.
+	// Mostly used for populating internal data structures from flattened structures.
+	std::vector<std::string> fixup_statements_in;
+
 	bool active = false;
 	bool flush_undeclared = true;
 	bool do_combined_parameters = true;
@@ -731,6 +871,7 @@ struct SPIRConstant : IVariant
 		// Workaround for MSVC 2013, initializing an array breaks.
 		ConstantVector()
 		{
+			memset(r, 0, sizeof(r));
 			for (unsigned i = 0; i < 4; i++)
 				id[i] = 0;
 		}
@@ -751,6 +892,57 @@ struct SPIRConstant : IVariant
 		}
 	};
 
+	static inline float f16_to_f32(uint16_t u16_value)
+	{
+		// Widens an IEEE 754 half-precision bit pattern to float. Based on the GLM implementation.
+		int s = (u16_value >> 15) & 0x1; // Sign bit.
+		int e = (u16_value >> 10) & 0x1f; // 5-bit biased exponent.
+		int m = (u16_value >> 0) & 0x3ff; // 10-bit mantissa.
+
+		union {
+			float f32;
+			uint32_t u32;
+		} u; // Reinterprets the assembled 32-bit pattern as a float.
+
+		if (e == 0) // Zero or denormal half.
+		{
+			if (m == 0)
+			{
+				u.u32 = uint32_t(s) << 31; // Signed zero.
+				return u.f32;
+			}
+			else
+			{
+				while ((m & 0x400) == 0) // Normalize: shift until the implicit leading one (bit 10) appears.
+				{
+					m <<= 1;
+					e--;
+				}
+
+				e++; // Denormals are scaled as if the exponent field were 1.
+				m &= ~0x400; // Drop the now-explicit leading one.
+			}
+		}
+		else if (e == 31) // Infinity or NaN.
+		{
+			if (m == 0)
+			{
+				u.u32 = (uint32_t(s) << 31) | 0x7f800000u; // Signed infinity.
+				return u.f32;
+			}
+			else
+			{
+				u.u32 = (uint32_t(s) << 31) | 0x7f800000u | (m << 13); // NaN; mantissa bits are carried over.
+				return u.f32;
+			}
+		}
+
+		e += 127 - 15; // Rebias the exponent from half (15) to float (127).
+		m <<= 13; // Align the 10-bit mantissa to float's 23 bits.
+		u.u32 = (uint32_t(s) << 31) | (e << 23) | m;
+		return u.f32;
+	}
+
 	inline uint32_t specialization_constant_id(uint32_t col, uint32_t row) const
 	{
 		return m.c[col].id[row];
@@ -766,6 +958,16 @@ struct SPIRConstant : IVariant
 		return m.c[col].r[row].u32;
 	}
 
+	inline uint16_t scalar_u16(uint32_t col = 0, uint32_t row = 0) const
+	{
+		return static_cast<uint16_t>(m.c[col].r[row].u32 & 0xffffu); // Low 16 bits of the element's u32 value.
+	}
+
+	inline float scalar_f16(uint32_t col = 0, uint32_t row = 0) const
+	{
+		return f16_to_f32(uint16_t(m.c[col].r[row].u32 & 0xffffu)); // Low 16 bits of the element, widened to float.
+	}
+
 	inline float scalar_f32(uint32_t col = 0, uint32_t row = 0) const
 	{
 		return m.c[col].r[row].f32;
@@ -808,7 +1010,7 @@ struct SPIRConstant : IVariant
 
 	inline void make_null(const SPIRType &constant_type_)
 	{
-		std::memset(&m, 0, sizeof(m));
+		m = {};
 		m.columns = constant_type_.columns;
 		for (auto &c : m.c)
 			c.vecsize = constant_type_.vecsize;
@@ -819,6 +1021,8 @@ struct SPIRConstant : IVariant
 	{
 	}
 
+	SPIRConstant() = default;
+
 	SPIRConstant(uint32_t constant_type_, const uint32_t *elements, uint32_t num_elements, bool specialized)
 	    : constant_type(constant_type_)
 	    , specialization(specialized)
@@ -882,9 +1086,11 @@ struct SPIRConstant : IVariant
 
 	uint32_t constant_type;
 	ConstantMatrix m;
-	bool specialization = false; // If this constant is a specialization constant (i.e. created with OpSpecConstant*).
-	bool is_used_as_array_length =
-	    false; // If this constant is used as an array length which creates specialization restrictions on some backends.
+
+	// If this constant is a specialization constant (i.e. created with OpSpecConstant*).
+	bool specialization = false;
+	// If this constant is used as an array length which creates specialization restrictions on some backends.
+	bool is_used_as_array_length = false;
 
 	// For composites which are constant arrays, etc.
 	std::vector<uint32_t> subconstants;
@@ -913,9 +1119,10 @@ public:
 	void set(std::unique_ptr<IVariant> val, uint32_t new_type)
 	{
 		holder = std::move(val);
-		if (type != TypeNone && type != new_type)
+		if (!allow_type_rewrite && type != TypeNone && type != new_type)
 			SPIRV_CROSS_THROW("Overwriting a variant with new type.");
 		type = new_type;
+		allow_type_rewrite = false;
 	}
 
 	template <typename T>
@@ -956,9 +1163,15 @@ public:
 		type = TypeNone;
 	}
 
+	void set_allow_type_rewrite() // One-shot: lets the next set() change the stored type; set() clears the flag again.
+	{
+		allow_type_rewrite = true;
+	}
+
 private:
 	std::unique_ptr<IVariant> holder;
 	uint32_t type = TypeNone;
+	bool allow_type_rewrite = false;
 };
 
 template <typename T>
@@ -988,7 +1201,8 @@ struct Meta
 	{
 		std::string alias;
 		std::string qualified_alias;
-		uint64_t decoration_flags = 0;
+		std::string hlsl_semantic;
+		Bitset decoration_flags;
 		spv::BuiltIn builtin_type;
 		uint32_t location = 0;
 		uint32_t set = 0;
@@ -998,6 +1212,7 @@ struct Meta
 		uint32_t matrix_stride = 0;
 		uint32_t input_attachment = 0;
 		uint32_t spec_id = 0;
+		uint32_t index = 0;
 		bool builtin = false;
 	};
 
@@ -1014,6 +1229,11 @@ struct Meta
 	// is not a valid identifier in any high-level language.
 	std::string hlsl_magic_counter_buffer_name;
 	bool hlsl_magic_counter_buffer_candidate = false;
+
+	// For SPV_GOOGLE_hlsl_functionality1, this avoids the workaround.
+	bool hlsl_is_magic_counter_buffer = false;
+	// ID for the sibling counter buffer.
+	uint32_t hlsl_magic_counter_buffer = 0;
 };
 
 // A user callback that remaps the type of any variable.
@@ -1054,6 +1274,11 @@ public:
 private:
 	uint64_t h = 0xcbf29ce484222325ull;
 };
+
+static inline bool type_is_floating_point(const SPIRType &type)
+{
+	return type.basetype == SPIRType::Half || type.basetype == SPIRType::Float || type.basetype == SPIRType::Double;
 }
+} // namespace spirv_cross
 
 #endif
diff --git a/spirv_cpp.cpp b/spirv_cpp.cpp
index 9500195ccb..9302a07441 100644
--- a/spirv_cpp.cpp
+++ b/spirv_cpp.cpp
@@ -53,7 +53,7 @@ void CompilerCPP::emit_interface_block(const SPIRVariable &var)
 
 	string buffer_name;
 	auto flags = meta[type.self].decoration.decoration_flags;
-	if (flags & (1ull << DecorationBlock))
+	if (flags.get(DecorationBlock))
 	{
 		emit_block_struct(type);
 		buffer_name = to_name(type.self);
@@ -115,7 +115,7 @@ void CompilerCPP::emit_push_constant_block(const SPIRVariable &var)
 
 	auto &type = get<SPIRType>(var.basetype);
 	auto &flags = meta[var.self].decoration.decoration_flags;
-	if ((flags & (1ull << DecorationBinding)) || (flags & (1ull << DecorationDescriptorSet)))
+	if (flags.get(DecorationBinding) || flags.get(DecorationDescriptorSet))
 		SPIRV_CROSS_THROW("Push constant blocks cannot be compiled to GLSL with Binding or Set syntax. "
 		                  "Remap to location with reflection API first or disable these decorations.");
 
@@ -151,8 +151,8 @@ void CompilerCPP::emit_resources()
 		{
 			auto &type = id.get<SPIRType>();
 			if (type.basetype == SPIRType::Struct && type.array.empty() && !type.pointer &&
-			    (meta[type.self].decoration.decoration_flags &
-			     ((1ull << DecorationBlock) | (1ull << DecorationBufferBlock))) == 0)
+			    (!meta[type.self].decoration.decoration_flags.get(DecorationBlock) &&
+			     !meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock)))
 			{
 				emit_struct(type);
 			}
@@ -172,8 +172,8 @@ void CompilerCPP::emit_resources()
 
 			if (var.storage != StorageClassFunction && type.pointer && type.storage == StorageClassUniform &&
 			    !is_hidden_variable(var) &&
-			    (meta[type.self].decoration.decoration_flags &
-			     ((1ull << DecorationBlock) | (1ull << DecorationBufferBlock))))
+			    (meta[type.self].decoration.decoration_flags.get(DecorationBlock) ||
+			     meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock)))
 			{
 				emit_buffer_block(var);
 			}
@@ -317,7 +317,7 @@ string CompilerCPP::compile()
 		emit_header();
 		emit_resources();
 
-		emit_function(get<SPIRFunction>(entry_point), 0);
+		emit_function(get<SPIRFunction>(entry_point), Bitset());
 
 		pass_count++;
 	} while (force_recompile);
@@ -373,7 +373,7 @@ void CompilerCPP::emit_c_linkage()
 	end_scope();
 }
 
-void CompilerCPP::emit_function_prototype(SPIRFunction &func, uint64_t)
+void CompilerCPP::emit_function_prototype(SPIRFunction &func, const Bitset &)
 {
 	if (func.self != entry_point)
 		add_function_overload(func);
diff --git a/spirv_cpp.hpp b/spirv_cpp.hpp
index 57f815f3c0..a61dc8ec3c 100644
--- a/spirv_cpp.hpp
+++ b/spirv_cpp.hpp
@@ -51,7 +51,7 @@ public:
 private:
 	void emit_header() override;
 	void emit_c_linkage();
-	void emit_function_prototype(SPIRFunction &func, uint64_t return_flags) override;
+	void emit_function_prototype(SPIRFunction &func, const Bitset &return_flags) override;
 
 	void emit_resources();
 	void emit_buffer_block(const SPIRVariable &type) override;
@@ -72,6 +72,6 @@ private:
 
 	std::string interface_name;
 };
-}
+} // namespace spirv_cross
 
 #endif
diff --git a/spirv_cross.cpp b/spirv_cross.cpp
index 3342aaabe4..0402562e0b 100644
--- a/spirv_cross.cpp
+++ b/spirv_cross.cpp
@@ -101,10 +101,16 @@ bool Compiler::variable_storage_is_aliased(const SPIRVariable &v)
 {
 	auto &type = get<SPIRType>(v.basetype);
 	bool ssbo = v.storage == StorageClassStorageBuffer ||
-	            ((meta[type.self].decoration.decoration_flags & (1ull << DecorationBufferBlock)) != 0);
+	            meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
 	bool image = type.basetype == SPIRType::Image;
 	bool counter = type.basetype == SPIRType::AtomicCounter;
-	bool is_restrict = (meta[v.self].decoration.decoration_flags & (1ull << DecorationRestrict)) != 0;
+
+	bool is_restrict;
+	if (ssbo)
+		is_restrict = get_buffer_block_flags(v).get(DecorationRestrict);
+	else
+		is_restrict = has_decoration(v.self, DecorationRestrict);
+
 	return !is_restrict && (ssbo || image || counter);
 }
 
@@ -188,7 +194,12 @@ string Compiler::to_name(uint32_t id, bool allow_alias) const
 		// as that can be overridden by the reflection APIs after parse.
 		auto &type = get<SPIRType>(id);
 		if (type.type_alias)
-			return to_name(type.type_alias);
+		{
+			// If the alias master has been specially packed, we will have emitted a clean variant as well,
+			// so skip the name aliasing here.
+			if (!has_decoration(type.type_alias, DecorationCPacked))
+				return to_name(type.type_alias);
+		}
 	}
 
 	if (meta[id].decoration.alias.empty())
@@ -346,6 +357,14 @@ void Compiler::flush_all_atomic_capable_variables()
 	flush_all_aliased_variables();
 }
 
+void Compiler::flush_control_dependent_expressions(uint32_t block_id)
+{
+	auto &pending = get<SPIRBlock>(block_id).invalidate_expressions; // IDs queued on this block.
+	for (const uint32_t expr_id : pending)
+		invalid_expressions.insert(expr_id);
+	pending.clear(); // Each queued ID is handed over only once.
+}
+
 void Compiler::flush_all_active_variables()
 {
 	// Invalidate all temporaries we read from variables in this block since they were forwarded.
@@ -683,7 +702,7 @@ ShaderResources Compiler::get_shader_resources(const unordered_set<uint32_t> *ac
 		// Input
 		if (var.storage == StorageClassInput && interface_variable_exists_in_entry_point(var.self))
 		{
-			if (meta[type.self].decoration.decoration_flags & (1ull << DecorationBlock))
+			if (meta[type.self].decoration.decoration_flags.get(DecorationBlock))
 				res.stage_inputs.push_back(
 				    { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self) });
 			else
@@ -697,7 +716,7 @@ ShaderResources Compiler::get_shader_resources(const unordered_set<uint32_t> *ac
 		// Outputs
 		else if (var.storage == StorageClassOutput && interface_variable_exists_in_entry_point(var.self))
 		{
-			if (meta[type.self].decoration.decoration_flags & (1ull << DecorationBlock))
+			if (meta[type.self].decoration.decoration_flags.get(DecorationBlock))
 				res.stage_outputs.push_back(
 				    { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self) });
 			else
@@ -705,14 +724,14 @@ ShaderResources Compiler::get_shader_resources(const unordered_set<uint32_t> *ac
 		}
 		// UBOs
 		else if (type.storage == StorageClassUniform &&
-		         (meta[type.self].decoration.decoration_flags & (1ull << DecorationBlock)))
+		         (meta[type.self].decoration.decoration_flags.get(DecorationBlock)))
 		{
 			res.uniform_buffers.push_back(
 			    { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self) });
 		}
 		// Old way to declare SSBOs.
 		else if (type.storage == StorageClassUniform &&
-		         (meta[type.self].decoration.decoration_flags & (1ull << DecorationBufferBlock)))
+		         (meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock)))
 		{
 			res.storage_buffers.push_back(
 			    { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self) });
@@ -795,6 +814,7 @@ static bool is_valid_spirv_version(uint32_t version)
 	case 0x10000: // SPIR-V 1.0
 	case 0x10100: // SPIR-V 1.1
 	case 0x10200: // SPIR-V 1.2
+	case 0x10300: // SPIR-V 1.3
 		return true;
 
 	default:
@@ -802,6 +822,70 @@ static bool is_valid_spirv_version(uint32_t version)
 	}
 }
 
+bool Compiler::type_is_block_like(const SPIRType &type) const
+{
+	if (type.basetype != SPIRType::Struct) // Only structs can qualify.
+		return false;
+	// Explicitly decorated blocks.
+	if (has_decoration(type.self, DecorationBlock))
+		return true;
+	if (has_decoration(type.self, DecorationBufferBlock))
+		return true;
+
+	// Otherwise, a single member carrying an Offset decoration is enough.
+	const uint32_t member_count = uint32_t(type.member_types.size());
+	for (uint32_t member = 0; member < member_count; member++)
+		if (has_member_decoration(type.self, member, DecorationOffset))
+			return true;
+	return false;
+}
+
+void Compiler::fixup_type_alias()
+{
+	// Due to how some backends work, the "master" type of type_alias must be a block-like type if it exists.
+	// FIXME: Multiple alias types which are both block-like will be awkward, for now, it's best to just drop the type
+	// alias if the slave type is a block type.
+	for (auto &id : ids) // Pass 1: promote block-like aliases to master.
+	{
+		if (id.get_type() != TypeType)
+			continue;
+
+		auto &type = id.get<SPIRType>();
+
+		if (type.type_alias && type_is_block_like(type))
+		{
+			// Become the master.
+			for (auto &other_id : ids) // Re-point every other alias of the old master at this type.
+			{
+				if (other_id.get_type() != TypeType)
+					continue;
+				if (other_id.get_id() == type.self)
+					continue;
+
+				auto &other_type = other_id.get<SPIRType>();
+				if (other_type.type_alias == type.type_alias)
+					other_type.type_alias = type.self;
+			}
+
+			get<SPIRType>(type.type_alias).type_alias = id.get_id(); // The old master now aliases this type.
+			type.type_alias = 0; // This type no longer aliases anything.
+		}
+	}
+
+	for (auto &id : ids) // Pass 2: a block-like type that still has an alias after pass 1 loses it.
+	{
+		if (id.get_type() != TypeType)
+			continue;
+
+		auto &type = id.get<SPIRType>();
+		if (type.type_alias && type_is_block_like(type))
+		{
+			// This is not allowed, drop the type_alias.
+			type.type_alias = 0;
+		}
+	}
+}
+
 void Compiler::parse()
 {
 	auto len = spirv.size();
@@ -853,19 +937,21 @@ void Compiler::parse()
 			}
 		}
 	}
+
+	fixup_type_alias();
 }
 
 void Compiler::flatten_interface_block(uint32_t id)
 {
 	auto &var = get<SPIRVariable>(id);
 	auto &type = get<SPIRType>(var.basetype);
-	auto flags = meta.at(type.self).decoration.decoration_flags;
+	auto &flags = meta.at(type.self).decoration.decoration_flags;
 
 	if (!type.array.empty())
 		SPIRV_CROSS_THROW("Type is array of UBOs.");
 	if (type.basetype != SPIRType::Struct)
 		SPIRV_CROSS_THROW("Type is not a struct.");
-	if ((flags & (1ull << DecorationBlock)) == 0)
+	if (!flags.get(DecorationBlock))
 		SPIRV_CROSS_THROW("Type is not a block.");
 	if (type.member_types.empty())
 		SPIRV_CROSS_THROW("Member list of struct is empty.");
@@ -912,12 +998,27 @@ void Compiler::update_name_cache(unordered_set<string> &cache, string &name)
 	uint32_t counter = 0;
 	auto tmpname = name;
 
+	bool use_linked_underscore = true;
+
+	if (tmpname == "_")
+	{
+		// We cannot just append numbers, as we will end up creating internally reserved names.
+		// Make it like _0_<counter> instead.
+		tmpname += "0";
+	}
+	else if (tmpname.back() == '_')
+	{
+		// The last_character is an underscore, so we don't need to link in underscore.
+		// This would violate double underscore rules.
+		use_linked_underscore = false;
+	}
+
 	// If there is a collision (very rare),
 	// keep tacking on extra identifier until it's unique.
 	do
 	{
 		counter++;
-		name = tmpname + "_" + convert_to_string(counter);
+		name = tmpname + (use_linked_underscore ? "_" : "") + convert_to_string(counter);
 	} while (cache.find(name) != end(cache));
 	cache.insert(name);
 }
@@ -932,6 +1033,7 @@ void Compiler::set_name(uint32_t id, const std::string &name)
 
 	// glslang uses identifiers to pass along meaningful information
 	// about HLSL reflection.
+	// FIXME: This should be deprecated eventually.
 	auto &m = meta.at(id);
 	if (source.hlsl && name.size() >= 6 && name.find("@count") == name.size() - 6)
 	{
@@ -961,11 +1063,29 @@ const SPIRType &Compiler::get_type_from_variable(uint32_t id) const
 	return get<SPIRType>(get<SPIRVariable>(id).basetype);
 }
 
+void Compiler::set_member_decoration_string(uint32_t id, uint32_t index, spv::Decoration decoration,
+                                            const std::string &argument)
+{
+	// Grow the member table on demand so that members[index] exists.
+	auto &members = meta.at(id).members;
+	if (members.size() <= index)
+		members.resize(size_t(index) + 1);
+
+	// Flag the decoration as present on this member.
+	auto &member_dec = members[index];
+	member_dec.decoration_flags.set(decoration);
+
+	// HlslSemanticGOOGLE is the only decoration whose string argument is stored here;
+	// for any other decoration only the flag above is recorded.
+	if (decoration == DecorationHlslSemanticGOOGLE)
+		member_dec.hlsl_semantic = argument;
+}
+
 void Compiler::set_member_decoration(uint32_t id, uint32_t index, Decoration decoration, uint32_t argument)
 {
 	meta.at(id).members.resize(max(meta[id].members.size(), size_t(index) + 1));
 	auto &dec = meta.at(id).members[index];
-	dec.decoration_flags |= 1ull << decoration;
+	dec.decoration_flags.set(decoration);
 
 	switch (decoration)
 	{
@@ -994,6 +1114,10 @@ void Compiler::set_member_decoration(uint32_t id, uint32_t index, Decoration dec
 		dec.matrix_stride = argument;
 		break;
 
+	case DecorationIndex:
+		dec.index = argument;
+		break;
+
 	default:
 		break;
 	}
@@ -1051,7 +1175,7 @@ uint32_t Compiler::get_member_decoration(uint32_t id, uint32_t index, Decoration
 		return 0;
 
 	auto &dec = m.members[index];
-	if (!(dec.decoration_flags & (1ull << decoration)))
+	if (!dec.decoration_flags.get(decoration))
 		return 0;
 
 	switch (decoration)
@@ -1066,23 +1190,33 @@ uint32_t Compiler::get_member_decoration(uint32_t id, uint32_t index, Decoration
 		return dec.offset;
 	case DecorationSpecId:
 		return dec.spec_id;
+	case DecorationIndex:
+		return dec.index;
 	default:
 		return 1;
 	}
 }
 
 uint64_t Compiler::get_member_decoration_mask(uint32_t id, uint32_t index) const
+{
+	return get_member_decoration_bitset(id, index).get_lower();
+}
+
+const Bitset &Compiler::get_member_decoration_bitset(uint32_t id, uint32_t index) const
 {
 	auto &m = meta.at(id);
 	if (index >= m.members.size())
-		return 0;
+	{
+		static const Bitset cleared = {};
+		return cleared;
+	}
 
 	return m.members[index].decoration_flags;
 }
 
 bool Compiler::has_member_decoration(uint32_t id, uint32_t index, Decoration decoration) const
 {
-	return get_member_decoration_mask(id, index) & (1ull << decoration);
+	return get_member_decoration_bitset(id, index).get(decoration);
 }
 
 void Compiler::unset_member_decoration(uint32_t id, uint32_t index, Decoration decoration)
@@ -1093,7 +1227,7 @@ void Compiler::unset_member_decoration(uint32_t id, uint32_t index, Decoration d
 
 	auto &dec = m.members[index];
 
-	dec.decoration_flags &= ~(1ull << decoration);
+	dec.decoration_flags.clear(decoration);
 	switch (decoration)
 	{
 	case DecorationBuiltIn:
@@ -1112,6 +1246,26 @@ void Compiler::unset_member_decoration(uint32_t id, uint32_t index, Decoration d
 		dec.spec_id = 0;
 		break;
 
+	case DecorationHlslSemanticGOOGLE:
+		dec.hlsl_semantic.clear();
+		break;
+
+	default:
+		break;
+	}
+}
+
+void Compiler::set_decoration_string(uint32_t id, spv::Decoration decoration, const std::string &argument)
+{
+	auto &dec = meta.at(id).decoration;
+	dec.decoration_flags.set(decoration);
+
+	switch (decoration)
+	{
+	case DecorationHlslSemanticGOOGLE:
+		dec.hlsl_semantic = argument;
+		break;
+
 	default:
 		break;
 	}
@@ -1120,7 +1274,7 @@ void Compiler::unset_member_decoration(uint32_t id, uint32_t index, Decoration d
 void Compiler::set_decoration(uint32_t id, Decoration decoration, uint32_t argument)
 {
 	auto &dec = meta.at(id).decoration;
-	dec.decoration_flags |= 1ull << decoration;
+	dec.decoration_flags.set(decoration);
 
 	switch (decoration)
 	{
@@ -1161,6 +1315,15 @@ void Compiler::set_decoration(uint32_t id, Decoration decoration, uint32_t argum
 		dec.spec_id = argument;
 		break;
 
+	case DecorationIndex:
+		dec.index = argument;
+		break;
+
+	case DecorationHlslCounterBufferGOOGLE:
+		meta.at(id).hlsl_magic_counter_buffer = argument;
+		meta.at(argument).hlsl_is_magic_counter_buffer = true;
+		break;
+
 	default:
 		break;
 	}
@@ -1191,6 +1354,11 @@ const std::string Compiler::get_block_fallback_name(uint32_t id) const
 }
 
 uint64_t Compiler::get_decoration_mask(uint32_t id) const
+{
+	return get_decoration_bitset(id).get_lower();
+}
+
+const Bitset &Compiler::get_decoration_bitset(uint32_t id) const
 {
 	auto &dec = meta.at(id).decoration;
 	return dec.decoration_flags;
@@ -1198,13 +1366,31 @@ uint64_t Compiler::get_decoration_mask(uint32_t id) const
 
 bool Compiler::has_decoration(uint32_t id, Decoration decoration) const
 {
-	return get_decoration_mask(id) & (1ull << decoration);
+	return get_decoration_bitset(id).get(decoration);
+}
+
+const string &Compiler::get_decoration_string(uint32_t id, spv::Decoration decoration) const
+{
+	auto &dec = meta.at(id).decoration;
+	static const string empty;
+
+	if (!dec.decoration_flags.get(decoration))
+		return empty;
+
+	switch (decoration)
+	{
+	case DecorationHlslSemanticGOOGLE:
+		return dec.hlsl_semantic;
+
+	default:
+		return empty;
+	}
 }
 
 uint32_t Compiler::get_decoration(uint32_t id, Decoration decoration) const
 {
 	auto &dec = meta.at(id).decoration;
-	if (!(dec.decoration_flags & (1ull << decoration)))
+	if (!dec.decoration_flags.get(decoration))
 		return 0;
 
 	switch (decoration)
@@ -1227,6 +1413,8 @@ uint32_t Compiler::get_decoration(uint32_t id, Decoration decoration) const
 		return dec.array_stride;
 	case DecorationMatrixStride:
 		return dec.matrix_stride;
+	case DecorationIndex:
+		return dec.index;
 	default:
 		return 1;
 	}
@@ -1235,7 +1423,7 @@ uint32_t Compiler::get_decoration(uint32_t id, Decoration decoration) const
 void Compiler::unset_decoration(uint32_t id, Decoration decoration)
 {
 	auto &dec = meta.at(id).decoration;
-	dec.decoration_flags &= ~(1ull << decoration);
+	dec.decoration_flags.clear(decoration);
 	switch (decoration)
 	{
 	case DecorationBuiltIn:
@@ -1266,6 +1454,21 @@ void Compiler::unset_decoration(uint32_t id, Decoration decoration)
 		dec.spec_id = 0;
 		break;
 
+	case DecorationHlslSemanticGOOGLE:
+		dec.hlsl_semantic.clear();
+		break;
+
+	case DecorationHlslCounterBufferGOOGLE:
+	{
+		auto &counter = meta.at(id).hlsl_magic_counter_buffer;
+		if (counter)
+		{
+			meta.at(counter).hlsl_is_magic_counter_buffer = false;
+			counter = 0;
+		}
+		break;
+	}
+
 	default:
 		break;
 	}
@@ -1403,7 +1606,7 @@ void Compiler::parse(const Instruction &instruction)
 	{
 		auto &execution = entry_points[ops[0]];
 		auto mode = static_cast<ExecutionMode>(ops[1]);
-		execution.flags |= 1ull << mode;
+		execution.flags.set(mode);
 
 		switch (mode)
 		{
@@ -1443,6 +1646,7 @@ void Compiler::parse(const Instruction &instruction)
 	}
 
 	case OpDecorate:
+	case OpDecorateId:
 	{
 		uint32_t id = ops[0];
 
@@ -1458,6 +1662,14 @@ void Compiler::parse(const Instruction &instruction)
 		break;
 	}
 
+	case OpDecorateStringGOOGLE:
+	{
+		uint32_t id = ops[0];
+		auto decoration = static_cast<Decoration>(ops[1]);
+		set_decoration_string(id, decoration, extract_string(spirv, instruction.offset + 2));
+		break;
+	}
+
 	case OpMemberDecorate:
 	{
 		uint32_t id = ops[0];
@@ -1470,6 +1682,15 @@ void Compiler::parse(const Instruction &instruction)
 		break;
 	}
 
+	case OpMemberDecorateStringGOOGLE:
+	{
+		uint32_t id = ops[0];
+		uint32_t member = ops[1];
+		auto decoration = static_cast<Decoration>(ops[2]);
+		set_member_decoration_string(id, member, decoration, extract_string(spirv, instruction.offset + 3));
+		break;
+	}
+
 	// Build up basic types.
 	case OpTypeVoid:
 	{
@@ -1493,7 +1714,14 @@ void Compiler::parse(const Instruction &instruction)
 		uint32_t id = ops[0];
 		uint32_t width = ops[1];
 		auto &type = set<SPIRType>(id);
-		type.basetype = width > 32 ? SPIRType::Double : SPIRType::Float;
+		if (width == 64)
+			type.basetype = SPIRType::Double;
+		else if (width == 32)
+			type.basetype = SPIRType::Float;
+		else if (width == 16)
+			type.basetype = SPIRType::Half;
+		else
+			SPIRV_CROSS_THROW("Unrecognized bit-width of floating point type.");
 		type.width = width;
 		break;
 	}
@@ -1810,9 +2038,28 @@ void Compiler::parse(const Instruction &instruction)
 			if (elements > 4)
 				SPIRV_CROSS_THROW("OpConstantComposite only supports 1, 2, 3 and 4 elements.");
 
+			SPIRConstant remapped_constant_ops[4];
 			const SPIRConstant *c[4];
 			for (uint32_t i = 0; i < elements; i++)
-				c[i] = &get<SPIRConstant>(ops[2 + i]);
+			{
+				// Specialization constants operations can also be part of this.
+				// We do not know their value, so any attempt to query SPIRConstant later
+				// will fail. We can only propagate the ID of the expression and use to_expression on it.
+				auto *constant_op = maybe_get<SPIRConstantOp>(ops[2 + i]);
+				if (constant_op)
+				{
+					if (op == OpConstantComposite)
+						SPIRV_CROSS_THROW("Specialization constant operation used in OpConstantComposite.");
+
+					remapped_constant_ops[i].make_null(get<SPIRType>(constant_op->basetype));
+					remapped_constant_ops[i].self = constant_op->self;
+					remapped_constant_ops[i].constant_type = constant_op->basetype;
+					remapped_constant_ops[i].specialization = true;
+					c[i] = &remapped_constant_ops[i];
+				}
+				else
+					c[i] = &get<SPIRConstant>(ops[2 + i]);
+			}
 			set<SPIRConstant>(id, type, c, elements, op == OpSpecConstantComposite);
 		}
 		break;
@@ -2030,7 +2277,7 @@ bool Compiler::block_is_loop_candidate(const SPIRBlock &block, SPIRBlock::Method
 	if (block.disable_block_optimization || block.complex_continue)
 		return false;
 
-	if (method == SPIRBlock::MergeToSelectForLoop)
+	if (method == SPIRBlock::MergeToSelectForLoop || method == SPIRBlock::MergeToSelectContinueForLoop)
 	{
 		// Try to detect common for loop pattern
 		// which the code backend can use to create cleaner code.
@@ -2040,6 +2287,9 @@ bool Compiler::block_is_loop_candidate(const SPIRBlock &block, SPIRBlock::Method
 		           block.true_block != block.merge_block && block.true_block != block.self &&
 		           block.false_block == block.merge_block;
 
+		if (ret && method == SPIRBlock::MergeToSelectContinueForLoop)
+			ret = block.true_block == block.continue_block;
+
 		// If we have OpPhi which depends on branches which came from our own block,
 		// we need to flush phi variables in else block instead of a trivial break,
 		// so we cannot assume this is a for loop candidate.
@@ -2237,7 +2487,7 @@ uint32_t Compiler::type_struct_member_offset(const SPIRType &type, uint32_t inde
 {
 	// Decoration must be set in valid SPIR-V, otherwise throw.
 	auto &dec = meta[type.self].members.at(index);
-	if (dec.decoration_flags & (1ull << DecorationOffset))
+	if (dec.decoration_flags.get(DecorationOffset))
 		return dec.offset;
 	else
 		SPIRV_CROSS_THROW("Struct member does not have Offset set.");
@@ -2248,7 +2498,7 @@ uint32_t Compiler::type_struct_member_array_stride(const SPIRType &type, uint32_
 	// Decoration must be set in valid SPIR-V, otherwise throw.
 	// ArrayStride is part of the array type not OpMemberDecorate.
 	auto &dec = meta[type.member_types[index]].decoration;
-	if (dec.decoration_flags & (1ull << DecorationArrayStride))
+	if (dec.decoration_flags.get(DecorationArrayStride))
 		return dec.array_stride;
 	else
 		SPIRV_CROSS_THROW("Struct member does not have ArrayStride set.");
@@ -2259,7 +2509,7 @@ uint32_t Compiler::type_struct_member_matrix_stride(const SPIRType &type, uint32
 	// Decoration must be set in valid SPIR-V, otherwise throw.
 	// MatrixStride is part of OpMemberDecorate.
 	auto &dec = meta[type.self].members[index];
-	if (dec.decoration_flags & (1ull << DecorationMatrixStride))
+	if (dec.decoration_flags.get(DecorationMatrixStride))
 		return dec.matrix_stride;
 	else
 		SPIRV_CROSS_THROW("Struct member does not have MatrixStride set.");
@@ -2267,6 +2517,9 @@ uint32_t Compiler::type_struct_member_matrix_stride(const SPIRType &type, uint32
 
 size_t Compiler::get_declared_struct_size(const SPIRType &type) const
 {
+	if (type.member_types.empty())
+		SPIRV_CROSS_THROW("Declared struct in block cannot be empty.");
+
 	uint32_t last = uint32_t(type.member_types.size() - 1);
 	size_t offset = type_struct_member_offset(type, last);
 	size_t size = get_declared_struct_member_size(type, last);
@@ -2275,7 +2528,10 @@ size_t Compiler::get_declared_struct_size(const SPIRType &type) const
 
 size_t Compiler::get_declared_struct_member_size(const SPIRType &struct_type, uint32_t index) const
 {
-	auto flags = get_member_decoration_mask(struct_type.self, index);
+	if (struct_type.member_types.empty())
+		SPIRV_CROSS_THROW("Declared struct in block cannot be empty.");
+
+	auto &flags = get_member_decoration_bitset(struct_type.self, index);
 	auto &type = get<SPIRType>(struct_type.member_types[index]);
 
 	switch (type.basetype)
@@ -2320,9 +2576,9 @@ size_t Compiler::get_declared_struct_member_size(const SPIRType &struct_type, ui
 			uint32_t matrix_stride = type_struct_member_matrix_stride(struct_type, index);
 
 			// Per SPIR-V spec, matrices must be tightly packed and aligned up for vec3 accesses.
-			if (flags & (1ull << DecorationRowMajor))
+			if (flags.get(DecorationRowMajor))
 				return matrix_stride * vecsize;
-			else if (flags & (1ull << DecorationColMajor))
+			else if (flags.get(DecorationColMajor))
 				return matrix_stride * columns;
 			else
 				SPIRV_CROSS_THROW("Either row-major or column-major must be declared for matrices.");
@@ -2430,6 +2686,11 @@ bool Compiler::types_are_logically_equivalent(const SPIRType &a, const SPIRType
 }
 
 uint64_t Compiler::get_execution_mode_mask() const
+{
+	return get_entry_point().flags.get_lower();
+}
+
+const Bitset &Compiler::get_execution_mode_bitset() const
 {
 	return get_entry_point().flags;
 }
@@ -2438,7 +2699,7 @@ void Compiler::set_execution_mode(ExecutionMode mode, uint32_t arg0, uint32_t ar
 {
 	auto &execution = get_entry_point();
 
-	execution.flags |= 1ull << mode;
+	execution.flags.set(mode);
 	switch (mode)
 	{
 	case ExecutionModeLocalSize:
@@ -2463,7 +2724,7 @@ void Compiler::set_execution_mode(ExecutionMode mode, uint32_t arg0, uint32_t ar
 void Compiler::unset_execution_mode(ExecutionMode mode)
 {
 	auto &execution = get_entry_point();
-	execution.flags &= ~(1ull << mode);
+	execution.flags.clear(mode);
 }
 
 uint32_t Compiler::get_work_group_size_specialization_constants(SpecializationConstant &x, SpecializationConstant &y,
@@ -2566,6 +2827,14 @@ void Compiler::inherit_expression_dependencies(uint32_t dst, uint32_t source_exp
 	}
 
 	auto &e = get<SPIRExpression>(dst);
+	auto *phi = maybe_get<SPIRVariable>(source_expression);
+	if (phi && phi->phi_variable)
+	{
+		// We have used a phi variable, which can change at the end of the block,
+		// so make sure we take a dependency on this phi variable.
+		phi->dependees.push_back(dst);
+	}
+
 	auto *s = maybe_get<SPIRExpression>(source_expression);
 	if (!s)
 		return;
@@ -2578,6 +2847,7 @@ void Compiler::inherit_expression_dependencies(uint32_t dst, uint32_t source_exp
 	e_deps.insert(end(e_deps), begin(s_deps), end(s_deps));
 
 	// Eliminate duplicated dependencies.
+	sort(begin(e_deps), end(e_deps));
 	e_deps.erase(unique(begin(e_deps), end(e_deps)), end(e_deps));
 }
 
@@ -2879,8 +3149,10 @@ void Compiler::CombinedImageSamplerHandler::register_combined_image_sampler(SPIR
 
 		// Inherit RelaxedPrecision (and potentially other useful flags if deemed relevant).
 		auto &new_flags = compiler.meta[combined_id].decoration.decoration_flags;
-		auto old_flags = compiler.meta[sampler_id].decoration.decoration_flags;
-		new_flags = old_flags & (1ull << DecorationRelaxedPrecision);
+		auto &old_flags = compiler.meta[sampler_id].decoration.decoration_flags;
+		new_flags.reset();
+		if (old_flags.get(DecorationRelaxedPrecision))
+			new_flags.set(DecorationRelaxedPrecision);
 
 		param.id = combined_id;
 
@@ -2925,8 +3197,12 @@ bool Compiler::DummySamplerForCombinedImageHandler::handle(Op opcode, const uint
 	}
 
 	case OpImageFetch:
+	case OpImageQuerySizeLod:
+	case OpImageQuerySize:
+	case OpImageQueryLevels:
+	case OpImageQuerySamples:
 	{
-		// If we are fetching from a plain OpTypeImage, we must pre-combine with our dummy sampler.
+		// If we are fetching from a plain OpTypeImage, or querying its size, level count or sample count, we must pre-combine with our dummy sampler.
 		auto *var = compiler.maybe_get_backing_variable(args[2]);
 		if (var)
 		{
@@ -2944,12 +3220,20 @@ bool Compiler::DummySamplerForCombinedImageHandler::handle(Op opcode, const uint
 		if (length < 3)
 			return false;
 
-		auto &type = compiler.get<SPIRType>(args[0]);
+		uint32_t result_type = args[0];
+		auto &type = compiler.get<SPIRType>(result_type);
 		bool separate_image =
 		    type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer;
-		if (separate_image)
-			SPIRV_CROSS_THROW("Attempting to use arrays or structs of separate images. This is not possible to "
-			                  "statically remap to plain GLSL.");
+		if (!separate_image)
+			return true;
+
+		uint32_t id = args[1];
+		uint32_t ptr = args[2];
+		compiler.set<SPIRExpression>(id, "", result_type, true);
+		compiler.register_read(id, ptr, true);
+
+		// Other backends might use SPIRAccessChain for this later.
+		compiler.ids[id].set_allow_type_rewrite();
 		break;
 	}
 
@@ -3001,22 +3285,33 @@ bool Compiler::CombinedImageSamplerHandler::handle(Op opcode, const uint32_t *ar
 		// but this seems ridiculously complicated for a problem which is easy to work around.
 		// Checking access chains like this assumes we don't have samplers or textures inside uniform structs, but this makes no sense.
 
-		auto &type = compiler.get<SPIRType>(args[0]);
+		uint32_t result_type = args[0];
+
+		auto &type = compiler.get<SPIRType>(result_type);
 		bool separate_image = type.basetype == SPIRType::Image && type.image.sampled == 1;
 		bool separate_sampler = type.basetype == SPIRType::Sampler;
-		if (separate_image)
-			SPIRV_CROSS_THROW("Attempting to use arrays or structs of separate images. This is not possible to "
-			                  "statically remap to plain GLSL.");
 		if (separate_sampler)
 			SPIRV_CROSS_THROW(
 			    "Attempting to use arrays or structs of separate samplers. This is not possible to statically "
 			    "remap to plain GLSL.");
+
+		if (separate_image)
+		{
+			uint32_t id = args[1];
+			uint32_t ptr = args[2];
+			compiler.set<SPIRExpression>(id, "", result_type, true);
+			compiler.register_read(id, ptr, true);
+		}
 		return true;
 	}
 
 	case OpImageFetch:
+	case OpImageQuerySizeLod:
+	case OpImageQuerySize:
+	case OpImageQueryLevels:
+	case OpImageQuerySamples:
 	{
-		// If we are fetching from a plain OpTypeImage, we must pre-combine with our dummy sampler.
+		// If we are fetching from a plain OpTypeImage, or querying its size, level count or sample count, we must pre-combine with our dummy sampler.
 		auto *var = compiler.maybe_get_backing_variable(args[2]);
 		if (!var)
 			return true;
@@ -3044,9 +3339,6 @@ bool Compiler::CombinedImageSamplerHandler::handle(Op opcode, const uint32_t *ar
 		return true;
 	}
 
-	if (length < 4)
-		return false;
-
 	// Registers sampler2D calls used in case they are parameters so
 	// that their callees know which combined image samplers to propagate down the call stack.
 	if (!functions.empty())
@@ -3093,6 +3385,7 @@ bool Compiler::CombinedImageSamplerHandler::handle(Op opcode, const uint32_t *ar
 			type = compiler.expression_type(args[2]);
 			type.self = sampled_type;
 			type.basetype = SPIRType::SampledImage;
+			type.image.depth = false;
 		}
 		else
 		{
@@ -3117,8 +3410,19 @@ bool Compiler::CombinedImageSamplerHandler::handle(Op opcode, const uint32_t *ar
 		// Inherit RelaxedPrecision (and potentially other useful flags if deemed relevant).
 		auto &new_flags = compiler.meta[combined_id].decoration.decoration_flags;
 		// Fetch inherits precision from the image, not sampler (there is no sampler).
-		auto old_flags = compiler.meta[is_fetch ? image_id : sampler_id].decoration.decoration_flags;
-		new_flags = old_flags & (1ull << DecorationRelaxedPrecision);
+		auto &old_flags = compiler.meta[is_fetch ? image_id : sampler_id].decoration.decoration_flags;
+		new_flags.reset();
+		if (old_flags.get(DecorationRelaxedPrecision))
+			new_flags.set(DecorationRelaxedPrecision);
+
+		// Propagate the array type for the original image as well.
+		auto *var = compiler.maybe_get_backing_variable(image_id);
+		if (var)
+		{
+			auto &parent_type = compiler.get<SPIRType>(var->basetype);
+			type.array = parent_type.array;
+			type.array_size_literal = parent_type.array_size_literal;
+		}
 
 		compiler.combined_image_samplers.push_back({ combined_id, image_id, sampler_id });
 	}
@@ -3182,10 +3486,8 @@ vector<SpecializationConstant> Compiler::get_specialization_constants() const
 		if (id.get_type() == TypeConstant)
 		{
 			auto &c = id.get<SPIRConstant>();
-			if (c.specialization)
-			{
+			if (c.specialization && has_decoration(c.self, DecorationSpecId))
 				spec_consts.push_back({ c.self, get_decoration(c.self, DecorationSpecId) });
-			}
 		}
 	}
 	return spec_consts;
@@ -3435,6 +3737,12 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry)
 					notify_variable_access(args[i], current_block->self);
 
 				// The result of an access chain is a fixed expression and is not really considered a temporary.
+				auto &e = compiler.set<SPIRExpression>(args[1], "", args[0], true);
+				auto *backing_variable = compiler.maybe_get_backing_variable(ptr);
+				e.loaded_from = backing_variable ? backing_variable->self : 0;
+
+				// Other backends might use SPIRAccessChain for this later.
+				compiler.ids[args[1]].set_allow_type_rewrite();
 				break;
 			}
 
@@ -3601,7 +3909,8 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry)
 				// so we will have to lift the dominator up to the relevant loop header instead.
 				builder.add_block(this->continue_block_to_loop_header[block]);
 
-				if (type.vecsize == 1 && type.columns == 1)
+				// Arrays or structs cannot be loop variables.
+				if (type.vecsize == 1 && type.columns == 1 && type.basetype != SPIRType::Struct && type.array.empty())
 				{
 					// The variable is used in multiple continue blocks, this is not a loop
 					// candidate, signal that by setting block to -1u.
@@ -3643,6 +3952,7 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry)
 		}
 
 		DominatorBuilder builder(cfg);
+		bool force_temporary = false;
 
 		// Figure out which block is dominating all accesses of those temporaries.
 		auto &blocks = var.second;
@@ -3654,6 +3964,11 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry)
 			// access up to loop header like we did for variables.
 			if (blocks.size() != 1 && this->is_continue(block))
 				builder.add_block(this->continue_block_to_loop_header[block]);
+			else if (blocks.size() != 1 && this->is_single_block_loop(block))
+			{
+				// Awkward case, because the loop header is also the continue block.
+				force_temporary = true;
+			}
 		}
 
 		uint32_t dominating_block = builder.get_dominator();
@@ -3663,7 +3978,7 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry)
 			// SPIR-V normally mandates this, but we have extra cases for temporary use inside loops.
 			bool first_use_is_dominator = blocks.count(dominating_block) != 0;
 
-			if (!first_use_is_dominator)
+			if (!first_use_is_dominator || force_temporary)
 			{
 				// This should be very rare, but if we try to declare a temporary inside a loop,
 				// and that temporary is used outside the loop as well (spirv-opt inliner likes this)
@@ -3674,6 +3989,16 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry)
 				auto &block_temporaries = this->get<SPIRBlock>(dominating_block).declare_temporary;
 				block_temporaries.emplace_back(handler.result_id_to_type[var.first], var.first);
 			}
+			else if (blocks.size() > 1)
+			{
+				// Keep track of the temporary as we might have to declare this temporary.
+				// This can happen if the loop header dominates a temporary, but we have a complex fallback loop.
+				// In this case, the header is actually inside the for (;;) {} block, and we have problems.
+				// What we need to do is hoist the temporaries outside the for (;;) {} block in case the header block
+				// declares the temporary.
+				auto &block_temporaries = this->get<SPIRBlock>(dominating_block).potential_declare_temporary;
+				block_temporaries.emplace_back(handler.result_id_to_type[var.first], var.first);
+			}
 		}
 	}
 
@@ -3768,7 +4093,12 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry)
 	}
 }
 
-uint64_t Compiler::get_buffer_block_flags(const SPIRVariable &var)
+Bitset Compiler::get_buffer_block_flags(uint32_t id) const
+{
+	return get_buffer_block_flags(get<SPIRVariable>(id));
+}
+
+Bitset Compiler::get_buffer_block_flags(const SPIRVariable &var) const
 {
 	auto &type = get<SPIRType>(var.basetype);
 	assert(type.basetype == SPIRType::Struct);
@@ -3776,16 +4106,17 @@ uint64_t Compiler::get_buffer_block_flags(const SPIRVariable &var)
 	// Some flags like non-writable, non-readable are actually found
 	// as member decorations. If all members have a decoration set, propagate
 	// the decoration up as a regular variable decoration.
-	uint64_t base_flags = meta[var.self].decoration.decoration_flags;
+	Bitset base_flags = meta[var.self].decoration.decoration_flags;
 
 	if (type.member_types.empty())
 		return base_flags;
 
-	uint64_t all_members_flag_mask = ~(0ull);
-	for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
-		all_members_flag_mask &= get_member_decoration_mask(type.self, i);
+	Bitset all_members_flags = get_member_decoration_bitset(type.self, 0);
+	for (uint32_t i = 1; i < uint32_t(type.member_types.size()); i++)
+		all_members_flags.merge_and(get_member_decoration_bitset(type.self, i));
 
-	return base_flags | all_members_flag_mask;
+	base_flags.merge_or(all_members_flags);
+	return base_flags;
 }
 
 bool Compiler::get_common_basic_type(const SPIRType &type, SPIRType::BaseType &base_type)
@@ -3813,7 +4144,8 @@ bool Compiler::get_common_basic_type(const SPIRType &type, SPIRType::BaseType &b
 	}
 }
 
-void Compiler::ActiveBuiltinHandler::handle_builtin(const SPIRType &type, BuiltIn builtin, uint64_t decoration_flags)
+void Compiler::ActiveBuiltinHandler::handle_builtin(const SPIRType &type, BuiltIn builtin,
+                                                    const Bitset &decoration_flags)
 {
 	// If used, we will need to explicitly declare a new array size for these builtins.
 
@@ -3837,7 +4169,7 @@ void Compiler::ActiveBuiltinHandler::handle_builtin(const SPIRType &type, BuiltI
 	}
 	else if (builtin == BuiltInPosition)
 	{
-		if (decoration_flags & (1ull << DecorationInvariant))
+		if (decoration_flags.get(DecorationInvariant))
 			compiler.position_invariant = true;
 	}
 }
@@ -3854,7 +4186,7 @@ bool Compiler::ActiveBuiltinHandler::handle(spv::Op opcode, const uint32_t *args
 			auto &type = compiler.get<SPIRType>(var->basetype);
 			auto &flags =
 			    type.storage == StorageClassInput ? compiler.active_input_builtins : compiler.active_output_builtins;
-			flags |= 1ull << decorations.builtin_type;
+			flags.set(decorations.builtin_type);
 			handle_builtin(type, decorations.builtin_type, decorations.decoration_flags);
 		}
 	};
@@ -3942,7 +4274,7 @@ bool Compiler::ActiveBuiltinHandler::handle(spv::Op opcode, const uint32_t *args
 					auto &decorations = compiler.meta[type->self].members[index];
 					if (decorations.builtin)
 					{
-						flags |= 1ull << decorations.builtin_type;
+						flags.set(decorations.builtin_type);
 						handle_builtin(compiler.get<SPIRType>(type->member_types[index]), decorations.builtin_type,
 						               decorations.decoration_flags);
 					}
@@ -3968,8 +4300,8 @@ bool Compiler::ActiveBuiltinHandler::handle(spv::Op opcode, const uint32_t *args
 
 void Compiler::update_active_builtins()
 {
-	active_input_builtins = 0;
-	active_output_builtins = 0;
+	active_input_builtins.reset();
+	active_output_builtins.reset();
 	cull_distance_count = 0;
 	clip_distance_count = 0;
 	ActiveBuiltinHandler handler(*this);
@@ -3979,20 +4311,20 @@ void Compiler::update_active_builtins()
 // Returns whether this shader uses a builtin of the storage class
 bool Compiler::has_active_builtin(BuiltIn builtin, StorageClass storage)
 {
-	uint64_t flags;
+	const Bitset *flags;
 	switch (storage)
 	{
 	case StorageClassInput:
-		flags = active_input_builtins;
+		flags = &active_input_builtins;
 		break;
 	case StorageClassOutput:
-		flags = active_output_builtins;
+		flags = &active_output_builtins;
 		break;
 
 	default:
 		return false;
 	}
-	return flags & (1ull << builtin);
+	return flags->get(builtin);
 }
 
 void Compiler::analyze_image_and_sampler_usage()
@@ -4087,6 +4419,13 @@ bool Compiler::CombinedImageSamplerUsageHandler::handle(Op opcode, const uint32_
 
 bool Compiler::buffer_is_hlsl_counter_buffer(uint32_t id) const
 {
+	// First, check for the proper decoration.
+	if (meta.at(id).hlsl_is_magic_counter_buffer)
+		return true;
+
+	// Check for legacy fallback method.
+	// FIXME: This should be deprecated eventually.
+
 	if (meta.at(id).hlsl_magic_counter_buffer_candidate)
 	{
 		auto *var = maybe_get<SPIRVariable>(id);
@@ -4100,6 +4439,16 @@ bool Compiler::buffer_is_hlsl_counter_buffer(uint32_t id) const
 
 bool Compiler::buffer_get_hlsl_counter_buffer(uint32_t id, uint32_t &counter_id) const
 {
+	// First, check for the proper decoration.
+	if (meta[id].hlsl_magic_counter_buffer != 0)
+	{
+		counter_id = meta[id].hlsl_magic_counter_buffer;
+		return true;
+	}
+
+	// Check for legacy fallback method.
+	// FIXME: This should be deprecated eventually.
+
 	auto &name = get_name(id);
 	uint32_t id_bound = get_current_id_bound();
 	for (uint32_t i = 0; i < id_bound; i++)
diff --git a/spirv_cross.hpp b/spirv_cross.hpp
index 193efc7477..12dcae34e9 100644
--- a/spirv_cross.hpp
+++ b/spirv_cross.hpp
@@ -136,6 +136,7 @@ public:
 
 	// Applies a decoration to an ID. Effectively injects OpDecorate.
 	void set_decoration(uint32_t id, spv::Decoration decoration, uint32_t argument = 0);
+	void set_decoration_string(uint32_t id, spv::Decoration decoration, const std::string &argument);
 
 	// Overrides the identifier OpName of an ID.
 	// Identifiers beginning with underscores or identifiers which contain double underscores
@@ -144,7 +145,9 @@ public:
 
 	// Gets a bitmask for the decorations which are applied to ID.
 	// I.e. (1ull << spv::DecorationFoo) | (1ull << spv::DecorationBar)
+	SPIRV_CROSS_DEPRECATED("Please use get_decoration_bitset instead.")
 	uint64_t get_decoration_mask(uint32_t id) const;
+	const Bitset &get_decoration_bitset(uint32_t id) const;
 
 	// Returns whether the decoration has been applied to the ID.
 	bool has_decoration(uint32_t id, spv::Decoration decoration) const;
@@ -155,6 +158,7 @@ public:
 	// If decoration doesn't exist or decoration is not recognized,
 	// 0 will be returned.
 	uint32_t get_decoration(uint32_t id, spv::Decoration decoration) const;
+	const std::string &get_decoration_string(uint32_t id, spv::Decoration decoration) const;
 
 	// Removes the decoration for a an ID.
 	void unset_decoration(uint32_t id, spv::Decoration decoration);
@@ -183,6 +187,7 @@ public:
 
 	// Given an OpTypeStruct in ID, obtain the OpMemberDecoration for member number "index".
 	uint32_t get_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration) const;
+	const std::string &get_member_decoration_string(uint32_t id, uint32_t index, spv::Decoration decoration) const;
 
 	// Sets the member identifier for OpTypeStruct ID, member number "index".
 	void set_member_name(uint32_t id, uint32_t index, const std::string &name);
@@ -195,13 +200,17 @@ public:
 	void set_member_qualified_name(uint32_t type_id, uint32_t index, const std::string &name);
 
 	// Gets the decoration mask for a member of a struct, similar to get_decoration_mask.
+	SPIRV_CROSS_DEPRECATED("Please use get_member_decoration_bitset instead.")
 	uint64_t get_member_decoration_mask(uint32_t id, uint32_t index) const;
+	const Bitset &get_member_decoration_bitset(uint32_t id, uint32_t index) const;
 
 	// Returns whether the decoration has been applied to a member of a struct.
 	bool has_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration) const;
 
 	// Similar to set_decoration, but for struct members.
 	void set_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration, uint32_t argument = 0);
+	void set_member_decoration_string(uint32_t id, uint32_t index, spv::Decoration decoration,
+	                                  const std::string &argument);
 
 	// Unsets a member decoration, similar to unset_decoration.
 	void unset_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration);
@@ -311,7 +320,10 @@ public:
 	                                                 spv::ExecutionModel execution_model) const;
 
 	// Query and modify OpExecutionMode.
+	SPIRV_CROSS_DEPRECATED("Please use get_execution_mode_bitset instead.")
 	uint64_t get_execution_mode_mask() const;
+	const Bitset &get_execution_mode_bitset() const;
+
 	void unset_execution_mode(spv::ExecutionMode mode);
 	void set_execution_mode(spv::ExecutionMode mode, uint32_t arg0 = 0, uint32_t arg1 = 0, uint32_t arg2 = 0);
 
@@ -372,6 +384,8 @@ public:
 	// so this can be added before compile() if desired.
 	//
 	// Combined image samplers originating from this set are always considered active variables.
+	// Arrays of separate samplers are not supported, but arrays of separate images are supported.
+	// Array of images + sampler -> Array of combined image samplers.
 	void build_combined_image_samplers();
 
 	// Gets a remapping for the combined image samplers.
@@ -434,14 +448,18 @@ public:
 	// which lets us link the two buffers together.
 
 	// Queries if a variable ID is a counter buffer which "belongs" to a regular buffer object.
-	// NOTE: This query is purely based on OpName identifiers as found in the SPIR-V module, and will
+
+	// If SPV_GOOGLE_hlsl_functionality1 is used, this can be used even with a stripped SPIR-V module.
+	// Otherwise, this query is purely based on OpName identifiers as found in the SPIR-V module, and will
 	// only return true if OpSource was reported HLSL.
 	// To rely on this functionality, ensure that the SPIR-V module is not stripped.
+
 	bool buffer_is_hlsl_counter_buffer(uint32_t id) const;
 
 	// Queries if a buffer object has a neighbor "counter" buffer.
 	// If so, the ID of that counter buffer will be returned in counter_id.
-	// NOTE: This query is purely based on OpName identifiers as found in the SPIR-V module, and will
+	// If SPV_GOOGLE_hlsl_functionality1 is used, this can be used even with a stripped SPIR-V module.
+	// Otherwise, this query is purely based on OpName identifiers as found in the SPIR-V module, and will
 	// only return true if OpSource was reported HLSL.
 	// To rely on this functionality, ensure that the SPIR-V module is not stripped.
 	bool buffer_get_hlsl_counter_buffer(uint32_t id, uint32_t &counter_id) const;
@@ -465,6 +483,12 @@ public:
 	// ID is the name of a variable as returned by Resource::id, and must be a variable with a Block-like type.
 	std::string get_remapped_declared_block_name(uint32_t id) const;
 
+	// For buffer block variables, get the decorations for that variable.
+	// Sometimes, decorations for buffer blocks are found in member decorations instead
+	// of direct decorations on the variable itself.
+	// The most common use here is to check if a buffer is readonly or writeonly.
+	Bitset get_buffer_block_flags(uint32_t id) const;
+
 protected:
 	const uint32_t *stream(const Instruction &instr) const
 	{
@@ -579,12 +603,23 @@ protected:
 		return continue_blocks.find(next) != end(continue_blocks);
 	}
 
+	inline bool is_single_block_loop(uint32_t next) const
+	{
+		auto &block = get<SPIRBlock>(next);
+		return block.merge == SPIRBlock::MergeLoop && block.continue_block == next;
+	}
+
 	inline bool is_break(uint32_t next) const
 	{
 		return loop_merge_targets.find(next) != end(loop_merge_targets) ||
 		       multiselect_merge_targets.find(next) != end(multiselect_merge_targets);
 	}
 
+	inline bool is_loop_break(uint32_t next) const
+	{
+		return loop_merge_targets.find(next) != end(loop_merge_targets);
+	}
+
 	inline bool is_conditional(uint32_t next) const
 	{
 		return selection_merge_targets.find(next) != end(selection_merge_targets) &&
@@ -594,6 +629,7 @@ protected:
 	// Dependency tracking for temporaries read from variables.
 	void flush_dependees(SPIRVariable &var);
 	void flush_all_active_variables();
+	void flush_control_dependent_expressions(uint32_t block);
 	void flush_all_atomic_capable_variables();
 	void flush_all_aliased_variables();
 	void register_global_read_dependencies(const SPIRBlock &func, uint32_t id);
@@ -742,7 +778,7 @@ protected:
 		bool handle(spv::Op opcode, const uint32_t *args, uint32_t length) override;
 		Compiler &compiler;
 
-		void handle_builtin(const SPIRType &type, spv::BuiltIn builtin, uint64_t decoration_flags);
+		void handle_builtin(const SPIRType &type, spv::BuiltIn builtin, const Bitset &decoration_flags);
 	};
 
 	bool traverse_all_reachable_opcodes(const SPIRBlock &block, OpcodeHandler &handler) const;
@@ -754,15 +790,15 @@ protected:
 
 	VariableTypeRemapCallback variable_remap_callback;
 
-	uint64_t get_buffer_block_flags(const SPIRVariable &var);
+	Bitset get_buffer_block_flags(const SPIRVariable &var) const;
 	bool get_common_basic_type(const SPIRType &type, SPIRType::BaseType &base_type);
 
 	std::unordered_set<uint32_t> forced_temporaries;
 	std::unordered_set<uint32_t> forwarded_temporaries;
 	std::unordered_set<uint32_t> hoisted_temporaries;
 
-	uint64_t active_input_builtins = 0;
-	uint64_t active_output_builtins = 0;
+	Bitset active_input_builtins;
+	Bitset active_output_builtins;
 	uint32_t clip_distance_count = 0;
 	uint32_t cull_distance_count = 0;
 	bool position_invariant = false;
@@ -824,7 +860,10 @@ private:
 	// Used only to implement the old deprecated get_entry_point() interface.
 	const SPIREntryPoint &get_first_entry_point(const std::string &name) const;
 	SPIREntryPoint &get_first_entry_point(const std::string &name);
+
+	void fixup_type_alias();
+	bool type_is_block_like(const SPIRType &type) const;
 };
-}
+} // namespace spirv_cross
 
 #endif
diff --git a/spirv_cross_util.cpp b/spirv_cross_util.cpp
index 5bc2f3517a..4cf336e87d 100644
--- a/spirv_cross_util.cpp
+++ b/spirv_cross_util.cpp
@@ -48,4 +48,23 @@ void rename_interface_variable(spirv_cross::Compiler &compiler, const std::vecto
 		compiler.set_name(v.id, name);
 	}
 }
+
+void inherit_combined_sampler_bindings(spirv_cross::Compiler &compiler)
+{
+	// For every combined image sampler, copy the DescriptorSet and Binding decorations
+	// (in that order) from the source image onto the combined ID.
+	// A decoration which the image does not carry is left untouched on the combined ID.
+	static const spv::Decoration inherited[] = { spv::DecorationDescriptorSet, spv::DecorationBinding };
+	for (auto &combined : compiler.get_combined_image_samplers())
+	{
+		for (auto decoration : inherited)
+		{
+			if (!compiler.has_decoration(combined.image_id, decoration))
+				continue;
+
+			uint32_t value = compiler.get_decoration(combined.image_id, decoration);
+			compiler.set_decoration(combined.combined_id, decoration, value);
+		}
+	}
 }
+} // namespace spirv_cross_util
diff --git a/spirv_cross_util.hpp b/spirv_cross_util.hpp
index 509c7c09ce..cabfa413f9 100644
--- a/spirv_cross_util.hpp
+++ b/spirv_cross_util.hpp
@@ -23,6 +23,7 @@ namespace spirv_cross_util
 {
 void rename_interface_variable(spirv_cross::Compiler &compiler, const std::vector<spirv_cross::Resource> &resources,
                                uint32_t location, const std::string &name);
-}
+void inherit_combined_sampler_bindings(spirv_cross::Compiler &compiler);
+} // namespace spirv_cross_util
 
 #endif
diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp
index 882d241f5f..500c78b9da 100644
--- a/spirv_glsl.cpp
+++ b/spirv_glsl.cpp
@@ -26,6 +26,45 @@ using namespace spv;
 using namespace spirv_cross;
 using namespace std;
 
+static bool is_unsigned_opcode(Op op)
+{
+	// Returns true for the unsigned opcodes listed below. Deliberately not exhaustive: only used for legacy target checking.
+	switch (op)
+	{
+	case OpShiftRightLogical:
+	case OpUGreaterThan:
+	case OpUGreaterThanEqual:
+	case OpULessThan:
+	case OpULessThanEqual:
+	case OpUConvert:
+	case OpUDiv:
+	case OpUMod:
+	case OpUMulExtended:
+	case OpConvertUToF:
+	case OpConvertFToU:
+		return true;
+
+	default:
+		return false;
+	}
+}
+
+static bool is_unsigned_glsl_opcode(GLSLstd450 op)
+{
+	// Returns true for the unsigned GLSL.std.450 ops listed below. Deliberately not exhaustive: only used for legacy target checking.
+	switch (op)
+	{
+	case GLSLstd450UClamp:
+	case GLSLstd450UMin:
+	case GLSLstd450UMax:
+	case GLSLstd450FindUMsb:
+		return true;
+
+	default:
+		return false;
+	}
+}
+
 static bool packing_is_vec4_padded(BufferPackingStandard packing)
 {
 	switch (packing)
@@ -309,7 +348,7 @@ void CompilerGLSL::find_static_extensions()
 				if (options.es)
 					SPIRV_CROSS_THROW("FP64 not supported in ES profile.");
 				if (!options.es && options.version < 400)
-					require_extension("GL_ARB_gpu_shader_fp64");
+					require_extension_internal("GL_ARB_gpu_shader_fp64");
 			}
 
 			if (type.basetype == SPIRType::Int64 || type.basetype == SPIRType::UInt64)
@@ -317,8 +356,11 @@ void CompilerGLSL::find_static_extensions()
 				if (options.es)
 					SPIRV_CROSS_THROW("64-bit integers not supported in ES profile.");
 				if (!options.es)
-					require_extension("GL_ARB_gpu_shader_int64");
+					require_extension_internal("GL_ARB_gpu_shader_int64");
 			}
+
+			if (type.basetype == SPIRType::Half)
+				require_extension_internal("GL_AMD_gpu_shader_half_float");
 		}
 	}
 
@@ -327,31 +369,31 @@ void CompilerGLSL::find_static_extensions()
 	{
 	case ExecutionModelGLCompute:
 		if (!options.es && options.version < 430)
-			require_extension("GL_ARB_compute_shader");
+			require_extension_internal("GL_ARB_compute_shader");
 		if (options.es && options.version < 310)
 			SPIRV_CROSS_THROW("At least ESSL 3.10 required for compute shaders.");
 		break;
 
 	case ExecutionModelGeometry:
 		if (options.es && options.version < 320)
-			require_extension("GL_EXT_geometry_shader");
+			require_extension_internal("GL_EXT_geometry_shader");
 		if (!options.es && options.version < 150)
-			require_extension("GL_ARB_geometry_shader4");
+			require_extension_internal("GL_ARB_geometry_shader4");
 
-		if ((execution.flags & (1ull << ExecutionModeInvocations)) && execution.invocations != 1)
+		if (execution.flags.get(ExecutionModeInvocations) && execution.invocations != 1)
 		{
 			// Instanced GS is part of 400 core or this extension.
 			if (!options.es && options.version < 400)
-				require_extension("GL_ARB_gpu_shader5");
+				require_extension_internal("GL_ARB_gpu_shader5");
 		}
 		break;
 
 	case ExecutionModelTessellationEvaluation:
 	case ExecutionModelTessellationControl:
 		if (options.es && options.version < 320)
-			require_extension("GL_EXT_tessellation_shader");
+			require_extension_internal("GL_EXT_tessellation_shader");
 		if (!options.es && options.version < 400)
-			require_extension("GL_ARB_tessellation_shader");
+			require_extension_internal("GL_ARB_tessellation_shader");
 		break;
 
 	default:
@@ -359,10 +401,10 @@ void CompilerGLSL::find_static_extensions()
 	}
 
 	if (!pls_inputs.empty() || !pls_outputs.empty())
-		require_extension("GL_EXT_shader_pixel_local_storage");
+		require_extension_internal("GL_EXT_shader_pixel_local_storage");
 
 	if (options.separate_shader_objects && !options.es && options.version < 410)
-		require_extension("GL_ARB_separate_shader_objects");
+		require_extension_internal("GL_ARB_separate_shader_objects");
 }
 
 string CompilerGLSL::compile()
@@ -395,7 +437,7 @@ string CompilerGLSL::compile()
 		emit_header();
 		emit_resources();
 
-		emit_function(get<SPIRFunction>(entry_point), 0);
+		emit_function(get<SPIRFunction>(entry_point), Bitset());
 
 		pass_count++;
 	} while (force_recompile);
@@ -426,8 +468,8 @@ void CompilerGLSL::emit_header()
 			statement("#endif");
 		}
 		// Needed for: layout(early_fragment_tests) in;
-		if (execution.flags & (1ull << ExecutionModeEarlyFragmentTests))
-			require_extension("GL_ARB_shader_image_load_store");
+		if (execution.flags.get(ExecutionModeEarlyFragmentTests))
+			require_extension_internal("GL_ARB_shader_image_load_store");
 	}
 
 	for (auto &ext : forced_extensions)
@@ -443,54 +485,54 @@ void CompilerGLSL::emit_header()
 	{
 	case ExecutionModelGeometry:
 		outputs.push_back(join("max_vertices = ", execution.output_vertices));
-		if ((execution.flags & (1ull << ExecutionModeInvocations)) && execution.invocations != 1)
+		if ((execution.flags.get(ExecutionModeInvocations)) && execution.invocations != 1)
 			inputs.push_back(join("invocations = ", execution.invocations));
-		if (execution.flags & (1ull << ExecutionModeInputPoints))
+		if (execution.flags.get(ExecutionModeInputPoints))
 			inputs.push_back("points");
-		if (execution.flags & (1ull << ExecutionModeInputLines))
+		if (execution.flags.get(ExecutionModeInputLines))
 			inputs.push_back("lines");
-		if (execution.flags & (1ull << ExecutionModeInputLinesAdjacency))
+		if (execution.flags.get(ExecutionModeInputLinesAdjacency))
 			inputs.push_back("lines_adjacency");
-		if (execution.flags & (1ull << ExecutionModeTriangles))
+		if (execution.flags.get(ExecutionModeTriangles))
 			inputs.push_back("triangles");
-		if (execution.flags & (1ull << ExecutionModeInputTrianglesAdjacency))
+		if (execution.flags.get(ExecutionModeInputTrianglesAdjacency))
 			inputs.push_back("triangles_adjacency");
-		if (execution.flags & (1ull << ExecutionModeOutputTriangleStrip))
+		if (execution.flags.get(ExecutionModeOutputTriangleStrip))
 			outputs.push_back("triangle_strip");
-		if (execution.flags & (1ull << ExecutionModeOutputPoints))
+		if (execution.flags.get(ExecutionModeOutputPoints))
 			outputs.push_back("points");
-		if (execution.flags & (1ull << ExecutionModeOutputLineStrip))
+		if (execution.flags.get(ExecutionModeOutputLineStrip))
 			outputs.push_back("line_strip");
 		break;
 
 	case ExecutionModelTessellationControl:
-		if (execution.flags & (1ull << ExecutionModeOutputVertices))
+		if (execution.flags.get(ExecutionModeOutputVertices))
 			outputs.push_back(join("vertices = ", execution.output_vertices));
 		break;
 
 	case ExecutionModelTessellationEvaluation:
-		if (execution.flags & (1ull << ExecutionModeQuads))
+		if (execution.flags.get(ExecutionModeQuads))
 			inputs.push_back("quads");
-		if (execution.flags & (1ull << ExecutionModeTriangles))
+		if (execution.flags.get(ExecutionModeTriangles))
 			inputs.push_back("triangles");
-		if (execution.flags & (1ull << ExecutionModeIsolines))
+		if (execution.flags.get(ExecutionModeIsolines))
 			inputs.push_back("isolines");
-		if (execution.flags & (1ull << ExecutionModePointMode))
+		if (execution.flags.get(ExecutionModePointMode))
 			inputs.push_back("point_mode");
 
-		if ((execution.flags & (1ull << ExecutionModeIsolines)) == 0)
+		if (!execution.flags.get(ExecutionModeIsolines))
 		{
-			if (execution.flags & (1ull << ExecutionModeVertexOrderCw))
+			if (execution.flags.get(ExecutionModeVertexOrderCw))
 				inputs.push_back("cw");
-			if (execution.flags & (1ull << ExecutionModeVertexOrderCcw))
+			if (execution.flags.get(ExecutionModeVertexOrderCcw))
 				inputs.push_back("ccw");
 		}
 
-		if (execution.flags & (1ull << ExecutionModeSpacingFractionalEven))
+		if (execution.flags.get(ExecutionModeSpacingFractionalEven))
 			inputs.push_back("fractional_even_spacing");
-		if (execution.flags & (1ull << ExecutionModeSpacingFractionalOdd))
+		if (execution.flags.get(ExecutionModeSpacingFractionalOdd))
 			inputs.push_back("fractional_odd_spacing");
-		if (execution.flags & (1ull << ExecutionModeSpacingEqual))
+		if (execution.flags.get(ExecutionModeSpacingEqual))
 			inputs.push_back("equal_spacing");
 		break;
 
@@ -580,11 +622,11 @@ void CompilerGLSL::emit_header()
 			}
 		}
 
-		if (execution.flags & (1ull << ExecutionModeEarlyFragmentTests))
+		if (execution.flags.get(ExecutionModeEarlyFragmentTests))
 			inputs.push_back("early_fragment_tests");
-		if (execution.flags & (1ull << ExecutionModeDepthGreater))
+		if (execution.flags.get(ExecutionModeDepthGreater))
 			inputs.push_back("depth_greater");
-		if (execution.flags & (1ull << ExecutionModeDepthLess))
+		if (execution.flags.get(ExecutionModeDepthLess))
 			inputs.push_back("depth_less");
 
 		break;
@@ -612,13 +654,8 @@ void CompilerGLSL::emit_struct(SPIRType &type)
 	// with just different offsets, matrix layouts, etc ...
 	// Type-punning with these types is legal, which complicates things
 	// when we are storing struct and array types in an SSBO for example.
-	if (type.type_alias != 0)
-		return;
-
-	// Don't declare empty structs in GLSL, this is not allowed.
-	// Empty structs is a corner case of HLSL output, and only sensible thing to do is avoiding to declare
-	// these types.
-	if (type_is_empty(type))
+	// If the type master is packed however, we can no longer assume that the struct declaration will be redundant.
+	if (type.type_alias != 0 && !has_decoration(type.type_alias, DecorationCPacked))
 		return;
 
 	add_resource_name(type.self);
@@ -638,45 +675,55 @@ void CompilerGLSL::emit_struct(SPIRType &type)
 		i++;
 		emitted = true;
 	}
+
+	// GLSL does not allow empty structs, so emit a placeholder member when the backend cannot declare them.
+	if (type_is_empty(type) && !backend.supports_empty_struct)
+	{
+		statement("int empty_struct_member;");
+		emitted = true;
+	}
+
 	end_scope_decl();
 
 	if (emitted)
 		statement("");
 }
 
-uint64_t CompilerGLSL::combined_decoration_for_member(const SPIRType &type, uint32_t index)
+Bitset CompilerGLSL::combined_decoration_for_member(const SPIRType &type, uint32_t index)
 {
-	uint64_t flags = 0;
+	Bitset flags;
 	auto &memb = meta[type.self].members;
 	if (index >= memb.size())
-		return 0;
+		return flags;
 	auto &dec = memb[index];
 
 	// If our type is a struct, traverse all the members as well recursively.
-	flags |= dec.decoration_flags;
+	flags.merge_or(dec.decoration_flags);
 	for (uint32_t i = 0; i < type.member_types.size(); i++)
-		flags |= combined_decoration_for_member(get<SPIRType>(type.member_types[i]), i);
+		flags.merge_or(combined_decoration_for_member(get<SPIRType>(type.member_types[i]), i));
 
 	return flags;
 }
 
-string CompilerGLSL::to_interpolation_qualifiers(uint64_t flags)
+string CompilerGLSL::to_interpolation_qualifiers(const Bitset &flags)
 {
 	string res;
 	//if (flags & (1ull << DecorationSmooth))
 	//    res += "smooth ";
-	if (flags & (1ull << DecorationFlat))
+	if (flags.get(DecorationFlat))
 		res += "flat ";
-	if (flags & (1ull << DecorationNoPerspective))
+	if (flags.get(DecorationNoPerspective))
 		res += "noperspective ";
-	if (flags & (1ull << DecorationCentroid))
+	if (flags.get(DecorationCentroid))
 		res += "centroid ";
-	if (flags & (1ull << DecorationPatch))
+	if (flags.get(DecorationPatch))
 		res += "patch ";
-	if (flags & (1ull << DecorationSample))
+	if (flags.get(DecorationSample))
 		res += "sample ";
-	if (flags & (1ull << DecorationInvariant))
+	if (flags.get(DecorationInvariant))
 		res += "invariant ";
+	if (flags.get(DecorationExplicitInterpAMD))
+		res += "__explicitInterpAMD ";
 
 	return res;
 }
@@ -686,8 +733,8 @@ string CompilerGLSL::layout_for_member(const SPIRType &type, uint32_t index)
 	if (is_legacy())
 		return "";
 
-	bool is_block = (meta[type.self].decoration.decoration_flags &
-	                 ((1ull << DecorationBlock) | (1ull << DecorationBufferBlock))) != 0;
+	bool is_block = meta[type.self].decoration.decoration_flags.get(DecorationBlock) ||
+	                meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
 	if (!is_block)
 		return "";
 
@@ -714,18 +761,18 @@ string CompilerGLSL::layout_for_member(const SPIRType &type, uint32_t index)
 	// buffer UBO { layout(row_major) Foo foo; }; // Apply the layout on top-level.
 	auto flags = combined_decoration_for_member(type, index);
 
-	if (flags & (1ull << DecorationRowMajor))
+	if (flags.get(DecorationRowMajor))
 		attr.push_back("row_major");
 	// We don't emit any global layouts, so column_major is default.
 	//if (flags & (1ull << DecorationColMajor))
 	//    attr.push_back("column_major");
 
-	if ((dec.decoration_flags & (1ull << DecorationLocation)) != 0 && can_use_io_location(type.storage))
+	if (dec.decoration_flags.get(DecorationLocation) && can_use_io_location(type.storage, true))
 		attr.push_back(join("location = ", dec.location));
 
 	// DecorationCPacked is set by layout_for_variable earlier to mark that we need to emit offset qualifiers.
 	// This is only done selectively in GLSL as needed.
-	if (has_decoration(type.self, DecorationCPacked) && (dec.decoration_flags & (1ull << DecorationOffset)) != 0)
+	if (has_decoration(type.self, DecorationCPacked) && dec.decoration_flags.get(DecorationOffset))
 		attr.push_back(join("offset = ", dec.offset));
 
 	if (attr.empty())
@@ -865,15 +912,21 @@ uint32_t CompilerGLSL::type_to_packed_base_size(const SPIRType &type, BufferPack
 	case SPIRType::Int64:
 	case SPIRType::UInt64:
 		return 8;
-	default:
+	case SPIRType::Float:
+	case SPIRType::Int:
+	case SPIRType::UInt:
 		return 4;
+	case SPIRType::Half:
+		return 2;
+
+	default:
+		SPIRV_CROSS_THROW("Unrecognized type in type_to_packed_base_size.");
 	}
 }
 
-uint32_t CompilerGLSL::type_to_packed_alignment(const SPIRType &type, uint64_t flags, BufferPackingStandard packing)
+uint32_t CompilerGLSL::type_to_packed_alignment(const SPIRType &type, const Bitset &flags,
+                                                BufferPackingStandard packing)
 {
-	const uint32_t base_alignment = type_to_packed_base_size(type, packing);
-
 	if (!type.array.empty())
 	{
 		uint32_t minimum_alignment = 1;
@@ -907,6 +960,8 @@ uint32_t CompilerGLSL::type_to_packed_alignment(const SPIRType &type, uint64_t f
 	}
 	else
 	{
+		const uint32_t base_alignment = type_to_packed_base_size(type, packing);
+
 		// Vectors are *not* aligned in HLSL, but there's an extra rule where vectors cannot straddle
 		// a vec4, this is handled outside since that part knows our current offset.
 		if (type.columns == 1 && packing_is_hlsl(packing))
@@ -929,7 +984,7 @@ uint32_t CompilerGLSL::type_to_packed_alignment(const SPIRType &type, uint64_t f
 
 		// Rule 5. Column-major matrices are stored as arrays of
 		// vectors.
-		if ((flags & (1ull << DecorationColMajor)) && type.columns > 1)
+		if (flags.get(DecorationColMajor) && type.columns > 1)
 		{
 			if (packing_is_vec4_padded(packing))
 				return 4 * base_alignment;
@@ -942,7 +997,7 @@ uint32_t CompilerGLSL::type_to_packed_alignment(const SPIRType &type, uint64_t f
 		// Rule 6 implied.
 
 		// Rule 7.
-		if ((flags & (1ull << DecorationRowMajor)) && type.vecsize > 1)
+		if (flags.get(DecorationRowMajor) && type.vecsize > 1)
 		{
 			if (packing_is_vec4_padded(packing))
 				return 4 * base_alignment;
@@ -958,7 +1013,8 @@ uint32_t CompilerGLSL::type_to_packed_alignment(const SPIRType &type, uint64_t f
 	SPIRV_CROSS_THROW("Did not find suitable rule for type. Bogus decorations?");
 }
 
-uint32_t CompilerGLSL::type_to_packed_array_stride(const SPIRType &type, uint64_t flags, BufferPackingStandard packing)
+uint32_t CompilerGLSL::type_to_packed_array_stride(const SPIRType &type, const Bitset &flags,
+                                                   BufferPackingStandard packing)
 {
 	// Array stride is equal to aligned size of the underlying type.
 	uint32_t parent = type.parent_type;
@@ -980,7 +1036,7 @@ uint32_t CompilerGLSL::type_to_packed_array_stride(const SPIRType &type, uint64_
 	}
 }
 
-uint32_t CompilerGLSL::type_to_packed_size(const SPIRType &type, uint64_t flags, BufferPackingStandard packing)
+uint32_t CompilerGLSL::type_to_packed_size(const SPIRType &type, const Bitset &flags, BufferPackingStandard packing)
 {
 	if (!type.array.empty())
 	{
@@ -988,7 +1044,6 @@ uint32_t CompilerGLSL::type_to_packed_size(const SPIRType &type, uint64_t flags,
 		       type_to_packed_array_stride(type, flags, packing);
 	}
 
-	const uint32_t base_alignment = type_to_packed_base_size(type, packing);
 	uint32_t size = 0;
 
 	if (type.basetype == SPIRType::Struct)
@@ -1016,10 +1071,12 @@ uint32_t CompilerGLSL::type_to_packed_size(const SPIRType &type, uint64_t flags,
 	}
 	else
 	{
+		const uint32_t base_alignment = type_to_packed_base_size(type, packing);
+
 		if (type.columns == 1)
 			size = type.vecsize * base_alignment;
 
-		if ((flags & (1ull << DecorationColMajor)) && type.columns > 1)
+		if (flags.get(DecorationColMajor) && type.columns > 1)
 		{
 			if (packing_is_vec4_padded(packing))
 				size = type.columns * 4 * base_alignment;
@@ -1029,7 +1086,7 @@ uint32_t CompilerGLSL::type_to_packed_size(const SPIRType &type, uint64_t flags,
 				size = type.columns * type.vecsize * base_alignment;
 		}
 
-		if ((flags & (1ull << DecorationRowMajor)) && type.vecsize > 1)
+		if (flags.get(DecorationRowMajor) && type.vecsize > 1)
 		{
 			if (packing_is_vec4_padded(packing))
 				size = type.vecsize * 4 * base_alignment;
@@ -1126,14 +1183,17 @@ bool CompilerGLSL::buffer_is_packing_standard(const SPIRType &type, BufferPackin
 	return true;
 }
 
-bool CompilerGLSL::can_use_io_location(StorageClass storage)
+bool CompilerGLSL::can_use_io_location(StorageClass storage, bool block)
 {
 	// Location specifiers are must have in SPIR-V, but they aren't really supported in earlier versions of GLSL.
 	// Be very explicit here about how to solve the issue.
 	if ((get_execution_model() != ExecutionModelVertex && storage == StorageClassInput) ||
 	    (get_execution_model() != ExecutionModelFragment && storage == StorageClassOutput))
 	{
-		if (!options.es && options.version < 410 && !options.separate_shader_objects)
+		uint32_t minimum_desktop_version = block ? 440 : 410;
+		// Locations inside blocks need ARB_enhanced_layouts (4.40); otherwise ARB_separate_shader_objects (4.10) is enough.
+
+		if (!options.es && options.version < minimum_desktop_version && !options.separate_shader_objects)
 			return false;
 		else if (options.es && options.version < 310)
 			return false;
@@ -1148,6 +1208,14 @@ bool CompilerGLSL::can_use_io_location(StorageClass storage)
 			return false;
 	}
 
+	if (storage == StorageClassUniform || storage == StorageClassUniformConstant)
+	{
+		if (options.es && options.version < 310)
+			return false;
+		else if (!options.es && options.version < 430)
+			return false;
+	}
+
 	return true;
 }
 
@@ -1170,56 +1238,67 @@ string CompilerGLSL::layout_for_variable(const SPIRVariable &var)
 	if (options.vulkan_semantics && var.storage == StorageClassPushConstant)
 		attr.push_back("push_constant");
 
-	if (flags & (1ull << DecorationRowMajor))
+	if (flags.get(DecorationRowMajor))
 		attr.push_back("row_major");
-	if (flags & (1ull << DecorationColMajor))
+	if (flags.get(DecorationColMajor))
 		attr.push_back("column_major");
 
 	if (options.vulkan_semantics)
 	{
-		if (flags & (1ull << DecorationInputAttachmentIndex))
+		if (flags.get(DecorationInputAttachmentIndex))
 			attr.push_back(join("input_attachment_index = ", dec.input_attachment));
 	}
 
-	if ((flags & (1ull << DecorationLocation)) != 0 && can_use_io_location(var.storage))
+	bool is_block = has_decoration(type.self, DecorationBlock);
+	if (flags.get(DecorationLocation) && can_use_io_location(var.storage, is_block))
 	{
-		uint64_t combined_decoration = 0;
+		Bitset combined_decoration;
 		for (uint32_t i = 0; i < meta[type.self].members.size(); i++)
-			combined_decoration |= combined_decoration_for_member(type, i);
+			combined_decoration.merge_or(combined_decoration_for_member(type, i));
 
 		// If our members have location decorations, we don't need to
 		// emit location decorations at the top as well (looks weird).
-		if ((combined_decoration & (1ull << DecorationLocation)) == 0)
+		if (!combined_decoration.get(DecorationLocation))
 			attr.push_back(join("location = ", dec.location));
 	}
 
-	// set = 0 is the default. Do not emit set = decoration in regular GLSL output, but
-	// we should preserve it in Vulkan GLSL mode.
+	if (flags.get(DecorationIndex))
+		attr.push_back(join("index = ", dec.index));
+
+	// Do not emit set = decoration in regular GLSL output, but
+	// we need to preserve it in Vulkan GLSL mode.
 	if (var.storage != StorageClassPushConstant)
 	{
-		if ((flags & (1ull << DecorationDescriptorSet)) && (dec.set != 0 || options.vulkan_semantics))
+		if (flags.get(DecorationDescriptorSet) && options.vulkan_semantics)
 			attr.push_back(join("set = ", dec.set));
 	}
 
+	// GL 3.0/GLSL 1.30 is not considered legacy, but it doesn't have UBOs ...
+	bool can_use_buffer_blocks = (options.es && options.version >= 300) || (!options.es && options.version >= 140);
+
 	bool can_use_binding;
 	if (options.es)
 		can_use_binding = options.version >= 310;
 	else
 		can_use_binding = options.enable_420pack_extension || (options.version >= 420);
 
-	if (can_use_binding && (flags & (1ull << DecorationBinding)))
+	// Make sure we don't emit binding layout for a classic uniform on GLSL 1.30.
+	if (!can_use_buffer_blocks && var.storage == StorageClassUniform)
+		can_use_binding = false;
+
+	if (can_use_binding && flags.get(DecorationBinding))
 		attr.push_back(join("binding = ", dec.binding));
 
-	if (flags & (1ull << DecorationOffset))
+	if (flags.get(DecorationOffset))
 		attr.push_back(join("offset = ", dec.offset));
 
 	bool push_constant_block = options.vulkan_semantics && var.storage == StorageClassPushConstant;
 	bool ssbo_block = var.storage == StorageClassStorageBuffer ||
-	                  (var.storage == StorageClassUniform && (typeflags & (1ull << DecorationBufferBlock)));
+	                  (var.storage == StorageClassUniform && typeflags.get(DecorationBufferBlock));
 
 	// Instead of adding explicit offsets for every element here, just assume we're using std140 or std430.
 	// If SPIR-V does not comply with either layout, we cannot really work around it.
-	if (var.storage == StorageClassUniform && (typeflags & (1ull << DecorationBlock)))
+	if (can_use_buffer_blocks && var.storage == StorageClassUniform && typeflags.get(DecorationBlock))
 	{
 		if (buffer_is_packing_standard(type, BufferPackingStd140))
 			attr.push_back("std140");
@@ -1233,7 +1312,7 @@ string CompilerGLSL::layout_for_variable(const SPIRVariable &var)
 				SPIRV_CROSS_THROW("Push constant block cannot be expressed as neither std430 nor std140. ES-targets do "
 				                  "not support GL_ARB_enhanced_layouts.");
 			if (!options.es && !options.vulkan_semantics && options.version < 440)
-				require_extension("GL_ARB_enhanced_layouts");
+				require_extension_internal("GL_ARB_enhanced_layouts");
 
 			// This is a very last minute to check for this, but use this unused decoration to mark that we should emit
 			// explicit offsets for this block type.
@@ -1249,7 +1328,7 @@ string CompilerGLSL::layout_for_variable(const SPIRVariable &var)
 			                  "support a more flexible layout.");
 		}
 	}
-	else if (push_constant_block || ssbo_block)
+	else if (can_use_buffer_blocks && (push_constant_block || ssbo_block))
 	{
 		if (buffer_is_packing_standard(type, BufferPackingStd430))
 			attr.push_back("std430");
@@ -1266,7 +1345,7 @@ string CompilerGLSL::layout_for_variable(const SPIRVariable &var)
 				SPIRV_CROSS_THROW("Push constant block cannot be expressed as neither std430 nor std140. ES-targets do "
 				                  "not support GL_ARB_enhanced_layouts.");
 			if (!options.es && !options.vulkan_semantics && options.version < 440)
-				require_extension("GL_ARB_enhanced_layouts");
+				require_extension_internal("GL_ARB_enhanced_layouts");
 
 			set_decoration(type.self, DecorationCPacked);
 		}
@@ -1277,7 +1356,7 @@ string CompilerGLSL::layout_for_variable(const SPIRVariable &var)
 				SPIRV_CROSS_THROW("Push constant block cannot be expressed as neither std430 nor std140. ES-targets do "
 				                  "not support GL_ARB_enhanced_layouts.");
 			if (!options.es && !options.vulkan_semantics && options.version < 440)
-				require_extension("GL_ARB_enhanced_layouts");
+				require_extension_internal("GL_ARB_enhanced_layouts");
 
 			set_decoration(type.self, DecorationCPacked);
 		}
@@ -1327,7 +1406,8 @@ void CompilerGLSL::emit_push_constant_block_glsl(const SPIRVariable &var)
 	auto &type = get<SPIRType>(var.basetype);
 
 	auto &flags = meta[var.self].decoration.decoration_flags;
-	flags &= ~((1ull << DecorationBinding) | (1ull << DecorationDescriptorSet));
+	flags.clear(DecorationBinding);
+	flags.clear(DecorationDescriptorSet);
 
 #if 0
     if (flags & ((1ull << DecorationBinding) | (1ull << DecorationDescriptorSet)))
@@ -1338,12 +1418,13 @@ void CompilerGLSL::emit_push_constant_block_glsl(const SPIRVariable &var)
 	// We're emitting the push constant block as a regular struct, so disable the block qualifier temporarily.
 	// Otherwise, we will end up emitting layout() qualifiers on naked structs which is not allowed.
 	auto &block_flags = meta[type.self].decoration.decoration_flags;
-	uint64_t block_flag = block_flags & (1ull << DecorationBlock);
-	block_flags &= ~block_flag;
+	bool block_flag = block_flags.get(DecorationBlock);
+	block_flags.clear(DecorationBlock);
 
 	emit_struct(type);
 
-	block_flags |= block_flag;
+	if (block_flag)
+		block_flags.set(DecorationBlock);
 
 	emit_uniform(var);
 	statement("");
@@ -1353,7 +1434,7 @@ void CompilerGLSL::emit_buffer_block(const SPIRVariable &var)
 {
 	if (flattened_buffer_blocks.count(var.self))
 		emit_buffer_block_flattened(var);
-	else if (is_legacy())
+	else if (is_legacy() || (!options.es && options.version == 130))
 		emit_buffer_block_legacy(var);
 	else
 		emit_buffer_block_native(var);
@@ -1363,17 +1444,18 @@ void CompilerGLSL::emit_buffer_block_legacy(const SPIRVariable &var)
 {
 	auto &type = get<SPIRType>(var.basetype);
 	bool ssbo = var.storage == StorageClassStorageBuffer ||
-	            ((meta[type.self].decoration.decoration_flags & (1ull << DecorationBufferBlock)) != 0);
+	            meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
 	if (ssbo)
 		SPIRV_CROSS_THROW("SSBOs not supported in legacy targets.");
 
 	// We're emitting the push constant block as a regular struct, so disable the block qualifier temporarily.
 	// Otherwise, we will end up emitting layout() qualifiers on naked structs which is not allowed.
 	auto &block_flags = meta[type.self].decoration.decoration_flags;
-	uint64_t block_flag = block_flags & (1ull << DecorationBlock);
-	block_flags &= ~block_flag;
+	bool block_flag = block_flags.get(DecorationBlock);
+	block_flags.clear(DecorationBlock);
 	emit_struct(type);
-	block_flags |= block_flag;
+	if (block_flag)
+		block_flags.set(DecorationBlock);
 	emit_uniform(var);
 	statement("");
 }
@@ -1382,13 +1464,13 @@ void CompilerGLSL::emit_buffer_block_native(const SPIRVariable &var)
 {
 	auto &type = get<SPIRType>(var.basetype);
 
-	uint64_t flags = get_buffer_block_flags(var);
+	Bitset flags = get_buffer_block_flags(var);
 	bool ssbo = var.storage == StorageClassStorageBuffer ||
-	            ((meta[type.self].decoration.decoration_flags & (1ull << DecorationBufferBlock)) != 0);
-	bool is_restrict = ssbo && (flags & (1ull << DecorationRestrict)) != 0;
-	bool is_writeonly = ssbo && (flags & (1ull << DecorationNonReadable)) != 0;
-	bool is_readonly = ssbo && (flags & (1ull << DecorationNonWritable)) != 0;
-	bool is_coherent = ssbo && (flags & (1ull << DecorationCoherent)) != 0;
+	            meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
+	bool is_restrict = ssbo && flags.get(DecorationRestrict);
+	bool is_writeonly = ssbo && flags.get(DecorationNonReadable);
+	bool is_readonly = ssbo && flags.get(DecorationNonWritable);
+	bool is_coherent = ssbo && flags.get(DecorationCoherent);
 
 	// Block names should never alias, but from HLSL input they kind of can because block types are reused for UAVs ...
 	auto buffer_name = to_name(type.self, false);
@@ -1485,7 +1567,7 @@ void CompilerGLSL::emit_flattened_io_block(const SPIRVariable &var, const char *
 
 	auto old_flags = meta[type.self].decoration.decoration_flags;
 	// Emit the members as if they are part of a block to get all qualifiers.
-	meta[type.self].decoration.decoration_flags |= 1ull << DecorationBlock;
+	meta[type.self].decoration.decoration_flags.set(DecorationBlock);
 
 	type.member_name_cache.clear();
 
@@ -1522,7 +1604,7 @@ void CompilerGLSL::emit_interface_block(const SPIRVariable &var)
 	auto &type = get<SPIRType>(var.basetype);
 
 	// Either make it plain in/out or in/out blocks depending on what shader is doing ...
-	bool block = (meta[type.self].decoration.decoration_flags & (1ull << DecorationBlock)) != 0;
+	bool block = meta[type.self].decoration.decoration_flags.get(DecorationBlock);
 	const char *qual = to_storage_qualifiers_glsl(var);
 
 	if (block)
@@ -1542,7 +1624,7 @@ void CompilerGLSL::emit_interface_block(const SPIRVariable &var)
 			{
 				// Geometry and tessellation extensions imply this extension.
 				if (!has_extension("GL_EXT_geometry_shader") && !has_extension("GL_EXT_tessellation_shader"))
-					require_extension("GL_EXT_shader_io_blocks");
+					require_extension_internal("GL_EXT_shader_io_blocks");
 			}
 
 			// Block names should never alias.
@@ -1597,7 +1679,7 @@ void CompilerGLSL::emit_uniform(const SPIRVariable &var)
 	if (type.basetype == SPIRType::Image && type.image.sampled == 2)
 	{
 		if (!options.es && options.version < 420)
-			require_extension("GL_ARB_shader_image_load_store");
+			require_extension_internal("GL_ARB_shader_image_load_store");
 		else if (options.es && options.version < 310)
 			SPIRV_CROSS_THROW("At least ESSL 3.10 required for shader image load store.");
 	}
@@ -1606,6 +1688,13 @@ void CompilerGLSL::emit_uniform(const SPIRVariable &var)
 	statement(layout_for_variable(var), variable_decl(var), ";");
 }
 
+void CompilerGLSL::emit_specialization_constant_op(const SPIRConstantOp &constant)
+{
+	auto &type = get<SPIRType>(constant.basetype);
+	auto name = to_name(constant.self);
+	statement("const ", variable_decl(type, name), " = ", constant_op_expression(constant), ";");
+}
+
 void CompilerGLSL::emit_specialization_constant(const SPIRConstant &constant)
 {
 	auto &type = get<SPIRType>(constant.constant_type);
@@ -1633,11 +1722,38 @@ void CompilerGLSL::emit_specialization_constant(const SPIRConstant &constant)
 	}
 }
 
+void CompilerGLSL::emit_entry_point_declarations()
+{
+}
+
 void CompilerGLSL::replace_illegal_names()
 {
 	// clang-format off
 	static const unordered_set<string> keywords = {
-		"active", "asm", "atomic_uint", "attribute", "bool", "break",
+		"abs", "acos", "acosh", "all", "any", "asin", "asinh", "atan", "atanh",
+		"atomicAdd", "atomicCompSwap", "atomicCounter", "atomicCounterDecrement", "atomicCounterIncrement",
+		"atomicExchange", "atomicMax", "atomicMin", "atomicOr", "atomicXor",
+		"bitCount", "bitfieldExtract", "bitfieldInsert", "bitfieldReverse",
+		"ceil", "cos", "cosh", "cross", "degrees",
+		"dFdx", "dFdxCoarse", "dFdxFine",
+		"dFdy", "dFdyCoarse", "dFdyFine",
+		"distance", "dot", "EmitStreamVertex", "EmitVertex", "EndPrimitive", "EndStreamPrimitive", "equal", "exp", "exp2",
+		"faceforward", "findLSB", "findMSB", "floatBitsToInt", "floatBitsToUint", "floor", "fma", "fract", "frexp", "fwidth", "fwidthCoarse", "fwidthFine",
+		"greaterThan", "greaterThanEqual", "groupMemoryBarrier",
+		"imageAtomicAdd", "imageAtomicAnd", "imageAtomicCompSwap", "imageAtomicExchange", "imageAtomicMax", "imageAtomicMin", "imageAtomicOr", "imageAtomicXor",
+		"imageLoad", "imageSamples", "imageSize", "imageStore", "imulExtended", "intBitsToFloat", "interpolateAtOffset", "interpolateAtCentroid", "interpolateAtSample",
+		"inverse", "inversesqrt", "isinf", "isnan", "ldexp", "length", "lessThan", "lessThanEqual", "log", "log2",
+		"matrixCompMult", "max", "memoryBarrier", "memoryBarrierAtomicCounter", "memoryBarrierBuffer", "memoryBarrierImage", "memoryBarrierShared",
+		"min", "mix", "mod", "modf", "noise", "noise1", "noise2", "noise3", "noise4", "normalize", "not", "notEqual",
+		"outerProduct", "packDouble2x32", "packHalf2x16", "packSnorm2x16", "packSnorm4x8", "packUnorm2x16", "packUnorm4x8", "pow",
+		"radians", "reflect", "refract", "round", "roundEven", "sign", "sin", "sinh", "smoothstep", "sqrt", "step",
+		"tan", "tanh", "texelFetch", "texelFetchOffset", "texture", "textureGather", "textureGatherOffset", "textureGatherOffsets",
+		"textureGrad", "textureGradOffset", "textureLod", "textureLodOffset", "textureOffset", "textureProj", "textureProjGrad",
+		"textureProjGradOffset", "textureProjLod", "textureProjLodOffset", "textureProjOffset", "textureQueryLevels", "textureQueryLod", "textureSamples", "textureSize",
+		"transpose", "trunc", "uaddCarry", "uintBitsToFloat", "umulExtended", "unpackDouble2x32", "unpackHalf2x16", "unpackSnorm2x16", "unpackSnorm4x8",
+		"unpackUnorm2x16", "unpackUnorm4x8", "usubBorrow",
+
+		"active", "asm", "atomic_uint", "attribute", "bool", "break", "buffer",
 		"bvec2", "bvec3", "bvec4", "case", "cast", "centroid", "class", "coherent", "common", "const", "continue", "default", "discard",
 		"dmat2", "dmat2x2", "dmat2x3", "dmat2x4", "dmat3", "dmat3x2", "dmat3x3", "dmat3x4", "dmat4", "dmat4x2", "dmat4x3", "dmat4x4",
 		"do", "double", "dvec2", "dvec3", "dvec4", "else", "enum", "extern", "external", "false", "filter", "fixed", "flat", "float",
@@ -1646,19 +1762,19 @@ void CompilerGLSL::replace_illegal_names()
 		"iimageCubeArray", "image1D", "image1DArray", "image2D", "image2DArray", "image2DMS", "image2DMSArray", "image2DRect",
 		"image3D", "imageBuffer", "imageCube", "imageCubeArray", "in", "inline", "inout", "input", "int", "interface", "invariant",
 		"isampler1D", "isampler1DArray", "isampler2D", "isampler2DArray", "isampler2DMS", "isampler2DMSArray", "isampler2DRect",
-		"isampler3D", "isamplerBuffer", "isamplerCube", "isamplerCubeArray", "ivec2", "ivec3", "ivec4", "layout", "line", "linear", "long", "lowp",
-		"mat2", "mat2x2", "mat2x3", "mat2x4", "mat3", "mat3x2", "mat3x3", "mat3x4", "mat4", "mat4x2", "mat4x3", "mat4x4", "matrix", "mediump",
-		"namespace", "noinline", "noperspective", "out", "output", "packed", "partition", "patch", "point", "precision", "public", "readonly",
-		"resource", "restrict", "return", "row_major", "sample", "sampler", "sampler1D", "sampler1DArray", "sampler1DArrayShadow",
+		"isampler3D", "isamplerBuffer", "isamplerCube", "isamplerCubeArray", "ivec2", "ivec3", "ivec4", "layout", "long", "lowp",
+		"mat2", "mat2x2", "mat2x3", "mat2x4", "mat3", "mat3x2", "mat3x3", "mat3x4", "mat4", "mat4x2", "mat4x3", "mat4x4", "mediump",
+		"namespace", "noinline", "noperspective", "out", "output", "packed", "partition", "patch", "precise", "precision", "public", "readonly",
+		"resource", "restrict", "return", "sample", "sampler1D", "sampler1DArray", "sampler1DArrayShadow",
 		"sampler1DShadow", "sampler2D", "sampler2DArray", "sampler2DArrayShadow", "sampler2DMS", "sampler2DMSArray",
 		"sampler2DRect", "sampler2DRectShadow", "sampler2DShadow", "sampler3D", "sampler3DRect", "samplerBuffer",
-		"samplerCube", "samplerCubeArray", "samplerCubeArrayShadow", "samplerCubeShadow", "short", "sizeof", "smooth", "static",
+		"samplerCube", "samplerCubeArray", "samplerCubeArrayShadow", "samplerCubeShadow", "shared", "short", "sizeof", "smooth", "static",
 		"struct", "subroutine", "superp", "switch", "template", "this", "true", "typedef", "uimage1D", "uimage1DArray", "uimage2D",
 		"uimage2DArray", "uimage2DMS", "uimage2DMSArray", "uimage2DRect", "uimage3D", "uimageBuffer", "uimageCube",
 		"uimageCubeArray", "uint", "uniform", "union", "unsigned", "usampler1D", "usampler1DArray", "usampler2D", "usampler2DArray",
 		"usampler2DMS", "usampler2DMSArray", "usampler2DRect", "usampler3D", "usamplerBuffer", "usamplerCube",
-		"usamplerCubeArray", "using", "uvec2", "uvec3", "uvec4", "varying", "vec2", "vec3", "vec4", "void", "volatile", "volatile",
-		"while", "writeonly", "texture"
+		"usamplerCubeArray", "using", "uvec2", "uvec3", "uvec4", "varying", "vec2", "vec3", "vec4", "void", "volatile",
+		"while", "writeonly",
 	};
 	// clang-format on
 
@@ -1681,7 +1797,7 @@ void CompilerGLSL::replace_fragment_output(SPIRVariable &var)
 {
 	auto &m = meta[var.self].decoration;
 	uint32_t location = 0;
-	if (m.decoration_flags & (1ull << DecorationLocation))
+	if (m.decoration_flags.get(DecorationLocation))
 		location = m.location;
 
 	// If our variable is arrayed, we must not emit the array part of this as the SPIR-V will
@@ -1694,7 +1810,7 @@ void CompilerGLSL::replace_fragment_output(SPIRVariable &var)
 		m.alias = join("gl_FragData[", location, "]");
 
 		if (is_legacy_es() && location != 0)
-			require_extension("GL_EXT_draw_buffers");
+			require_extension_internal("GL_EXT_draw_buffers");
 	}
 	else if (type.array.size() == 1)
 	{
@@ -1707,7 +1823,7 @@ void CompilerGLSL::replace_fragment_output(SPIRVariable &var)
 			                  "This is unimplemented in SPIRV-Cross.");
 
 		if (is_legacy_es())
-			require_extension("GL_EXT_draw_buffers");
+			require_extension_internal("GL_EXT_draw_buffers");
 	}
 	else
 		SPIRV_CROSS_THROW("Array-of-array output variable used. This cannot be implemented in legacy GLSL.");
@@ -1739,6 +1855,7 @@ string CompilerGLSL::remap_swizzle(const SPIRType &out_type, uint32_t input_comp
 		return join(type_to_glsl(out_type), "(", expr, ")");
 	else
 	{
+		// FIXME: This will not work with packed expressions.
 		auto e = enclose_expression(expr) + ".";
 		// Just clamp the swizzle index if we have more outputs than inputs.
 		for (uint32_t c = 0; c < out_type.vecsize; c++)
@@ -1800,18 +1917,19 @@ void CompilerGLSL::fixup_image_load_store_access()
 			// If any no-read/no-write flags are actually set, assume that the compiler knows what it's doing.
 
 			auto &flags = meta.at(var).decoration.decoration_flags;
-			static const uint64_t NoWrite = 1ull << DecorationNonWritable;
-			static const uint64_t NoRead = 1ull << DecorationNonReadable;
-			if ((flags & (NoWrite | NoRead)) == 0)
-				flags |= NoRead | NoWrite;
+			if (!flags.get(DecorationNonWritable) && !flags.get(DecorationNonReadable))
+			{
+				flags.set(DecorationNonWritable);
+				flags.set(DecorationNonReadable);
+			}
 		}
 	}
 }
 
 void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionModel model)
 {
-	uint64_t emitted_builtins = 0;
-	uint64_t global_builtins = 0;
+	Bitset emitted_builtins;
+	Bitset global_builtins;
 	const SPIRVariable *block_var = nullptr;
 	bool emitted_block = false;
 	bool builtin_array = false;
@@ -1829,7 +1947,7 @@ void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionMo
 		auto &var = id.get<SPIRVariable>();
 		auto &type = get<SPIRType>(var.basetype);
 		bool block = has_decoration(type.self, DecorationBlock);
-		uint64_t builtins = 0;
+		Bitset builtins;
 
 		if (var.storage == storage && block && is_builtin_variable(var))
 		{
@@ -1838,7 +1956,7 @@ void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionMo
 			{
 				if (m.builtin)
 				{
-					builtins |= 1ull << m.builtin_type;
+					builtins.set(m.builtin_type);
 					if (m.builtin_type == BuiltInCullDistance)
 						cull_distance_size = get<SPIRType>(type.member_types[index]).array.front();
 					else if (m.builtin_type == BuiltInClipDistance)
@@ -1853,7 +1971,7 @@ void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionMo
 			auto &m = meta[var.self].decoration;
 			if (m.builtin)
 			{
-				global_builtins |= 1ull << m.builtin_type;
+				global_builtins.set(m.builtin_type);
 				if (m.builtin_type == BuiltInCullDistance)
 					cull_distance_size = type.array.front();
 				else if (m.builtin_type == BuiltInClipDistance)
@@ -1861,7 +1979,7 @@ void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionMo
 			}
 		}
 
-		if (!builtins)
+		if (builtins.empty())
 			continue;
 
 		if (emitted_block)
@@ -1873,15 +1991,16 @@ void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionMo
 		block_var = &var;
 	}
 
-	global_builtins &= (1ull << BuiltInPosition) | (1ull << BuiltInPointSize) | (1ull << BuiltInClipDistance) |
-	                   (1ull << BuiltInCullDistance);
+	global_builtins =
+	    Bitset(global_builtins.get_lower() & ((1ull << BuiltInPosition) | (1ull << BuiltInPointSize) |
+	                                          (1ull << BuiltInClipDistance) | (1ull << BuiltInCullDistance)));
 
 	// Try to collect all other declared builtins.
 	if (!emitted_block)
 		emitted_builtins = global_builtins;
 
 	// Can't declare an empty interface block.
-	if (!emitted_builtins)
+	if (emitted_builtins.empty())
 		return;
 
 	if (storage == StorageClassOutput)
@@ -1890,13 +2009,13 @@ void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionMo
 		statement("in gl_PerVertex");
 
 	begin_scope();
-	if (emitted_builtins & (1ull << BuiltInPosition))
+	if (emitted_builtins.get(BuiltInPosition))
 		statement("vec4 gl_Position;");
-	if (emitted_builtins & (1ull << BuiltInPointSize))
+	if (emitted_builtins.get(BuiltInPointSize))
 		statement("float gl_PointSize;");
-	if (emitted_builtins & (1ull << BuiltInClipDistance))
+	if (emitted_builtins.get(BuiltInClipDistance))
 		statement("float gl_ClipDistance[", clip_distance_size, "];");
-	if (emitted_builtins & (1ull << BuiltInCullDistance))
+	if (emitted_builtins.get(BuiltInCullDistance))
 		statement("float gl_CullDistance[", cull_distance_size, "];");
 
 	bool tessellation = model == ExecutionModelTessellationEvaluation || model == ExecutionModelTessellationControl;
@@ -2010,6 +2129,11 @@ void CompilerGLSL::emit_resources()
 				emit_specialization_constant(c);
 				emitted = true;
 			}
+			else if (id.get_type() == TypeConstantOp)
+			{
+				emit_specialization_constant_op(id.get<SPIRConstantOp>());
+				emitted = true;
+			}
 		}
 	}
 
@@ -2025,8 +2149,8 @@ void CompilerGLSL::emit_resources()
 		{
 			auto &type = id.get<SPIRType>();
 			if (type.basetype == SPIRType::Struct && type.array.empty() && !type.pointer &&
-			    (meta[type.self].decoration.decoration_flags &
-			     ((1ull << DecorationBlock) | (1ull << DecorationBufferBlock))) == 0)
+			    (!meta[type.self].decoration.decoration_flags.get(DecorationBlock) &&
+			     !meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock)))
 			{
 				emit_struct(type);
 			}
@@ -2042,8 +2166,8 @@ void CompilerGLSL::emit_resources()
 			auto &type = get<SPIRType>(var.basetype);
 
 			bool is_block_storage = type.storage == StorageClassStorageBuffer || type.storage == StorageClassUniform;
-			bool has_block_flags = (meta[type.self].decoration.decoration_flags &
-			                        ((1ull << DecorationBlock) | (1ull << DecorationBufferBlock))) != 0;
+			bool has_block_flags = meta[type.self].decoration.decoration_flags.get(DecorationBlock) ||
+			                       meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
 
 			if (var.storage != StorageClassFunction && type.pointer && is_block_storage && !is_hidden_variable(var) &&
 			    has_block_flags)
@@ -2111,11 +2235,6 @@ void CompilerGLSL::emit_resources()
 			auto &var = id.get<SPIRVariable>();
 			auto &type = get<SPIRType>(var.basetype);
 
-			// HLSL output from glslang may emit interface variables which are "empty".
-			// Just avoid declaring them.
-			if (type_is_empty(type))
-				continue;
-
 			if (var.storage != StorageClassFunction && type.pointer &&
 			    (var.storage == StorageClassInput || var.storage == StorageClassOutput) &&
 			    interface_variable_exists_in_entry_point(var.self) && !is_hidden_variable(var))
@@ -2254,6 +2373,37 @@ string CompilerGLSL::to_enclosed_expression(uint32_t id)
 	return enclose_expression(to_expression(id));
 }
 
+string CompilerGLSL::to_unpacked_expression(uint32_t id)
+{
+	// A pending transpose also takes care of the unpacking rules, so only a
+	// DecorationCPacked expression without one goes through unpack_expression_type().
+	auto *expr = maybe_get<SPIRExpression>(id);
+	bool transposes = expr && expr->need_transpose;
+	if (transposes || !has_decoration(id, DecorationCPacked))
+		return to_expression(id);
+	return unpack_expression_type(to_expression(id), expression_type(id));
+}
+
+string CompilerGLSL::to_enclosed_unpacked_expression(uint32_t id)
+{
+	// to_unpacked_expression() decides whether unpacking applies: a pending transpose
+	// takes care of it, otherwise a DecorationCPacked expression is routed through
+	// unpack_expression_type().
+	// Whatever it returns is passed to enclose_expression(), exactly as
+	// to_enclosed_expression() does for a plain expression, so the unpacked form is
+	// enclosed too before callers join it with operators (emit_unary_op, emit_binary_op).
+	return enclose_expression(to_unpacked_expression(id));
+}
+
+string CompilerGLSL::to_extract_component_expression(uint32_t id, uint32_t index)
+{
+	// DecorationCPacked expressions are subscripted; all others get a swizzle component.
+	auto enclosed = to_enclosed_expression(id);
+	if (!has_decoration(id, DecorationCPacked))
+		return join(enclosed, ".", index_to_swizzle(index));
+	return join(enclosed, "[", index, "]");
+}
+
 string CompilerGLSL::to_expression(uint32_t id)
 {
 	auto itr = invalid_expressions.find(id);
@@ -2327,7 +2477,10 @@ string CompilerGLSL::to_expression(uint32_t id)
 	}
 
 	case TypeConstantOp:
-		return constant_op_expression(get<SPIRConstantOp>(id));
+		if (options.vulkan_semantics)
+			return to_name(id);
+		else
+			return constant_op_expression(get<SPIRConstantOp>(id));
 
 	case TypeVariable:
 	{
@@ -2379,6 +2532,9 @@ string CompilerGLSL::constant_op_expression(const SPIRConstantOp &cop)
 	bool unary = false;
 	string op;
 
+	if (is_legacy() && is_unsigned_opcode(cop.opcode))
+		SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy targets.");
+
 	// TODO: Find a clean way to reuse emit_instruction.
 	switch (cop.opcode)
 	{
@@ -2450,6 +2606,41 @@ string CompilerGLSL::constant_op_expression(const SPIRConstantOp &cop)
 		break;
 	}
 
+	case OpVectorShuffle:
+	{
+		string expr = type_to_glsl_constructor(type);
+		expr += "(";
+
+		uint32_t left_components = expression_type(cop.arguments[0]).vecsize;
+		string left_arg = to_enclosed_expression(cop.arguments[0]);
+		string right_arg = to_enclosed_expression(cop.arguments[1]);
+
+		for (uint32_t i = 2; i < uint32_t(cop.arguments.size()); i++)
+		{
+			uint32_t index = cop.arguments[i];
+			if (index >= left_components)
+				expr += right_arg + "." + "xyzw"[index - left_components];
+			else
+				expr += left_arg + "." + "xyzw"[index];
+
+			if (i + 1 < uint32_t(cop.arguments.size()))
+				expr += ", ";
+		}
+
+		expr += ")";
+		return expr;
+	}
+
+	case OpCompositeExtract:
+	{
+		auto expr =
+		    access_chain_internal(cop.arguments[0], &cop.arguments[1], uint32_t(cop.arguments.size() - 1), true, false);
+		return expr;
+	}
+
+	case OpCompositeInsert:
+		SPIRV_CROSS_THROW("OpCompositeInsert spec constant op is not supported.");
+
 	default:
 		// Some opcodes are unimplemented here, these are currently not possible to test from glslang.
 		SPIRV_CROSS_THROW("Unimplemented spec constant op.");
@@ -2566,6 +2757,61 @@ string CompilerGLSL::constant_expression(const SPIRConstant &c)
 #pragma warning(disable : 4996)
 #endif
 
+// Renders one FP16 component of a constant as literal text for the target language.
+//
+// c        - constant to read from; the value is taken from c.scalar_f16(col, row).
+// col, row - position of the component inside the constant.
+//
+// Backends that set backend.half_literal_suffix get every literal tagged with that
+// suffix. All other backends get the text wrapped in a constructor of the scalar
+// half type, whose name is obtained through type_to_glsl().
+// Infinities and NaN have no literal spelling and are written as divisions by zero.
+string CompilerGLSL::convert_half_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
+{
+	float value = c.scalar_f16(col, row);
+	auto suffix = backend.half_literal_suffix;
+
+	// Wraps text in the constructor of the scalar half type.
+	auto as_half = [this](const string &text) -> string {
+		SPIRType type;
+		type.basetype = SPIRType::Half;
+		type.vecsize = 1;
+		type.columns = 1;
+		return join(type_to_glsl(type), "(", text, ")");
+	};
+
+	if (!std::isnan(value) && !std::isinf(value))
+	{
+		string literal = convert_to_string(value);
+		if (suffix)
+			return literal + suffix;
+
+		// In HLSL (FXC), it's important to cast the literals to half precision right away.
+		// There is no literal for it.
+		return as_half(literal);
+	}
+
+	// Pick the numerator whose division by zero yields the wanted non-finite value.
+	const char *numerator = nullptr;
+	if (value == numeric_limits<float>::infinity())
+		numerator = "1.0";
+	else if (value == -numeric_limits<float>::infinity())
+		numerator = "-1.0";
+	else if (std::isnan(value))
+		numerator = "0.0";
+	else
+		SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant.");
+
+	// There is no uintBitsToFloat for 16-bit, so have to rely on legacy fallback here.
+	string res;
+	if (suffix)
+		res = join("(", numerator, suffix, " / 0.0", suffix, ")");
+	else
+		res = as_half(join(numerator, " / 0.0"));
+
+	return res;
+}
+
 string CompilerGLSL::convert_float_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
 {
 	string res;
@@ -2649,7 +2895,7 @@ std::string CompilerGLSL::convert_double_to_string(const SPIRConstant &c, uint32
 
 			if (options.es)
 				SPIRV_CROSS_THROW("64-bit integers/float not supported in ES profile.");
-			require_extension("GL_ARB_gpu_shader_int64");
+			require_extension_internal("GL_ARB_gpu_shader_int64");
 
 			char print_buffer[64];
 			sprintf(print_buffer, "0x%llx%s", static_cast<unsigned long long>(u64_value),
@@ -2661,7 +2907,7 @@ std::string CompilerGLSL::convert_double_to_string(const SPIRConstant &c, uint32
 			if (options.es)
 				SPIRV_CROSS_THROW("FP64 not supported in ES profile.");
 			if (options.version < 400)
-				require_extension("GL_ARB_gpu_shader_fp64");
+				require_extension_internal("GL_ARB_gpu_shader_fp64");
 
 			if (double_value == numeric_limits<double>::infinity())
 			{
@@ -2711,7 +2957,7 @@ string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t
 	bool splat = backend.use_constructor_splatting && c.vector_size() > 1;
 	bool swizzle_splat = backend.can_swizzle_scalar && c.vector_size() > 1;
 
-	if (type.basetype != SPIRType::Float && type.basetype != SPIRType::Double)
+	if (!type_is_floating_point(type))
 	{
 		// Cannot swizzle literal integers as a special case.
 		swizzle_splat = false;
@@ -2765,6 +3011,28 @@ string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t
 
 	switch (type.basetype)
 	{
+	case SPIRType::Half:
+		if (splat || swizzle_splat)
+		{
+			res += convert_half_to_string(c, vector, 0);
+			if (swizzle_splat)
+				res = remap_swizzle(get<SPIRType>(c.constant_type), 1, res);
+		}
+		else
+		{
+			for (uint32_t i = 0; i < c.vector_size(); i++)
+			{
+				if (options.vulkan_semantics && c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
+					res += to_name(c.specialization_constant_id(vector, i));
+				else
+					res += convert_half_to_string(c, vector, i);
+
+				if (i + 1 < c.vector_size())
+					res += ", ";
+			}
+		}
+		break;
+
 	case SPIRType::Float:
 		if (splat || swizzle_splat)
 		{
@@ -2873,7 +3141,14 @@ string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t
 		if (splat)
 		{
 			res += convert_to_string(c.scalar(vector, 0));
-			if (backend.uint32_t_literal_suffix)
+			if (is_legacy())
+			{
+				// Fake unsigned constant literals with signed ones if possible.
+				// Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed.
+				if (c.scalar_i32(vector, 0) < 0)
+					SPIRV_CROSS_THROW("Tried to convert uint literal into int, but this made the literal negative.");
+			}
+			else if (backend.uint32_t_literal_suffix)
 				res += "u";
 		}
 		else
@@ -2885,7 +3160,15 @@ string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t
 				else
 				{
 					res += convert_to_string(c.scalar(vector, i));
-					if (backend.uint32_t_literal_suffix)
+					if (is_legacy())
+					{
+						// Fake unsigned constant literals with signed ones if possible.
+						// Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed.
+						if (c.scalar_i32(vector, i) < 0)
+							SPIRV_CROSS_THROW(
+							    "Tried to convert uint literal into int, but this made the literal negative.");
+					}
+					else if (backend.uint32_t_literal_suffix)
 						res += "u";
 				}
 
@@ -2970,6 +3253,7 @@ string CompilerGLSL::declare_temporary(uint32_t result_type, uint32_t result_id)
 	else
 	{
 		// The result_id has not been made into an expression yet, so use flags interface.
+		add_local_variable_name(result_id);
 		return join(flags_to_precision_qualifiers_glsl(type, flags), variable_decl(type, to_name(result_id)), " = ");
 	}
 }
@@ -3002,15 +3286,15 @@ SPIRExpression &CompilerGLSL::emit_op(uint32_t result_type, uint32_t result_id,
 void CompilerGLSL::emit_unary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op)
 {
 	bool forward = should_forward(op0);
-	emit_op(result_type, result_id, join(op, to_enclosed_expression(op0)), forward);
+	emit_op(result_type, result_id, join(op, to_enclosed_unpacked_expression(op0)), forward);
 	inherit_expression_dependencies(result_id, op0);
 }
 
 void CompilerGLSL::emit_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op)
 {
 	bool forward = should_forward(op0) && should_forward(op1);
-	emit_op(result_type, result_id, join(to_enclosed_expression(op0), " ", op, " ", to_enclosed_expression(op1)),
-	        forward);
+	emit_op(result_type, result_id,
+	        join(to_enclosed_unpacked_expression(op0), " ", op, " ", to_enclosed_unpacked_expression(op1)), forward);
 
 	inherit_expression_dependencies(result_id, op0);
 	inherit_expression_dependencies(result_id, op1);
@@ -3026,9 +3310,7 @@ void CompilerGLSL::emit_unrolled_unary_op(uint32_t result_type, uint32_t result_
 		// Make sure to call to_expression multiple times to ensure
 		// that these expressions are properly flushed to temporaries if needed.
 		expr += op;
-		expr += to_enclosed_expression(operand);
-		expr += '.';
-		expr += index_to_swizzle(i);
+		expr += to_extract_component_expression(operand, i);
 
 		if (i + 1 < type.vecsize)
 			expr += ", ";
@@ -3049,15 +3331,11 @@ void CompilerGLSL::emit_unrolled_binary_op(uint32_t result_type, uint32_t result
 	{
 		// Make sure to call to_expression multiple times to ensure
 		// that these expressions are properly flushed to temporaries if needed.
-		expr += to_enclosed_expression(op0);
-		expr += '.';
-		expr += index_to_swizzle(i);
+		expr += to_extract_component_expression(op0, i);
 		expr += ' ';
 		expr += op;
 		expr += ' ';
-		expr += to_enclosed_expression(op1);
-		expr += '.';
-		expr += index_to_swizzle(i);
+		expr += to_extract_component_expression(op1, i);
 
 		if (i + 1 < type.vecsize)
 			expr += ", ";
@@ -3096,8 +3374,8 @@ SPIRType CompilerGLSL::binary_op_bitcast_helper(string &cast_op0, string &cast_o
 	else
 	{
 		// If we don't cast, our actual input type is that of the first (or second) argument.
-		cast_op0 = to_enclosed_expression(op0);
-		cast_op1 = to_enclosed_expression(op1);
+		cast_op0 = to_enclosed_unpacked_expression(op0);
+		cast_op1 = to_enclosed_unpacked_expression(op1);
 		input_type = type0.basetype;
 	}
 
@@ -3127,12 +3405,14 @@ void CompilerGLSL::emit_binary_op_cast(uint32_t result_type, uint32_t result_id,
 		expr += join(cast_op0, " ", op, " ", cast_op1);
 
 	emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1));
+	inherit_expression_dependencies(result_id, op0);
+	inherit_expression_dependencies(result_id, op1);
 }
 
 void CompilerGLSL::emit_unary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op)
 {
 	bool forward = should_forward(op0);
-	emit_op(result_type, result_id, join(op, "(", to_expression(op0), ")"), forward);
+	emit_op(result_type, result_id, join(op, "(", to_unpacked_expression(op0), ")"), forward);
 	inherit_expression_dependencies(result_id, op0);
 }
 
@@ -3140,7 +3420,8 @@ void CompilerGLSL::emit_binary_func_op(uint32_t result_type, uint32_t result_id,
                                        const char *op)
 {
 	bool forward = should_forward(op0) && should_forward(op1);
-	emit_op(result_type, result_id, join(op, "(", to_expression(op0), ", ", to_expression(op1), ")"), forward);
+	emit_op(result_type, result_id, join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ")"),
+	        forward);
 	inherit_expression_dependencies(result_id, op0);
 	inherit_expression_dependencies(result_id, op1);
 }
@@ -3168,6 +3449,8 @@ void CompilerGLSL::emit_binary_func_op_cast(uint32_t result_type, uint32_t resul
 	}
 
 	emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1));
+	inherit_expression_dependencies(result_id, op0);
+	inherit_expression_dependencies(result_id, op1);
 }
 
 void CompilerGLSL::emit_trinary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
@@ -3175,7 +3458,9 @@ void CompilerGLSL::emit_trinary_func_op(uint32_t result_type, uint32_t result_id
 {
 	bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2);
 	emit_op(result_type, result_id,
-	        join(op, "(", to_expression(op0), ", ", to_expression(op1), ", ", to_expression(op2), ")"), forward);
+	        join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ", ",
+	             to_unpacked_expression(op2), ")"),
+	        forward);
 
 	inherit_expression_dependencies(result_id, op0);
 	inherit_expression_dependencies(result_id, op1);
@@ -3187,8 +3472,8 @@ void CompilerGLSL::emit_quaternary_func_op(uint32_t result_type, uint32_t result
 {
 	bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2) && should_forward(op3);
 	emit_op(result_type, result_id,
-	        join(op, "(", to_expression(op0), ", ", to_expression(op1), ", ", to_expression(op2), ", ",
-	             to_expression(op3), ")"),
+	        join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ", ",
+	             to_unpacked_expression(op2), ", ", to_unpacked_expression(op3), ")"),
 	        forward);
 
 	inherit_expression_dependencies(result_id, op0);
@@ -3246,15 +3531,15 @@ string CompilerGLSL::legacy_tex_op(const std::string &op, const SPIRType &imgtyp
 
 	bool use_explicit_lod = check_explicit_lod_allowed(lod);
 
-	if (op == "textureLod" || op == "textureProjLod")
+	if (op == "textureLod" || op == "textureProjLod" || op == "textureGrad")
 	{
 		if (is_legacy_es())
 		{
 			if (use_explicit_lod)
-				require_extension("GL_EXT_shader_texture_lod");
+				require_extension_internal("GL_EXT_shader_texture_lod");
 		}
 		else if (is_legacy())
-			require_extension("GL_ARB_shader_texture_lod");
+			require_extension_internal("GL_ARB_shader_texture_lod");
 	}
 
 	if (op == "texture")
@@ -3268,6 +3553,8 @@ string CompilerGLSL::legacy_tex_op(const std::string &op, const SPIRType &imgtyp
 	}
 	else if (op == "textureProj")
 		return join("texture", type, "Proj");
+	else if (op == "textureGrad")
+		return join("texture", type, is_legacy_es() ? "GradEXT" : is_legacy_desktop() ? "GradARB" : "Grad");
 	else if (op == "textureProjLod")
 	{
 		if (use_explicit_lod)
@@ -3309,6 +3596,10 @@ bool CompilerGLSL::to_trivial_mix_op(const SPIRType &type, string &op, uint32_t
 		ret = cleft->scalar() == 0 && cright->scalar() == 1;
 		break;
 
+	case SPIRType::Half:
+		ret = cleft->scalar_f16() == 0.0f && cright->scalar_f16() == 1.0f;
+		break;
+
 	case SPIRType::Float:
 		ret = cleft->scalar_f32() == 0.0f && cright->scalar_f32() == 1.0f;
 		break;
@@ -3368,7 +3659,7 @@ void CompilerGLSL::emit_mix_op(uint32_t result_type, uint32_t id, uint32_t left,
 		else
 		{
 			auto swiz = [this](uint32_t expression, uint32_t i) {
-				return join(to_enclosed_expression(expression), ".", index_to_swizzle(i));
+				return to_extract_component_expression(expression, i);
 			};
 
 			expr = type_to_glsl_constructor(restype);
@@ -3387,6 +3678,9 @@ void CompilerGLSL::emit_mix_op(uint32_t result_type, uint32_t id, uint32_t left,
 		}
 
 		emit_op(result_type, id, expr, should_forward(left) && should_forward(right) && should_forward(lerp));
+		inherit_expression_dependencies(id, left);
+		inherit_expression_dependencies(id, right);
+		inherit_expression_dependencies(id, lerp);
 	}
 	else
 		emit_trinary_func_op(result_type, id, left, right, lerp, "mix");
@@ -3394,6 +3688,14 @@ void CompilerGLSL::emit_mix_op(uint32_t result_type, uint32_t id, uint32_t left,
 
 string CompilerGLSL::to_combined_image_sampler(uint32_t image_id, uint32_t samp_id)
 {
+	// Keep track of the array indices we have used to load the image.
+	// We'll need to use the same array index into the combined image sampler array.
+	auto image_expr = to_expression(image_id);
+	string array_expr;
+	auto array_index = image_expr.find_first_of('[');
+	if (array_index != string::npos)
+		array_expr = image_expr.substr(array_index, string::npos);
+
 	auto &args = current_function->arguments;
 
 	// For GLSL and ESSL targets, we must enumerate all possible combinations for sampler2D(texture2D, sampler) and redirect
@@ -3426,7 +3728,7 @@ string CompilerGLSL::to_combined_image_sampler(uint32_t image_id, uint32_t samp_
 		});
 
 		if (itr != end(combined))
-			return to_expression(itr->id);
+			return to_expression(itr->id) + array_expr;
 		else
 		{
 			SPIRV_CROSS_THROW(
@@ -3443,7 +3745,7 @@ string CompilerGLSL::to_combined_image_sampler(uint32_t image_id, uint32_t samp_
 		});
 
 		if (itr != end(combined_image_samplers))
-			return to_expression(itr->combined_id);
+			return to_expression(itr->combined_id) + array_expr;
 		else
 		{
 			SPIRV_CROSS_THROW("Cannot find mapping for combined sampler, was build_combined_image_samplers() used "
@@ -3458,9 +3760,15 @@ void CompilerGLSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_i
 	{
 		emit_binary_func_op(result_type, result_id, image_id, samp_id,
 		                    type_to_glsl(get<SPIRType>(result_type)).c_str());
+
+		// Make sure to suppress usage tracking. It is illegal to create temporaries of opaque types.
+		forwarded_temporaries.erase(result_id);
 	}
 	else
-		emit_op(result_type, result_id, to_combined_image_sampler(image_id, samp_id), true);
+	{
+		// Make sure to suppress usage tracking. It is illegal to create temporaries of opaque types.
+		emit_op(result_type, result_id, to_combined_image_sampler(image_id, samp_id), true, true);
+	}
 }
 
 void CompilerGLSL::emit_texture_op(const Instruction &i)
@@ -3472,6 +3780,8 @@ void CompilerGLSL::emit_texture_op(const Instruction &i)
 	if (i.offset + length > spirv.size())
 		SPIRV_CROSS_THROW("Compiler::parse() opcode out of range.");
 
+	vector<uint32_t> inherited_expressions;
+
 	uint32_t result_type = ops[0];
 	uint32_t id = ops[1];
 	uint32_t img = ops[2];
@@ -3483,6 +3793,8 @@ void CompilerGLSL::emit_texture_op(const Instruction &i)
 	bool fetch = false;
 	const uint32_t *opt = nullptr;
 
+	inherited_expressions.push_back(coord);
+
 	switch (op)
 	{
 	case OpImageSampleDrefImplicitLod:
@@ -3561,6 +3873,9 @@ void CompilerGLSL::emit_texture_op(const Instruction &i)
 		break;
 	}
 
+	if (dref)
+		inherited_expressions.push_back(dref);
+
 	if (proj)
 		coord_components++;
 	if (imgtype.image.arrayed)
@@ -3586,6 +3901,7 @@ void CompilerGLSL::emit_texture_op(const Instruction &i)
 		if (length && (flags & flag))
 		{
 			v = *opt++;
+			inherited_expressions.push_back(v);
 			length--;
 		}
 	};
@@ -3609,6 +3925,21 @@ void CompilerGLSL::emit_texture_op(const Instruction &i)
 	expr += ")";
 
 	emit_op(result_type, id, expr, forward);
+	for (auto &inherit : inherited_expressions)
+		inherit_expression_dependencies(id, inherit);
+
+	switch (op)
+	{
+	case OpImageSampleDrefImplicitLod:
+	case OpImageSampleImplicitLod:
+	case OpImageSampleProjImplicitLod:
+	case OpImageSampleProjDrefImplicitLod:
+		register_control_dependent_expression(id);
+		break;
+
+	default:
+		break;
+	}
 }
 
 // Returns the function name for a texture sampling function for the specified image and sampling characteristics.
@@ -3657,41 +3988,48 @@ string CompilerGLSL::to_function_name(uint32_t, const SPIRType &imgtype, bool is
 	return is_legacy() ? legacy_tex_op(fname, imgtype, lod) : fname;
 }
 
-// Returns the function args for a texture sampling function for the specified image and sampling characteristics.
-string CompilerGLSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool, bool is_proj,
-                                      uint32_t coord, uint32_t coord_components, uint32_t dref, uint32_t grad_x,
-                                      uint32_t grad_y, uint32_t lod, uint32_t coffset, uint32_t offset, uint32_t bias,
-                                      uint32_t comp, uint32_t sample, bool *p_forward)
+std::string CompilerGLSL::convert_separate_image_to_combined(uint32_t id)
 {
-	string farg_str = to_expression(img);
+	auto &imgtype = expression_type(id);
+	auto *var = maybe_get_backing_variable(id);
 
-	if (is_fetch)
+	// If we are fetching from a plain OpTypeImage, we must combine with a dummy sampler.
+	if (var)
 	{
-		auto *var = maybe_get_backing_variable(img);
-
-		// If we are fetching from a plain OpTypeImage, we must combine with a dummy sampler.
-		if (var)
+		auto &type = get<SPIRType>(var->basetype);
+		if (type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer)
 		{
-			auto &type = get<SPIRType>(var->basetype);
-			if (type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer)
-			{
-				if (!dummy_sampler_id)
-					SPIRV_CROSS_THROW(
-					    "Cannot find dummy sampler ID. Was build_dummy_sampler_for_combined_images() called?");
+			if (!dummy_sampler_id)
+				SPIRV_CROSS_THROW(
+				    "Cannot find dummy sampler ID. Was build_dummy_sampler_for_combined_images() called?");
 
-				if (options.vulkan_semantics)
-				{
-					auto sampled_type = imgtype;
-					sampled_type.basetype = SPIRType::SampledImage;
-					farg_str = join(type_to_glsl(sampled_type), "(", to_expression(img), ", ",
-					                to_expression(dummy_sampler_id), ")");
-				}
-				else
-					farg_str = to_combined_image_sampler(img, dummy_sampler_id);
+			if (options.vulkan_semantics)
+			{
+				auto sampled_type = imgtype;
+				sampled_type.basetype = SPIRType::SampledImage;
+				return join(type_to_glsl(sampled_type), "(", to_expression(id), ", ", to_expression(dummy_sampler_id),
+				            ")");
 			}
+			else
+				return to_combined_image_sampler(id, dummy_sampler_id);
 		}
 	}
 
+	return to_expression(id);
+}
+
+// Returns the function args for a texture sampling function for the specified image and sampling characteristics.
+string CompilerGLSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather,
+                                      bool is_proj, uint32_t coord, uint32_t coord_components, uint32_t dref,
+                                      uint32_t grad_x, uint32_t grad_y, uint32_t lod, uint32_t coffset, uint32_t offset,
+                                      uint32_t bias, uint32_t comp, uint32_t sample, bool *p_forward)
+{
+	string farg_str;
+	if (is_fetch)
+		farg_str = convert_separate_image_to_combined(img);
+	else
+		farg_str = to_expression(img);
+
 	bool swizz_func = backend.swizzle_is_function;
 	auto swizzle = [swizz_func](uint32_t comps, uint32_t in_comps) -> const char * {
 		if (comps == in_comps)
@@ -3739,7 +4077,7 @@ string CompilerGLSL::to_function_args(uint32_t img, const SPIRType &imgtype, boo
 		forward = forward && should_forward(dref);
 
 		// SPIR-V splits dref and coordinate.
-		if (coord_components == 4) // GLSL also splits the arguments in two.
+		if (is_gather || coord_components == 4) // GLSL also splits the arguments in two. Same for textureGather.
 		{
 			farg_str += ", ";
 			farg_str += to_expression(coord);
@@ -3826,6 +4164,11 @@ string CompilerGLSL::to_function_args(uint32_t img, const SPIRType &imgtype, boo
 			}
 		}
 	}
+	else if (is_fetch && imgtype.image.dim != DimBuffer && !imgtype.image.ms)
+	{
+		// Lod argument is optional in OpImageFetch, but we require a LOD value, pick 0 as the default.
+		farg_str += ", 0";
+	}
 
 	if (coffset)
 	{
@@ -3867,7 +4210,10 @@ string CompilerGLSL::to_function_args(uint32_t img, const SPIRType &imgtype, boo
 
 void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t)
 {
-	GLSLstd450 op = static_cast<GLSLstd450>(eop);
+	auto op = static_cast<GLSLstd450>(eop);
+
+	if (is_legacy() && is_unsigned_glsl_opcode(op))
+		SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy GLSL targets.");
 
 	switch (op)
 	{
@@ -3932,8 +4278,8 @@ void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
 	}
 
 	// Minmax
-	case GLSLstd450FMin:
 	case GLSLstd450UMin:
+	case GLSLstd450FMin:
 	case GLSLstd450SMin:
 		emit_binary_func_op(result_type, id, args[0], args[1], "min");
 		break;
@@ -4147,7 +4493,7 @@ void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
 void CompilerGLSL::emit_spv_amd_shader_ballot_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args,
                                                  uint32_t)
 {
-	require_extension("GL_AMD_shader_ballot");
+	require_extension_internal("GL_AMD_shader_ballot");
 
 	enum AMDShaderBallot
 	{
@@ -4163,18 +4509,22 @@ void CompilerGLSL::emit_spv_amd_shader_ballot_op(uint32_t result_type, uint32_t
 	{
 	case SwizzleInvocationsAMD:
 		emit_binary_func_op(result_type, id, args[0], args[1], "swizzleInvocationsAMD");
+		register_control_dependent_expression(id);
 		break;
 
 	case SwizzleInvocationsMaskedAMD:
 		emit_binary_func_op(result_type, id, args[0], args[1], "swizzleInvocationsMaskedAMD");
+		register_control_dependent_expression(id);
 		break;
 
 	case WriteInvocationAMD:
 		emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "writeInvocationAMD");
+		register_control_dependent_expression(id);
 		break;
 
 	case MbcntAMD:
 		emit_unary_func_op(result_type, id, args[0], "mbcntAMD");
+		register_control_dependent_expression(id);
 		break;
 
 	default:
@@ -4186,7 +4536,7 @@ void CompilerGLSL::emit_spv_amd_shader_ballot_op(uint32_t result_type, uint32_t
 void CompilerGLSL::emit_spv_amd_shader_explicit_vertex_parameter_op(uint32_t result_type, uint32_t id, uint32_t eop,
                                                                     const uint32_t *args, uint32_t)
 {
-	require_extension("GL_AMD_shader_explicit_vertex_parameter");
+	require_extension_internal("GL_AMD_shader_explicit_vertex_parameter");
 
 	enum AMDShaderExplicitVertexParameter
 	{
@@ -4210,7 +4560,7 @@ void CompilerGLSL::emit_spv_amd_shader_explicit_vertex_parameter_op(uint32_t res
 void CompilerGLSL::emit_spv_amd_shader_trinary_minmax_op(uint32_t result_type, uint32_t id, uint32_t eop,
                                                          const uint32_t *args, uint32_t)
 {
-	require_extension("GL_AMD_shader_trinary_minmax");
+	require_extension_internal("GL_AMD_shader_trinary_minmax");
 
 	enum AMDShaderTrinaryMinMax
 	{
@@ -4256,7 +4606,7 @@ void CompilerGLSL::emit_spv_amd_shader_trinary_minmax_op(uint32_t result_type, u
 void CompilerGLSL::emit_spv_amd_gcn_shader_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args,
                                               uint32_t)
 {
-	require_extension("GL_AMD_gcn_shader");
+	require_extension_internal("GL_AMD_gcn_shader");
 
 	enum AMDGCNShader
 	{
@@ -4279,6 +4629,7 @@ void CompilerGLSL::emit_spv_amd_gcn_shader_op(uint32_t result_type, uint32_t id,
 	{
 		string expr = "timeAMD()";
 		emit_op(result_type, id, expr, true);
+		register_control_dependent_expression(id);
 		break;
 	}
 
@@ -4288,6 +4639,228 @@ void CompilerGLSL::emit_spv_amd_gcn_shader_op(uint32_t result_type, uint32_t id,
 	}
 }
 
+void CompilerGLSL::emit_subgroup_op(const Instruction &i) // Emits the GLSL subgroup*() call for an OpGroupNonUniform* instruction.
+{
+	const uint32_t *ops = stream(i); // ops[0] = result type, ops[1] = result id, ops[2] = scope id, ops[3..] = per-opcode operands.
+	auto op = static_cast<Op>(i.op);
+
+	if (!options.vulkan_semantics) // Subgroup operations are rejected outright without Vulkan semantics.
+		SPIRV_CROSS_THROW("Can only use subgroup operations in Vulkan semantics.");
+
+	switch (op) // First pass: request the GL_KHR_shader_subgroup_* extension matching the opcode.
+	{
+	case OpGroupNonUniformElect:
+		require_extension_internal("GL_KHR_shader_subgroup_basic");
+		break;
+
+	case OpGroupNonUniformBroadcast:
+	case OpGroupNonUniformBroadcastFirst:
+	case OpGroupNonUniformBallot:
+	case OpGroupNonUniformInverseBallot:
+	case OpGroupNonUniformBallotBitExtract:
+	case OpGroupNonUniformBallotBitCount:
+	case OpGroupNonUniformBallotFindLSB:
+	case OpGroupNonUniformBallotFindMSB:
+		require_extension_internal("GL_KHR_shader_subgroup_ballot");
+		break;
+
+	case OpGroupNonUniformShuffle:
+	case OpGroupNonUniformShuffleXor:
+		require_extension_internal("GL_KHR_shader_subgroup_shuffle");
+		break;
+
+	case OpGroupNonUniformShuffleUp:
+	case OpGroupNonUniformShuffleDown:
+		require_extension_internal("GL_KHR_shader_subgroup_shuffle_relative");
+		break;
+
+	case OpGroupNonUniformAll:
+	case OpGroupNonUniformAny:
+	case OpGroupNonUniformAllEqual:
+		require_extension_internal("GL_KHR_shader_subgroup_vote");
+		break;
+
+	case OpGroupNonUniformFAdd:
+	case OpGroupNonUniformFMul:
+	case OpGroupNonUniformFMin:
+	case OpGroupNonUniformFMax:
+	case OpGroupNonUniformIAdd:
+	case OpGroupNonUniformIMul:
+	case OpGroupNonUniformSMin:
+	case OpGroupNonUniformSMax:
+	case OpGroupNonUniformUMin:
+	case OpGroupNonUniformUMax:
+	case OpGroupNonUniformBitwiseAnd:
+	case OpGroupNonUniformBitwiseOr:
+	case OpGroupNonUniformBitwiseXor:
+	{
+		auto operation = static_cast<GroupOperation>(ops[3]); // For arithmetic/bitwise opcodes ops[3] is the group operation.
+		if (operation == GroupOperationClusteredReduce) // Clustered reductions live in their own extension.
+		{
+			require_extension_internal("GL_KHR_shader_subgroup_clustered");
+		}
+		else if (operation == GroupOperationExclusiveScan || operation == GroupOperationInclusiveScan ||
+		         operation == GroupOperationReduce)
+		{
+			require_extension_internal("GL_KHR_shader_subgroup_arithmetic");
+		}
+		else
+			SPIRV_CROSS_THROW("Invalid group operation.");
+		break;
+	}
+
+	case OpGroupNonUniformQuadSwap:
+	case OpGroupNonUniformQuadBroadcast:
+		require_extension_internal("GL_KHR_shader_subgroup_quad");
+		break;
+
+	default:
+		SPIRV_CROSS_THROW("Invalid opcode for subgroup.");
+	}
+
+	uint32_t result_type = ops[0];
+	uint32_t id = ops[1];
+
+	auto scope = static_cast<Scope>(get<SPIRConstant>(ops[2]).scalar()); // ops[2] is looked up as a SPIRConstant holding the scope.
+	if (scope != ScopeSubgroup)
+		SPIRV_CROSS_THROW("Only subgroup scope is supported.");
+
+	switch (op) // Second pass: emit the expression for the opcode.
+	{
+	case OpGroupNonUniformElect:
+		emit_op(result_type, id, "subgroupElect()", true);
+		break;
+
+	case OpGroupNonUniformBroadcast:
+		emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupBroadcast");
+		break;
+
+	case OpGroupNonUniformBroadcastFirst:
+		emit_unary_func_op(result_type, id, ops[3], "subgroupBroadcastFirst");
+		break;
+
+	case OpGroupNonUniformBallot:
+		emit_unary_func_op(result_type, id, ops[3], "subgroupBallot");
+		break;
+
+	case OpGroupNonUniformInverseBallot:
+		emit_unary_func_op(result_type, id, ops[3], "subgroupInverseBallot");
+		break;
+
+	case OpGroupNonUniformBallotBitExtract:
+		emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupBallotBitExtract");
+		break;
+
+	case OpGroupNonUniformBallotFindLSB:
+		emit_unary_func_op(result_type, id, ops[3], "subgroupBallotFindLSB");
+		break;
+
+	case OpGroupNonUniformBallotFindMSB:
+		emit_unary_func_op(result_type, id, ops[3], "subgroupBallotFindMSB");
+		break;
+
+	case OpGroupNonUniformBallotBitCount:
+	{
+		auto operation = static_cast<GroupOperation>(ops[3]); // Here ops[3] is the group operation and ops[4] the value operand.
+		if (operation == GroupOperationReduce)
+			emit_unary_func_op(result_type, id, ops[4], "subgroupBallotBitCount");
+		else if (operation == GroupOperationInclusiveScan)
+			emit_unary_func_op(result_type, id, ops[4], "subgroupBallotInclusiveBitCount");
+		else if (operation == GroupOperationExclusiveScan)
+			emit_unary_func_op(result_type, id, ops[4], "subgroupBallotExclusiveBitCount");
+		else
+			SPIRV_CROSS_THROW("Invalid BitCount operation.");
+		break;
+	}
+
+	case OpGroupNonUniformShuffle:
+		emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffle");
+		break;
+
+	case OpGroupNonUniformShuffleXor:
+		emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleXor");
+		break;
+
+	case OpGroupNonUniformShuffleUp:
+		emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleUp");
+		break;
+
+	case OpGroupNonUniformShuffleDown:
+		emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleDown");
+		break;
+
+	case OpGroupNonUniformAll:
+		emit_unary_func_op(result_type, id, ops[3], "subgroupAll");
+		break;
+
+	case OpGroupNonUniformAny:
+		emit_unary_func_op(result_type, id, ops[3], "subgroupAny");
+		break;
+
+	case OpGroupNonUniformAllEqual:
+		emit_unary_func_op(result_type, id, ops[3], "subgroupAllEqual");
+		break;
+
+		// clang-format off
+#define GROUP_OP(op, glsl_op) \
+case OpGroupNonUniform##op: \
+	{ \
+		auto operation = static_cast<GroupOperation>(ops[3]); \
+		if (operation == GroupOperationReduce) \
+			emit_unary_func_op(result_type, id, ops[4], "subgroup" #glsl_op); \
+		else if (operation == GroupOperationInclusiveScan) \
+			emit_unary_func_op(result_type, id, ops[4], "subgroupInclusive" #glsl_op); \
+		else if (operation == GroupOperationExclusiveScan) \
+			emit_unary_func_op(result_type, id, ops[4], "subgroupExclusive" #glsl_op); \
+		else if (operation == GroupOperationClusteredReduce) \
+			emit_binary_func_op(result_type, id, ops[4], ops[5], "subgroupClustered" #glsl_op); \
+		else \
+			SPIRV_CROSS_THROW("Invalid group operation."); \
+		break; \
+	}
+	GROUP_OP(FAdd, Add) // Float, signed and unsigned variants of an operation share one GLSL function suffix.
+	GROUP_OP(FMul, Mul)
+	GROUP_OP(FMin, Min)
+	GROUP_OP(FMax, Max)
+	GROUP_OP(IAdd, Add)
+	GROUP_OP(IMul, Mul)
+	GROUP_OP(SMin, Min)
+	GROUP_OP(SMax, Max)
+	GROUP_OP(UMin, Min)
+	GROUP_OP(UMax, Max)
+	GROUP_OP(BitwiseAnd, And)
+	GROUP_OP(BitwiseOr, Or)
+	GROUP_OP(BitwiseXor, Xor)
+#undef GROUP_OP
+		// clang-format on
+
+	case OpGroupNonUniformQuadSwap:
+	{
+		uint32_t direction = get<SPIRConstant>(ops[4]).scalar(); // Constant direction: 0 = horizontal, 1 = vertical, 2 = diagonal.
+		if (direction == 0)
+			emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapHorizontal");
+		else if (direction == 1)
+			emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapVertical");
+		else if (direction == 2)
+			emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapDiagonal");
+		else
+			SPIRV_CROSS_THROW("Invalid quad swap direction.");
+		break;
+	}
+
+	case OpGroupNonUniformQuadBroadcast:
+	{
+		emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupQuadBroadcast");
+		break;
+	}
+
+	default:
+		SPIRV_CROSS_THROW("Invalid opcode for subgroup.");
+	}
+
+	register_control_dependent_expression(id); // Applied to every subgroup result, whichever opcode produced it.
+}
+
 string CompilerGLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in_type)
 {
 	if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Int)
@@ -4316,6 +4889,10 @@ string CompilerGLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &i
 		return "uint64BitsToDouble";
 	else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::UInt && in_type.vecsize == 2)
 		return "packUint2x32";
+	else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1)
+		return "unpackFloat2x16";
+	else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Half && in_type.vecsize == 2)
+		return "packFloat2x16";
 	else
 		return "";
 }
@@ -4423,14 +5000,14 @@ string CompilerGLSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage)
 
 	case BuiltInSampleId:
 		if (options.es && options.version < 320)
-			require_extension("GL_OES_sample_variables");
+			require_extension_internal("GL_OES_sample_variables");
 		if (!options.es && options.version < 400)
 			SPIRV_CROSS_THROW("gl_SampleID not supported before GLSL 400.");
 		return "gl_SampleID";
 
 	case BuiltInSampleMask:
 		if (options.es && options.version < 320)
-			require_extension("GL_OES_sample_variables");
+			require_extension_internal("GL_OES_sample_variables");
 		if (!options.es && options.version < 400)
 			SPIRV_CROSS_THROW("gl_SampleMask/gl_SampleMaskIn not supported before GLSL 400.");
 
@@ -4441,7 +5018,7 @@ string CompilerGLSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage)
 
 	case BuiltInSamplePosition:
 		if (options.es && options.version < 320)
-			require_extension("GL_OES_sample_variables");
+			require_extension_internal("GL_OES_sample_variables");
 		if (!options.es && options.version < 400)
 			SPIRV_CROSS_THROW("gl_SamplePosition not supported before GLSL 400.");
 		return "gl_SamplePosition";
@@ -4449,15 +5026,69 @@ string CompilerGLSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage)
 	case BuiltInViewIndex:
 		if (options.vulkan_semantics)
 		{
-			require_extension("GL_EXT_multiview");
+			require_extension_internal("GL_EXT_multiview");
 			return "gl_ViewIndex";
 		}
 		else
 		{
-			require_extension("GL_OVR_multiview2");
+			require_extension_internal("GL_OVR_multiview2");
 			return "gl_ViewID_OVR";
 		}
 
+	case BuiltInNumSubgroups:
+		if (!options.vulkan_semantics)
+			SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup.");
+		require_extension_internal("GL_KHR_shader_subgroup_basic");
+		return "gl_NumSubgroups";
+
+	case BuiltInSubgroupId:
+		if (!options.vulkan_semantics)
+			SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup.");
+		require_extension_internal("GL_KHR_shader_subgroup_basic");
+		return "gl_SubgroupID";
+
+	case BuiltInSubgroupSize:
+		if (!options.vulkan_semantics)
+			SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup.");
+		require_extension_internal("GL_KHR_shader_subgroup_basic");
+		return "gl_SubgroupSize";
+
+	case BuiltInSubgroupLocalInvocationId:
+		if (!options.vulkan_semantics)
+			SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup.");
+		require_extension_internal("GL_KHR_shader_subgroup_basic");
+		return "gl_SubgroupInvocationID";
+
+	case BuiltInSubgroupEqMask:
+		if (!options.vulkan_semantics)
+			SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup.");
+		require_extension_internal("GL_KHR_shader_subgroup_ballot");
+		return "gl_SubgroupEqMask";
+
+	case BuiltInSubgroupGeMask:
+		if (!options.vulkan_semantics)
+			SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup.");
+		require_extension_internal("GL_KHR_shader_subgroup_ballot");
+		return "gl_SubgroupGeMask";
+
+	case BuiltInSubgroupGtMask:
+		if (!options.vulkan_semantics)
+			SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup.");
+		require_extension_internal("GL_KHR_shader_subgroup_ballot");
+		return "gl_SubgroupGtMask";
+
+	case BuiltInSubgroupLeMask:
+		if (!options.vulkan_semantics)
+			SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup.");
+		require_extension_internal("GL_KHR_shader_subgroup_ballot");
+		return "gl_SubgroupLeMask";
+
+	case BuiltInSubgroupLtMask:
+		if (!options.vulkan_semantics)
+			SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup.");
+		require_extension_internal("GL_KHR_shader_subgroup_ballot");
+		return "gl_SubgroupLtMask";
+
 	default:
 		return join("gl_BuiltIn_", convert_to_string(builtin));
 	}
@@ -4499,9 +5130,9 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
 		type = &get<SPIRType>(type->parent_type);
 	}
 
-	bool access_chain_is_arrayed = false;
+	bool access_chain_is_arrayed = expr.find_first_of('[') != string::npos;
 	bool row_major_matrix_needs_conversion = is_non_native_row_major_matrix(base);
-	bool is_packed = false;
+	bool is_packed = has_decoration(base, DecorationCPacked);
 	bool pending_array_enclose = false;
 	bool dimension_flatten = false;
 
@@ -4660,23 +5291,22 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
 		// Vector -> Scalar
 		else if (type->vecsize > 1)
 		{
-			if (is_packed)
-			{
-				expr = unpack_expression_type(expr, *type);
-				is_packed = false;
-			}
-
-			if (index_is_literal)
+			if (index_is_literal && !is_packed)
 			{
 				expr += ".";
 				expr += index_to_swizzle(index);
 			}
-			else if (ids[index].get_type() == TypeConstant)
+			else if (ids[index].get_type() == TypeConstant && !is_packed)
 			{
 				auto &c = get<SPIRConstant>(index);
 				expr += ".";
 				expr += index_to_swizzle(c.scalar());
 			}
+			else if (index_is_literal)
+			{
+				// For packed vectors, we can only access them as an array, not by swizzle.
+				expr += join("[", index, "]");
+			}
 			else
 			{
 				expr += "[";
@@ -4684,6 +5314,7 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
 				expr += "]";
 			}
 
+			is_packed = false;
 			type_id = type->parent_type;
 			type = &get<SPIRType>(type_id);
 		}
@@ -4824,7 +5455,7 @@ std::string CompilerGLSL::flattened_access_chain_struct(uint32_t base, const uin
 		uint32_t matrix_stride = 0;
 		if (member_type.columns > 1)
 		{
-			need_transpose = (combined_decoration_for_member(target_type, i) & (1ull << DecorationRowMajor)) != 0;
+			need_transpose = combined_decoration_for_member(target_type, i).get(DecorationRowMajor);
 			matrix_stride = type_struct_member_matrix_stride(target_type, i);
 		}
 
@@ -5023,7 +5654,7 @@ std::pair<std::string, uint32_t> CompilerGLSL::flattened_access_chain_offset(con
 			{
 				matrix_stride = type_struct_member_matrix_stride(struct_type, index);
 				row_major_matrix_needs_conversion =
-				    (combined_decoration_for_member(struct_type, index) & (1ull << DecorationRowMajor)) != 0;
+				    combined_decoration_for_member(struct_type, index).get(DecorationRowMajor);
 			}
 			else
 				row_major_matrix_needs_conversion = false;
@@ -5391,13 +6022,18 @@ bool CompilerGLSL::skip_argument(uint32_t id) const
 	return false;
 }
 
-bool CompilerGLSL::optimize_read_modify_write(const string &lhs, const string &rhs)
+bool CompilerGLSL::optimize_read_modify_write(const SPIRType &type, const string &lhs, const string &rhs)
 {
 	// Do this with strings because we have a very clear pattern we can check for and it avoids
 	// adding lots of special cases to the code emission.
 	if (rhs.size() < lhs.size() + 3)
 		return false;
 
+	// Do not optimize matrices. They are a bit awkward to reason about in general
+	// (in which order does operation happen?), and it does not work on MSL anyways.
+	if (type.vecsize > 1 && type.columns > 1)
+		return false;
+
 	auto index = rhs.find(lhs);
 	if (index != 0)
 		return false;
@@ -5422,7 +6058,16 @@ bool CompilerGLSL::optimize_read_modify_write(const string &lhs, const string &r
 	return true;
 }
 
-void CompilerGLSL::emit_block_instructions(const SPIRBlock &block)
+void CompilerGLSL::register_control_dependent_expression(uint32_t expr) // Queues a forwarded temporary on the current block's invalidate_expressions list.
+{
+	if (forwarded_temporaries.find(expr) == end(forwarded_temporaries)) // Not a forwarded temporary: nothing to record.
+		return;
+
+	assert(current_emitting_block); // Only meaningful while a block is being emitted.
+	current_emitting_block->invalidate_expressions.push_back(expr);
+}
+
+void CompilerGLSL::emit_block_instructions(SPIRBlock &block)
 {
 	current_emitting_block = &block;
 	for (auto &op : block.ops)
@@ -5507,7 +6152,9 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		bool need_transpose, result_is_packed;
 		auto e = access_chain(ops[2], &ops[3], length - 3, get<SPIRType>(ops[0]), &need_transpose, &result_is_packed);
 		auto &expr = set<SPIRExpression>(ops[1], move(e), ops[0], should_forward(ops[2]));
-		expr.loaded_from = ops[2];
+
+		auto *backing_variable = maybe_get_backing_variable(ops[2]);
+		expr.loaded_from = backing_variable ? backing_variable->self : ops[2];
 		expr.need_transpose = need_transpose;
 
 		// Mark the result as being packed. Some platforms handled packed vectors differently than non-packed.
@@ -5544,7 +6191,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 				// While this is purely cosmetic, this is important for legacy ESSL where loop
 				// variable increments must be in either i++ or i += const-expr.
 				// Without this, we end up with i = i + 1, which is correct GLSL, but not correct GLES 2.0.
-				if (!optimize_read_modify_write(lhs, rhs))
+				if (!optimize_read_modify_write(expression_type(ops[1]), lhs, rhs))
 					statement(lhs, " = ", rhs, ";");
 				register_write(ops[0]);
 			}
@@ -5622,15 +6269,6 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		{
 			uint32_t image_id = combined.global_image ? combined.image_id : arg[combined.image_id];
 			uint32_t sampler_id = combined.global_sampler ? combined.sampler_id : arg[combined.sampler_id];
-
-			auto *image = maybe_get_backing_variable(image_id);
-			if (image)
-				image_id = image->self;
-
-			auto *samp = maybe_get_backing_variable(sampler_id);
-			if (samp)
-				sampler_id = samp->self;
-
 			arglist.push_back(to_combined_image_sampler(image_id, sampler_id));
 		}
 
@@ -5683,7 +6321,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 	{
 		uint32_t result_type = ops[0];
 		uint32_t id = ops[1];
-		const auto *elems = &ops[2];
+		const auto *const elems = &ops[2];
 		length -= 2;
 
 		bool forward = true;
@@ -5691,34 +6329,25 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 			forward = forward && should_forward(elems[i]);
 
 		auto &out_type = get<SPIRType>(result_type);
-
-		if (!length)
-		{
-			if (out_type.basetype == SPIRType::Struct)
-			{
-				// It is technically allowed to make a blank struct,
-				// but we cannot make a meaningful expression out of it in high level languages,
-				// so make it a blank expression.
-				emit_op(result_type, id, "", forward);
-				break;
-			}
-			else
-				SPIRV_CROSS_THROW("Invalid input to OpCompositeConstruct.");
-		}
-
-		auto &in_type = expression_type(elems[0]);
+		auto *in_type = length > 0 ? &expression_type(elems[0]) : nullptr;
 
 		// Only splat if we have vector constructors.
 		// Arrays and structs must be initialized properly in full.
 		bool composite = !out_type.array.empty() || out_type.basetype == SPIRType::Struct;
-		bool splat = in_type.vecsize == 1 && in_type.columns == 1 && !composite && backend.use_constructor_splatting;
-		bool swizzle_splat = in_type.vecsize == 1 && in_type.columns == 1 && backend.can_swizzle_scalar;
 
-		if (ids[elems[0]].get_type() == TypeConstant &&
-		    (in_type.basetype != SPIRType::Float && in_type.basetype != SPIRType::Double))
+		bool splat = false;
+		bool swizzle_splat = false;
+
+		if (in_type)
 		{
-			// Cannot swizzle literal integers as a special case.
-			swizzle_splat = false;
+			splat = in_type->vecsize == 1 && in_type->columns == 1 && !composite && backend.use_constructor_splatting;
+			swizzle_splat = in_type->vecsize == 1 && in_type->columns == 1 && backend.can_swizzle_scalar;
+
+			if (ids[elems[0]].get_type() == TypeConstant && !type_is_floating_point(*in_type))
+			{
+				// Cannot swizzle literal integers as a special case.
+				swizzle_splat = false;
+			}
 		}
 
 		if (splat || swizzle_splat)
@@ -5738,16 +6367,20 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 			forward = false;
 		if (!out_type.array.empty() && !backend.can_declare_arrays_inline)
 			forward = false;
+		if (type_is_empty(out_type) && !backend.supports_empty_struct)
+			forward = false;
 
 		string constructor_op;
 		if (backend.use_initializer_list && composite)
 		{
 			// Only use this path if we are building composites.
 			// This path cannot be used for arithmetic.
-			if (backend.use_typed_initializer_list)
+			if (backend.use_typed_initializer_list && out_type.basetype == SPIRType::Struct)
 				constructor_op += type_to_glsl_constructor(get<SPIRType>(result_type));
 			constructor_op += "{ ";
-			if (splat)
+			if (type_is_empty(out_type) && !backend.supports_empty_struct)
+				constructor_op += "0";
+			else if (splat)
 				constructor_op += to_expression(elems[0]);
 			else
 				constructor_op += build_composite_combiner(result_type, elems, length);
@@ -5760,7 +6393,9 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		else
 		{
 			constructor_op = type_to_glsl_constructor(get<SPIRType>(result_type)) + "(";
-			if (splat)
+			if (type_is_empty(out_type) && !backend.supports_empty_struct)
+				constructor_op += "0";
+			else if (splat)
 				constructor_op += to_expression(elems[0]);
 			else
 				constructor_op += build_composite_combiner(result_type, elems, length);
@@ -5768,6 +6403,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		}
 
 		emit_op(result_type, id, constructor_op, forward);
+		for (uint32_t i = 0; i < length; i++)
+			inherit_expression_dependencies(id, elems[i]);
 		break;
 	}
 
@@ -5796,6 +6433,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 
 		auto expr = access_chain_internal(ops[2], &ops[3], 1, false);
 		emit_op(result_type, id, expr, should_forward(ops[2]));
+		inherit_expression_dependencies(id, ops[2]);
+		inherit_expression_dependencies(id, ops[3]);
 		break;
 	}
 
@@ -5815,6 +6454,10 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		if (composite_type.basetype == SPIRType::Struct || !composite_type.array.empty())
 			allow_base_expression = false;
 
+		// Packed expressions cannot be split up.
+		if (has_decoration(ops[2], DecorationCPacked))
+			allow_base_expression = false;
+
 		// Only apply this optimization if result is scalar.
 		if (allow_base_expression && should_forward(ops[2]) && type.vecsize == 1 && type.columns == 1 && length == 1)
 		{
@@ -5832,12 +6475,14 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 			// from expression causing it to be forced to an actual temporary in GLSL.
 			auto expr = access_chain_internal(ops[2], &ops[3], length, true, true);
 			auto &e = emit_op(result_type, id, expr, true, !expression_is_forwarded(ops[2]));
+			inherit_expression_dependencies(id, ops[2]);
 			e.base_expression = ops[2];
 		}
 		else
 		{
 			auto expr = access_chain_internal(ops[2], &ops[3], length, true);
 			emit_op(result_type, id, expr, should_forward(ops[2]), !expression_is_forwarded(ops[2]));
+			inherit_expression_dependencies(id, ops[2]);
 		}
 		break;
 	}
@@ -5889,6 +6534,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 			// For pointer types, we copy the pointer itself.
 			statement(declare_temporary(result_type, id), to_expression(rhs), ";");
 			set<SPIRExpression>(id, to_name(id), result_type, true);
+			inherit_expression_dependencies(id, rhs);
 		}
 		else
 		{
@@ -5921,6 +6567,10 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 			if (elems[i] >= type0.vecsize)
 				shuffle = true;
 
+		// Cannot use swizzles with packed expressions, force shuffle path.
+		if (!shuffle && has_decoration(vec0, DecorationCPacked))
+			shuffle = true;
+
 		string expr;
 		bool should_fwd, trivial_forward;
 
@@ -5934,9 +6584,9 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 			for (uint32_t i = 0; i < length; i++)
 			{
 				if (elems[i] >= type0.vecsize)
-					args.push_back(join(to_enclosed_expression(vec1), ".", index_to_swizzle(elems[i] - type0.vecsize)));
+					args.push_back(to_extract_component_expression(vec1, elems[i] - type0.vecsize));
 				else
-					args.push_back(join(to_enclosed_expression(vec0), ".", index_to_swizzle(elems[i])));
+					args.push_back(to_extract_component_expression(vec0, elems[i]));
 			}
 			expr += join(type_to_glsl_constructor(get<SPIRType>(result_type)), "(", merge(args), ")");
 		}
@@ -5947,10 +6597,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 
 			// We only source from first vector, so can use swizzle.
 			// If the vector is packed, unpack it before applying a swizzle (needed for MSL)
-			expr += to_enclosed_expression(vec0);
-			if (has_decoration(vec0, DecorationCPacked))
-				expr = unpack_expression_type(expr, expression_type(vec0));
-
+			expr += to_enclosed_unpacked_expression(vec0);
 			expr += ".";
 			for (uint32_t i = 0; i < length; i++)
 				expr += index_to_swizzle(elems[i]);
@@ -5963,6 +6610,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		// We inherit the forwardedness from our arguments to avoid flushing out to temporaries when it's not really needed.
 
 		emit_op(result_type, id, expr, should_fwd, trivial_forward);
+		inherit_expression_dependencies(id, vec0);
+		inherit_expression_dependencies(id, vec1);
 		break;
 	}
 
@@ -6045,6 +6694,24 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		UFOP(transpose);
 		break;
 
+	case OpSRem:
+	{
+		uint32_t result_type = ops[0];
+		uint32_t result_id = ops[1];
+		uint32_t op0 = ops[2];
+		uint32_t op1 = ops[3];
+
+		// Needs special handling.
+		bool forward = should_forward(op0) && should_forward(op1);
+		auto expr = join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ", "(",
+		                 to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), ")");
+
+		emit_op(result_type, result_id, expr, forward);
+		inherit_expression_dependencies(result_id, op0);
+		inherit_expression_dependencies(result_id, op1);
+		break;
+	}
+
 	case OpSDiv:
 		BOP_CAST(/, SPIRType::Int);
 		break;
@@ -6364,6 +7031,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		}
 
 		emit_op(result_type, id, op, should_forward(arg));
+		inherit_expression_dependencies(id, arg);
 		break;
 	}
 
@@ -6371,13 +7039,15 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 	case OpDPdx:
 		UFOP(dFdx);
 		if (is_legacy_es())
-			require_extension("GL_OES_standard_derivatives");
+			require_extension_internal("GL_OES_standard_derivatives");
+		register_control_dependent_expression(ops[1]);
 		break;
 
 	case OpDPdy:
 		UFOP(dFdy);
 		if (is_legacy_es())
-			require_extension("GL_OES_standard_derivatives");
+			require_extension_internal("GL_OES_standard_derivatives");
+		register_control_dependent_expression(ops[1]);
 		break;
 
 	case OpDPdxFine:
@@ -6387,7 +7057,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 			SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
 		}
 		if (options.version < 450)
-			require_extension("GL_ARB_derivative_control");
+			require_extension_internal("GL_ARB_derivative_control");
+		register_control_dependent_expression(ops[1]);
 		break;
 
 	case OpDPdyFine:
@@ -6397,7 +7068,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 			SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
 		}
 		if (options.version < 450)
-			require_extension("GL_ARB_derivative_control");
+			require_extension_internal("GL_ARB_derivative_control");
+		register_control_dependent_expression(ops[1]);
 		break;
 
 	case OpDPdxCoarse:
@@ -6407,7 +7079,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		}
 		UFOP(dFdxCoarse);
 		if (options.version < 450)
-			require_extension("GL_ARB_derivative_control");
+			require_extension_internal("GL_ARB_derivative_control");
+		register_control_dependent_expression(ops[1]);
 		break;
 
 	case OpDPdyCoarse:
@@ -6417,13 +7090,37 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 			SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
 		}
 		if (options.version < 450)
-			require_extension("GL_ARB_derivative_control");
+			require_extension_internal("GL_ARB_derivative_control");
+		register_control_dependent_expression(ops[1]);
 		break;
 
 	case OpFwidth:
 		UFOP(fwidth);
 		if (is_legacy_es())
-			require_extension("GL_OES_standard_derivatives");
+			require_extension_internal("GL_OES_standard_derivatives");
+		register_control_dependent_expression(ops[1]);
+		break;
+
+	case OpFwidthCoarse:
+		UFOP(fwidthCoarse);
+		if (options.es)
+		{
+			SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
+		}
+		if (options.version < 450)
+			require_extension_internal("GL_ARB_derivative_control");
+		register_control_dependent_expression(ops[1]);
+		break;
+
+	case OpFwidthFine:
+		UFOP(fwidthFine);
+		if (options.es)
+		{
+			SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
+		}
+		if (options.version < 450)
+			require_extension_internal("GL_ARB_derivative_control");
+		register_control_dependent_expression(ops[1]);
 		break;
 
 	// Bitfield
@@ -6613,7 +7310,9 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 	{
 		uint32_t result_type = ops[0];
 		uint32_t id = ops[1];
-		auto &e = emit_op(result_type, id, to_expression(ops[2]), true);
+
+		// Suppress usage tracking.
+		auto &e = emit_op(result_type, id, to_expression(ops[2]), true, true);
 
 		// When using the image, we need to know which variable it is actually loaded from.
 		auto *var = maybe_get_backing_variable(ops[2]);
@@ -6625,7 +7324,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 	{
 		if (!options.es && options.version < 400)
 		{
-			require_extension("GL_ARB_texture_query_lod");
+			require_extension_internal("GL_ARB_texture_query_lod");
 			// For some reason, the ARB spec is all-caps.
 			BFOP(textureQueryLOD);
 		}
@@ -6633,6 +7332,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 			SPIRV_CROSS_THROW("textureQueryLod not supported in ES profile.");
 		else
 			BFOP(textureQueryLod);
+		register_control_dependent_expression(ops[1]);
 		break;
 	}
 
@@ -6642,11 +7342,11 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		uint32_t id = ops[1];
 
 		if (!options.es && options.version < 430)
-			require_extension("GL_ARB_texture_query_levels");
+			require_extension_internal("GL_ARB_texture_query_levels");
 		if (options.es)
 			SPIRV_CROSS_THROW("textureQueryLevels not supported in ES profile.");
 
-		auto expr = join("textureQueryLevels(", to_expression(ops[2]), ")");
+		auto expr = join("textureQueryLevels(", convert_separate_image_to_combined(ops[2]), ")");
 		auto &restype = get<SPIRType>(ops[0]);
 		expr = bitcast_expression(restype, SPIRType::Int, expr);
 		emit_op(result_type, id, expr, true);
@@ -6663,7 +7363,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		if (type.image.sampled == 2)
 			expr = join("imageSamples(", to_expression(ops[2]), ")");
 		else
-			expr = join("textureSamples(", to_expression(ops[2]), ")");
+			expr = join("textureSamples(", convert_separate_image_to_combined(ops[2]), ")");
 
 		auto &restype = get<SPIRType>(ops[0]);
 		expr = bitcast_expression(restype, SPIRType::Int, expr);
@@ -6684,7 +7384,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		uint32_t result_type = ops[0];
 		uint32_t id = ops[1];
 
-		auto expr = join("textureSize(", to_expression(ops[2]), ", ", bitcast_expression(SPIRType::Int, ops[3]), ")");
+		auto expr = join("textureSize(", convert_separate_image_to_combined(ops[2]), ", ",
+		                 bitcast_expression(SPIRType::Int, ops[3]), ")");
 		auto &restype = get<SPIRType>(ops[0]);
 		expr = bitcast_expression(restype, SPIRType::Int, expr);
 		emit_op(result_type, id, expr, true);
@@ -6701,9 +7402,9 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		if (var)
 		{
 			auto &flags = meta.at(var->self).decoration.decoration_flags;
-			if (flags & (1ull << DecorationNonReadable))
+			if (flags.get(DecorationNonReadable))
 			{
-				flags &= ~(1ull << DecorationNonReadable);
+				flags.clear(DecorationNonReadable);
 				force_recompile = true;
 			}
 		}
@@ -6821,6 +7522,10 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		}
 		else
 			emit_op(result_type, id, imgexpr, false);
+
+		inherit_expression_dependencies(id, ops[2]);
+		if (type.image.ms)
+			inherit_expression_dependencies(id, ops[5]);
 		break;
 	}
 
@@ -6845,9 +7550,9 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		if (var)
 		{
 			auto &flags = meta.at(var->self).decoration.decoration_flags;
-			if (flags & (1ull << DecorationNonWritable))
+			if (flags.get(DecorationNonWritable))
 			{
-				flags &= ~(1ull << DecorationNonWritable);
+				flags.clear(DecorationNonWritable);
 				force_recompile = true;
 			}
 		}
@@ -6898,7 +7603,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 			else
 			{
 				// This path is hit for samplerBuffers and multisampled images which do not have LOD.
-				expr = join("textureSize(", to_expression(ops[2]), ")");
+				expr = join("textureSize(", convert_separate_image_to_combined(ops[2]), ")");
 			}
 
 			auto &restype = get<SPIRType>(ops[0]);
@@ -6914,14 +7619,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 	case OpControlBarrier:
 	case OpMemoryBarrier:
 	{
-		if (get_entry_point().model == ExecutionModelTessellationControl)
-		{
-			// Control shaders only have barriers, and it implies memory barriers.
-			if (opcode == OpControlBarrier)
-				statement("barrier();");
-			break;
-		}
-
+		uint32_t execution_scope = 0;
 		uint32_t memory;
 		uint32_t semantics;
 
@@ -6932,10 +7630,26 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		}
 		else
 		{
+			execution_scope = get<SPIRConstant>(ops[0]).scalar();
 			memory = get<SPIRConstant>(ops[1]).scalar();
 			semantics = get<SPIRConstant>(ops[2]).scalar();
 		}
 
+		if (execution_scope == ScopeSubgroup || memory == ScopeSubgroup)
+		{
+			if (!options.vulkan_semantics)
+				SPIRV_CROSS_THROW("Can only use subgroup operations in Vulkan semantics.");
+			require_extension_internal("GL_KHR_shader_subgroup_basic");
+		}
+
+		if (execution_scope != ScopeSubgroup && get_entry_point().model == ExecutionModelTessellationControl)
+		{
+			// Control shaders only have barriers, and it implies memory barriers.
+			if (opcode == OpControlBarrier)
+				statement("barrier();");
+			break;
+		}
+
 		// We only care about these flags, acquire/release and friends are not relevant to GLSL.
 		semantics = mask_relevant_memory_semantics(semantics);
 
@@ -6979,7 +7693,11 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		// We are synchronizing some memory or syncing execution,
 		// so we cannot forward any loads beyond the memory barrier.
 		if (semantics || opcode == OpControlBarrier)
+		{
+			assert(current_emitting_block);
+			flush_control_dependent_expressions(current_emitting_block->self);
 			flush_all_active_variables();
+		}
 
 		if (memory == ScopeWorkgroup) // Only need to consider memory within a group
 		{
@@ -6988,6 +7706,33 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 			else if (semantics != 0)
 				statement("groupMemoryBarrier();");
 		}
+		else if (memory == ScopeSubgroup)
+		{
+			const uint32_t all_barriers =
+			    MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | MemorySemanticsImageMemoryMask;
+
+			if (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask))
+			{
+				// These are not relevant for GLSL, but assume it means subgroupMemoryBarrier().
+				// subgroupMemoryBarrier() does everything, so no need to test anything else.
+				statement("subgroupMemoryBarrier();");
+			}
+			else if ((semantics & all_barriers) == all_barriers)
+			{
+				// Short-hand instead of emitting 3 barriers.
+				statement("subgroupMemoryBarrier();");
+			}
+			else
+			{
+				// Pick out individual barriers.
+				if (semantics & MemorySemanticsWorkgroupMemoryMask)
+					statement("subgroupMemoryBarrierShared();");
+				if (semantics & MemorySemanticsUniformMemoryMask)
+					statement("subgroupMemoryBarrierBuffer();");
+				if (semantics & MemorySemanticsImageMemoryMask)
+					statement("subgroupMemoryBarrierImage();");
+			}
+		}
 		else
 		{
 			const uint32_t all_barriers = MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask |
@@ -7019,7 +7764,12 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		}
 
 		if (opcode == OpControlBarrier)
-			statement("barrier();");
+		{
+			if (execution_scope == ScopeSubgroup)
+				statement("subgroupBarrier();");
+			else
+				statement("barrier();");
+		}
 		break;
 	}
 
@@ -7056,15 +7806,18 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		break;
 	}
 
+	// Legacy sub-group stuff ...
 	case OpSubgroupBallotKHR:
 	{
 		uint32_t result_type = ops[0];
 		uint32_t id = ops[1];
 		string expr;
-		expr = join("unpackUint2x32(ballotARB(" + to_expression(ops[2]) + "))");
-		emit_op(result_type, id, expr, true);
+		expr = join("uvec4(unpackUint2x32(ballotARB(" + to_expression(ops[2]) + ")), 0u, 0u)");
+		emit_op(result_type, id, expr, should_forward(ops[2]));
 
-		require_extension("GL_ARB_shader_ballot");
+		require_extension_internal("GL_ARB_shader_ballot");
+		inherit_expression_dependencies(id, ops[2]);
+		register_control_dependent_expression(ops[1]);
 		break;
 	}
 
@@ -7074,7 +7827,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		uint32_t id = ops[1];
 		emit_unary_func_op(result_type, id, ops[2], "readFirstInvocationARB");
 
-		require_extension("GL_ARB_shader_ballot");
+		require_extension_internal("GL_ARB_shader_ballot");
+		register_control_dependent_expression(ops[1]);
 		break;
 	}
 
@@ -7084,7 +7838,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		uint32_t id = ops[1];
 		emit_binary_func_op(result_type, id, ops[2], ops[3], "readInvocationARB");
 
-		require_extension("GL_ARB_shader_ballot");
+		require_extension_internal("GL_ARB_shader_ballot");
+		register_control_dependent_expression(ops[1]);
 		break;
 	}
 
@@ -7094,7 +7849,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		uint32_t id = ops[1];
 		emit_unary_func_op(result_type, id, ops[2], "allInvocationsARB");
 
-		require_extension("GL_ARB_shader_group_vote");
+		require_extension_internal("GL_ARB_shader_group_vote");
+		register_control_dependent_expression(ops[1]);
 		break;
 	}
 
@@ -7104,7 +7860,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		uint32_t id = ops[1];
 		emit_unary_func_op(result_type, id, ops[2], "anyInvocationARB");
 
-		require_extension("GL_ARB_shader_group_vote");
+		require_extension_internal("GL_ARB_shader_group_vote");
+		register_control_dependent_expression(ops[1]);
 		break;
 	}
 
@@ -7114,7 +7871,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		uint32_t id = ops[1];
 		emit_unary_func_op(result_type, id, ops[2], "allInvocationsEqualARB");
 
-		require_extension("GL_ARB_shader_group_vote");
+		require_extension_internal("GL_ARB_shader_group_vote");
+		register_control_dependent_expression(ops[1]);
 		break;
 	}
 
@@ -7125,7 +7883,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		uint32_t id = ops[1];
 		emit_unary_func_op(result_type, id, ops[4], "addInvocationsNonUniformAMD");
 
-		require_extension("GL_AMD_shader_ballot");
+		require_extension_internal("GL_AMD_shader_ballot");
+		register_control_dependent_expression(ops[1]);
 		break;
 	}
 
@@ -7137,7 +7896,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		uint32_t id = ops[1];
 		emit_unary_func_op(result_type, id, ops[4], "minInvocationsNonUniformAMD");
 
-		require_extension("GL_AMD_shader_ballot");
+		require_extension_internal("GL_AMD_shader_ballot");
+		register_control_dependent_expression(ops[1]);
 		break;
 	}
 
@@ -7149,7 +7909,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		uint32_t id = ops[1];
 		emit_unary_func_op(result_type, id, ops[4], "maxInvocationsNonUniformAMD");
 
-		require_extension("GL_AMD_shader_ballot");
+		require_extension_internal("GL_AMD_shader_ballot");
+		register_control_dependent_expression(ops[1]);
 		break;
 	}
 
@@ -7168,7 +7929,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 			emit_binary_func_op(result_type, id, ops[2], ops[3], "fragmentMaskFetchAMD");
 		}
 
-		require_extension("GL_AMD_shader_fragment_mask");
+		require_extension_internal("GL_AMD_shader_fragment_mask");
 		break;
 	}
 
@@ -7187,10 +7948,45 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 			emit_trinary_func_op(result_type, id, ops[2], ops[3], ops[4], "fragmentFetchAMD");
 		}
 
-		require_extension("GL_AMD_shader_fragment_mask");
+		require_extension_internal("GL_AMD_shader_fragment_mask");
 		break;
 	}
 
+	// Vulkan 1.1 sub-group stuff ...
+	case OpGroupNonUniformElect:
+	case OpGroupNonUniformBroadcast:
+	case OpGroupNonUniformBroadcastFirst:
+	case OpGroupNonUniformBallot:
+	case OpGroupNonUniformInverseBallot:
+	case OpGroupNonUniformBallotBitExtract:
+	case OpGroupNonUniformBallotBitCount:
+	case OpGroupNonUniformBallotFindLSB:
+	case OpGroupNonUniformBallotFindMSB:
+	case OpGroupNonUniformShuffle:
+	case OpGroupNonUniformShuffleXor:
+	case OpGroupNonUniformShuffleUp:
+	case OpGroupNonUniformShuffleDown:
+	case OpGroupNonUniformAll:
+	case OpGroupNonUniformAny:
+	case OpGroupNonUniformAllEqual:
+	case OpGroupNonUniformFAdd:
+	case OpGroupNonUniformIAdd:
+	case OpGroupNonUniformFMul:
+	case OpGroupNonUniformIMul:
+	case OpGroupNonUniformFMin:
+	case OpGroupNonUniformFMax:
+	case OpGroupNonUniformSMin:
+	case OpGroupNonUniformSMax:
+	case OpGroupNonUniformUMin:
+	case OpGroupNonUniformUMax:
+	case OpGroupNonUniformBitwiseAnd:
+	case OpGroupNonUniformBitwiseOr:
+	case OpGroupNonUniformBitwiseXor:
+	case OpGroupNonUniformQuadSwap:
+	case OpGroupNonUniformQuadBroadcast:
+		emit_subgroup_op(instruction);
+		break;
+
 	default:
 		statement("// unimplemented op ", instruction.op);
 		break;
@@ -7261,7 +8057,7 @@ bool CompilerGLSL::is_non_native_row_major_matrix(uint32_t id)
 		return false;
 
 	// Non-matrix or column-major matrix types do not need to be converted.
-	if (!(meta[id].decoration.decoration_flags & (1ull << DecorationRowMajor)))
+	if (!meta[id].decoration.decoration_flags.get(DecorationRowMajor))
 		return false;
 
 	// Only square row-major matrices can be converted at this time.
@@ -7282,7 +8078,7 @@ bool CompilerGLSL::member_is_non_native_row_major_matrix(const SPIRType &type, u
 		return false;
 
 	// Non-matrix or column-major matrix types do not need to be converted.
-	if (!(combined_decoration_for_member(type, index) & (1ull << DecorationRowMajor)))
+	if (!combined_decoration_for_member(type, index).get(DecorationRowMajor))
 		return false;
 
 	// Only square row-major matrices can be converted at this time.
@@ -7326,14 +8122,15 @@ void CompilerGLSL::emit_struct_member(const SPIRType &type, uint32_t member_type
 {
 	auto &membertype = get<SPIRType>(member_type_id);
 
-	uint64_t memberflags = 0;
+	Bitset memberflags;
 	auto &memb = meta[type.self].members;
 	if (index < memb.size())
 		memberflags = memb[index].decoration_flags;
 
 	string qualifiers;
-	bool is_block = (meta[type.self].decoration.decoration_flags &
-	                 ((1ull << DecorationBlock) | (1ull << DecorationBufferBlock))) != 0;
+	bool is_block = meta[type.self].decoration.decoration_flags.get(DecorationBlock) ||
+	                meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
+
 	if (is_block)
 		qualifiers = to_interpolation_qualifiers(memberflags);
 
@@ -7342,7 +8139,7 @@ void CompilerGLSL::emit_struct_member(const SPIRType &type, uint32_t member_type
 	          variable_decl(membertype, to_member_name(type, index)), ";");
 }
 
-const char *CompilerGLSL::flags_to_precision_qualifiers_glsl(const SPIRType &type, uint64_t flags)
+const char *CompilerGLSL::flags_to_precision_qualifiers_glsl(const SPIRType &type, const Bitset &flags)
 {
 	// Structs do not have precision qualifiers, neither do doubles (desktop only anyways, so no mediump/highp).
 	if (type.basetype != SPIRType::Float && type.basetype != SPIRType::Int && type.basetype != SPIRType::UInt &&
@@ -7354,7 +8151,7 @@ const char *CompilerGLSL::flags_to_precision_qualifiers_glsl(const SPIRType &typ
 	{
 		auto &execution = get_entry_point();
 
-		if (flags & (1ull << DecorationRelaxedPrecision))
+		if (flags.get(DecorationRelaxedPrecision))
 		{
 			bool implied_fmediump = type.basetype == SPIRType::Float &&
 			                        options.fragment.default_float_precision == Options::Mediump &&
@@ -7385,12 +8182,8 @@ const char *CompilerGLSL::flags_to_precision_qualifiers_glsl(const SPIRType &typ
 	{
 		// Vulkan GLSL supports precision qualifiers, even in desktop profiles, which is convenient.
 		// The default is highp however, so only emit mediump in the rare case that a shader has these.
-		if (flags & (1ull << DecorationRelaxedPrecision))
-		{
-			bool can_use_mediump =
-			    type.basetype == SPIRType::Float || type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt;
-			return can_use_mediump ? "mediump " : "";
-		}
+		if (flags.get(DecorationRelaxedPrecision))
+			return "mediump ";
 		else
 			return "";
 	}
@@ -7420,13 +8213,13 @@ string CompilerGLSL::to_qualifiers_glsl(uint32_t id)
 	auto &type = expression_type(id);
 	if (type.image.dim != DimSubpassData && type.image.sampled == 2)
 	{
-		if (flags & (1ull << DecorationCoherent))
+		if (flags.get(DecorationCoherent))
 			res += "coherent ";
-		if (flags & (1ull << DecorationRestrict))
+		if (flags.get(DecorationRestrict))
 			res += "restrict ";
-		if (flags & (1ull << DecorationNonWritable))
+		if (flags.get(DecorationNonWritable))
 			res += "readonly ";
-		if (flags & (1ull << DecorationNonReadable))
+		if (flags.get(DecorationNonReadable))
 			res += "writeonly ";
 	}
 
@@ -7477,7 +8270,7 @@ string CompilerGLSL::variable_decl(const SPIRVariable &variable)
 const char *CompilerGLSL::to_pls_qualifiers_glsl(const SPIRVariable &variable)
 {
 	auto flags = meta[variable.self].decoration.decoration_flags;
-	if (flags & (1ull << DecorationRelaxedPrecision))
+	if (flags.get(DecorationRelaxedPrecision))
 		return "mediump ";
 	else
 		return "highp ";
@@ -7564,7 +8357,7 @@ string CompilerGLSL::type_to_array_glsl(const SPIRType &type)
 		if (type.array.size() > 1)
 		{
 			if (!options.es && options.version < 430)
-				require_extension("GL_ARB_arrays_of_arrays");
+				require_extension_internal("GL_ARB_arrays_of_arrays");
 			else if (options.es && options.version < 310)
 				SPIRV_CROSS_THROW("Arrays of arrays not supported before ESSL version 310. "
 				                  "Try using --flatten-multidimensional-arrays or set "
@@ -7632,9 +8425,9 @@ string CompilerGLSL::image_type_glsl(const SPIRType &type, uint32_t /* id */)
 
 	case DimBuffer:
 		if (options.es && options.version < 320)
-			require_extension("GL_OES_texture_buffer");
+			require_extension_internal("GL_OES_texture_buffer");
 		else if (!options.es && options.version < 300)
-			require_extension("GL_EXT_texture_buffer_object");
+			require_extension_internal("GL_EXT_texture_buffer_object");
 		res += "Buffer";
 		break;
 
@@ -7650,7 +8443,7 @@ string CompilerGLSL::image_type_glsl(const SPIRType &type, uint32_t /* id */)
 	if (type.image.arrayed)
 	{
 		if (is_legacy_desktop())
-			require_extension("GL_EXT_texture_array");
+			require_extension_internal("GL_EXT_texture_array");
 		res += "Array";
 	}
 
@@ -7668,7 +8461,7 @@ string CompilerGLSL::type_to_glsl_constructor(const SPIRType &type)
 		if (options.flatten_multidimensional_arrays)
 			SPIRV_CROSS_THROW("Cannot flatten constructors of multidimensional array constructors, e.g. float[][]().");
 		else if (!options.es && options.version < 430)
-			require_extension("GL_ARB_arrays_of_arrays");
+			require_extension_internal("GL_ARB_arrays_of_arrays");
 		else if (options.es && options.version < 310)
 			SPIRV_CROSS_THROW("Arrays of arrays not supported before ESSL version 310.");
 	}
@@ -7711,6 +8504,9 @@ string CompilerGLSL::type_to_glsl(const SPIRType &type, uint32_t id)
 		break;
 	}
 
+	if (type.basetype == SPIRType::UInt && is_legacy())
+		SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy targets.");
+
 	if (type.vecsize == 1 && type.columns == 1) // Scalar builtin
 	{
 		switch (type.basetype)
@@ -7723,6 +8519,8 @@ string CompilerGLSL::type_to_glsl(const SPIRType &type, uint32_t id)
 			return backend.basic_uint_type;
 		case SPIRType::AtomicCounter:
 			return "atomic_uint";
+		case SPIRType::Half:
+			return "float16_t";
 		case SPIRType::Float:
 			return "float";
 		case SPIRType::Double:
@@ -7745,6 +8543,8 @@ string CompilerGLSL::type_to_glsl(const SPIRType &type, uint32_t id)
 			return join("ivec", type.vecsize);
 		case SPIRType::UInt:
 			return join("uvec", type.vecsize);
+		case SPIRType::Half:
+			return join("f16vec", type.vecsize);
 		case SPIRType::Float:
 			return join("vec", type.vecsize);
 		case SPIRType::Double:
@@ -7767,6 +8567,8 @@ string CompilerGLSL::type_to_glsl(const SPIRType &type, uint32_t id)
 			return join("imat", type.vecsize);
 		case SPIRType::UInt:
 			return join("umat", type.vecsize);
+		case SPIRType::Half:
+			return join("f16mat", type.vecsize);
 		case SPIRType::Float:
 			return join("mat", type.vecsize);
 		case SPIRType::Double:
@@ -7786,6 +8588,8 @@ string CompilerGLSL::type_to_glsl(const SPIRType &type, uint32_t id)
 			return join("imat", type.columns, "x", type.vecsize);
 		case SPIRType::UInt:
 			return join("umat", type.columns, "x", type.vecsize);
+		case SPIRType::Half:
+			return join("f16mat", type.columns, "x", type.vecsize);
 		case SPIRType::Float:
 			return join("mat", type.columns, "x", type.vecsize);
 		case SPIRType::Double:
@@ -7809,6 +8613,9 @@ void CompilerGLSL::add_variable(unordered_set<string> &variables, string &name)
 		return;
 	}
 
+	// Avoid double underscores.
+	name = sanitize_underscores(name);
+
 	update_name_cache(variables, name);
 }
 
@@ -7839,7 +8646,13 @@ bool CompilerGLSL::has_extension(const std::string &ext) const
 	return itr != end(forced_extensions);
 }
 
-void CompilerGLSL::require_extension(const string &ext)
+void CompilerGLSL::require_extension(const std::string &ext)
+{
+	if (!has_extension(ext))
+		forced_extensions.push_back(ext);
+}
+
+void CompilerGLSL::require_extension_internal(const string &ext)
 {
 	if (backend.supports_extensions && !has_extension(ext))
 	{
@@ -7859,7 +8672,7 @@ void CompilerGLSL::flatten_buffer_block(uint32_t id)
 		SPIRV_CROSS_THROW(name + " is an array of UBOs.");
 	if (type.basetype != SPIRType::Struct)
 		SPIRV_CROSS_THROW(name + " is not a struct.");
-	if ((flags & (1ull << DecorationBlock)) == 0)
+	if (!flags.get(DecorationBlock))
 		SPIRV_CROSS_THROW(name + " is not a block.");
 	if (type.member_types.empty())
 		SPIRV_CROSS_THROW(name + " is an empty struct.");
@@ -7873,16 +8686,16 @@ bool CompilerGLSL::check_atomic_image(uint32_t id)
 	if (type.storage == StorageClassImage)
 	{
 		if (options.es && options.version < 320)
-			require_extension("GL_OES_shader_image_atomic");
+			require_extension_internal("GL_OES_shader_image_atomic");
 
 		auto *var = maybe_get_backing_variable(id);
 		if (var)
 		{
 			auto &flags = meta.at(var->self).decoration.decoration_flags;
-			if (flags & ((1ull << DecorationNonWritable) | (1ull << DecorationNonReadable)))
+			if (flags.get(DecorationNonWritable) || flags.get(DecorationNonReadable))
 			{
-				flags &= ~(1ull << DecorationNonWritable);
-				flags &= ~(1ull << DecorationNonReadable);
+				flags.clear(DecorationNonWritable);
+				flags.clear(DecorationNonReadable);
 				force_recompile = true;
 			}
 		}
@@ -7907,6 +8720,20 @@ void CompilerGLSL::add_function_overload(const SPIRFunction &func)
 			type_id = type->parent_type;
 			type = &get<SPIRType>(type_id);
 		}
+
+		if (!combined_image_samplers.empty())
+		{
+			// If we have combined image samplers, we cannot really trust the image and sampler arguments
+			// we pass down to callees, because they may be shuffled around.
+			// Ignore these arguments, to make sure that functions need to differ in some other way
+			// to be considered different overloads.
+			if (type->basetype == SPIRType::SampledImage ||
+			    (type->basetype == SPIRType::Image && type->image.sampled == 1) || type->basetype == SPIRType::Sampler)
+			{
+				continue;
+			}
+		}
+
 		hasher.u32(type_id);
 	}
 	uint64_t types_hash = hasher.get();
@@ -7937,7 +8764,7 @@ void CompilerGLSL::add_function_overload(const SPIRFunction &func)
 	}
 }
 
-void CompilerGLSL::emit_function_prototype(SPIRFunction &func, uint64_t return_flags)
+void CompilerGLSL::emit_function_prototype(SPIRFunction &func, const Bitset &return_flags)
 {
 	if (func.self != entry_point)
 		add_function_overload(func);
@@ -8005,7 +8832,7 @@ void CompilerGLSL::emit_function_prototype(SPIRFunction &func, uint64_t return_f
 	statement(decl);
 }
 
-void CompilerGLSL::emit_function(SPIRFunction &func, uint64_t return_flags)
+void CompilerGLSL::emit_function(SPIRFunction &func, const Bitset &return_flags)
 {
 	// Avoid potential cycles.
 	if (func.active)
@@ -8033,6 +8860,9 @@ void CompilerGLSL::emit_function(SPIRFunction &func, uint64_t return_flags)
 	emit_function_prototype(func, return_flags);
 	begin_scope();
 
+	if (func.self == entry_point)
+		emit_entry_point_declarations();
+
 	current_function = &func;
 	auto &entry_block = get<SPIRBlock>(func.entry_block);
 
@@ -8048,12 +8878,12 @@ void CompilerGLSL::emit_function(SPIRFunction &func, uint64_t return_flags)
 			if (b.loop_variables.size() < 2)
 				continue;
 
-			uint64_t flags = get_decoration_mask(b.loop_variables.front());
+			auto &flags = get_decoration_bitset(b.loop_variables.front());
 			uint32_t type = get<SPIRVariable>(b.loop_variables.front()).basetype;
 			bool invalid_initializers = false;
 			for (auto loop_variable : b.loop_variables)
 			{
-				if (flags != get_decoration_mask(loop_variable) ||
+				if (flags != get_decoration_bitset(loop_variable) ||
 				    type != get<SPIRVariable>(b.loop_variables.front()).basetype)
 				{
 					invalid_initializers = true;
@@ -8074,7 +8904,30 @@ void CompilerGLSL::emit_function(SPIRFunction &func, uint64_t return_flags)
 	for (auto &v : func.local_variables)
 	{
 		auto &var = get<SPIRVariable>(v);
-		if (expression_is_lvalue(v))
+		if (var.storage == StorageClassWorkgroup)
+		{
+			// Special variable type which cannot have initializer,
+			// need to be declared as standalone variables.
+			// Comes from MSL which can push global variables as local variables in main function.
+			add_local_variable_name(var.self);
+			statement(variable_decl(var), ";");
+			var.deferred_declaration = false;
+		}
+		else if (var.storage == StorageClassPrivate)
+		{
+			// These variables will not have had their CFG usage analyzed, so move it to the entry block.
+			// Comes from MSL which can push global variables as local variables in main function.
+			// We could just declare them right now, but we would miss out on an important initialization case which is
+			// LUT declaration in MSL.
+			// If we don't declare the variable when it is assigned we're forced to go through a helper function
+			// which copies elements one by one.
+			add_local_variable_name(var.self);
+			auto &dominated = entry_block.dominated_variables;
+			if (find(begin(dominated), end(dominated), var.self) == end(dominated))
+				entry_block.dominated_variables.push_back(var.self);
+			var.deferred_declaration = true;
+		}
+		else if (expression_is_lvalue(v))
 		{
 			add_local_variable_name(var.self);
 
@@ -8085,10 +8938,7 @@ void CompilerGLSL::emit_function(SPIRFunction &func, uint64_t return_flags)
 				// Don't declare variable until first use to declutter the GLSL output quite a lot.
 				// If we don't touch the variable before first branch,
 				// declare it then since we need variable declaration to be in top scope.
-				// Never declare empty structs. They have no meaningful representation.
-				auto &type = get<SPIRType>(var.basetype);
-				bool empty_struct = type.basetype == SPIRType::Struct && type.member_types.empty();
-				var.deferred_declaration = !empty_struct;
+				var.deferred_declaration = true;
 			}
 		}
 		else
@@ -8108,6 +8958,9 @@ void CompilerGLSL::emit_function(SPIRFunction &func, uint64_t return_flags)
 			var.deferred_declaration = false;
 	}
 
+	for (auto &line : current_function->fixup_statements_in)
+		statement(line);
+
 	entry_block.loop_dominator = SPIRBlock::NoDominator;
 	emit_block_chain(entry_block);
 
@@ -8162,16 +9015,78 @@ void CompilerGLSL::flush_phi(uint32_t from, uint32_t to)
 				// use this to emit ESSL 1.0 compliant increments/decrements.
 				auto lhs = to_expression(phi.function_variable);
 				auto rhs = to_expression(phi.local_variable);
-				if (!optimize_read_modify_write(lhs, rhs))
+				if (!optimize_read_modify_write(get<SPIRType>(var.basetype), lhs, rhs))
 					statement(lhs, " = ", rhs, ";");
 			}
+
+			register_write(phi.function_variable);
 		}
 	}
 }
 
+void CompilerGLSL::branch_to_continue(uint32_t from, uint32_t to)
+{
+	auto &to_block = get<SPIRBlock>(to);
+	if (from == to)
+		return;
+
+	assert(is_continue(to));
+	if (to_block.complex_continue)
+	{
+		// Just emit the whole block chain as is.
+		auto usage_counts = expression_usage_counts;
+		auto invalid = invalid_expressions;
+
+		emit_block_chain(to_block);
+
+		// Expression usage counts and invalid expressions
+		// are moot after returning from the continue block.
+		// Since we emit the same block multiple times,
+		// we don't want to invalidate ourselves.
+		expression_usage_counts = usage_counts;
+		invalid_expressions = invalid;
+	}
+	else
+	{
+		auto &from_block = get<SPIRBlock>(from);
+		bool outside_control_flow = false;
+		uint32_t loop_dominator = 0;
+
+		// FIXME: Refactor this to not use the old loop_dominator tracking.
+		if (from_block.merge_block)
+		{
+			// If we are a loop header, we don't set the loop dominator,
+			// so just use "self" here.
+			loop_dominator = from;
+		}
+		else if (from_block.loop_dominator != SPIRBlock::NoDominator)
+		{
+			loop_dominator = from_block.loop_dominator;
+		}
+
+		if (loop_dominator != 0)
+		{
+			auto &dominator = get<SPIRBlock>(loop_dominator);
+
+			// For non-complex continue blocks, we implicitly branch to the continue block
+			// by having the continue block be part of the loop header in for (; ; continue-block).
+			outside_control_flow = block_is_outside_flow_control_from_block(dominator, from_block);
+		}
+
+		// Some simplification for for-loops. We always end up with a useless continue;
+		// statement since we branch to a loop block.
+		// Walk the CFG, if we unconditionally execute the block calling continue assuming we're in the loop block,
+		// we can avoid writing out an explicit continue statement.
+		// Similar optimization to return statements if we know we're outside flow control.
+		if (!outside_control_flow)
+			statement("continue;");
+	}
+}
+
 void CompilerGLSL::branch(uint32_t from, uint32_t to)
 {
 	flush_phi(from, to);
+	flush_control_dependent_expressions(from);
 	flush_all_active_variables();
 
 	// This is only a continue if we branch to our loop dominator.
@@ -8182,64 +9097,22 @@ void CompilerGLSL::branch(uint32_t from, uint32_t to)
 		// and end the chain here.
 		statement("continue;");
 	}
-	else if (is_continue(to))
-	{
-		auto &to_block = get<SPIRBlock>(to);
-		if (to_block.complex_continue)
-		{
-			// Just emit the whole block chain as is.
-			auto usage_counts = expression_usage_counts;
-			auto invalid = invalid_expressions;
-
-			emit_block_chain(to_block);
-
-			// Expression usage counts and invalid expressions
-			// are moot after returning from the continue block.
-			// Since we emit the same block multiple times,
-			// we don't want to invalidate ourselves.
-			expression_usage_counts = usage_counts;
-			invalid_expressions = invalid;
-		}
-		else
-		{
-			auto &from_block = get<SPIRBlock>(from);
-			bool outside_control_flow = false;
-			uint32_t loop_dominator = 0;
-
-			// FIXME: Refactor this to not use the old loop_dominator tracking.
-			if (from_block.merge_block)
-			{
-				// If we are a loop header, we don't set the loop dominator,
-				// so just use "self" here.
-				loop_dominator = from;
-			}
-			else if (from_block.loop_dominator != SPIRBlock::NoDominator)
-			{
-				loop_dominator = from_block.loop_dominator;
-			}
-
-			if (loop_dominator != 0)
-			{
-				auto &dominator = get<SPIRBlock>(loop_dominator);
-
-				// For non-complex continue blocks, we implicitly branch to the continue block
-				// by having the continue block be part of the loop header in for (; ; continue-block).
-				outside_control_flow = block_is_outside_flow_control_from_block(dominator, from_block);
-			}
-
-			// Some simplification for for-loops. We always end up with a useless continue;
-			// statement since we branch to a loop block.
-			// Walk the CFG, if we uncoditionally execute the block calling continue assuming we're in the loop block,
-			// we can avoid writing out an explicit continue statement.
-			// Similar optimization to return statements if we know we're outside flow control.
-			if (!outside_control_flow)
-				statement("continue;");
-		}
-	}
 	else if (is_break(to))
 		statement("break;");
+	else if (is_continue(to) || (from == to))
+	{
+		// The from == to case can happen for a do-while loop which branches into itself.
+		// We don't mark these cases as continue blocks, but the only possible way to branch into
+		// ourselves is through means of continue blocks.
+		branch_to_continue(from, to);
+	}
 	else if (!is_conditional(to))
 		emit_block_chain(get<SPIRBlock>(to));
+
+	// It is important that we check for break before continue.
+	// A block might serve two purposes, a break block for the inner scope, and
+	// a continue block in the outer scope.
+	// Inner scope always takes precedence.
 }
 
 void CompilerGLSL::branch(uint32_t from, uint32_t cond, uint32_t true_block, uint32_t false_block)
@@ -8255,7 +9128,7 @@ void CompilerGLSL::branch(uint32_t from, uint32_t cond, uint32_t true_block, uin
 		branch(from, true_block);
 		end_scope();
 
-		if (false_sub)
+		if (false_sub || is_continue(false_block) || is_break(false_block))
 		{
 			statement("else");
 			begin_scope();
@@ -8273,12 +9146,19 @@ void CompilerGLSL::branch(uint32_t from, uint32_t cond, uint32_t true_block, uin
 	else if (false_sub && !true_sub)
 	{
 		// Only need false path, use negative conditional.
-		statement("if (!", to_expression(cond), ")");
+		statement("if (!", to_enclosed_expression(cond), ")");
 		begin_scope();
 		branch(from, false_block);
 		end_scope();
 
-		if (flush_phi_required(from, true_block))
+		if (is_continue(true_block) || is_break(true_block))
+		{
+			statement("else");
+			begin_scope();
+			branch(from, true_block);
+			end_scope();
+		}
+		else if (flush_phi_required(from, true_block))
 		{
 			statement("else");
 			begin_scope();
@@ -8426,7 +9306,7 @@ string CompilerGLSL::emit_for_loop_initializers(const SPIRBlock &block)
 				if (expr.empty())
 				{
 					// For loop initializers are of the form <type id = value, id = value, id = value, etc ...
-					auto &var = get<SPIRVariable>(block.loop_variables.front());
+					auto &var = get<SPIRVariable>(loop_var);
 					auto &type = get<SPIRType>(var.basetype);
 					expr = join(to_qualifiers_glsl(var.self), type_to_glsl(type), " ");
 				}
@@ -8447,7 +9327,7 @@ bool CompilerGLSL::for_loop_initializers_are_same_type(const SPIRBlock &block)
 		return true;
 
 	uint32_t expected = 0;
-	uint64_t expected_flags = 0;
+	Bitset expected_flags;
 	for (auto &var : block.loop_variables)
 	{
 		// Don't care about uninitialized variables as they will not be part of the initializers.
@@ -8458,13 +9338,13 @@ bool CompilerGLSL::for_loop_initializers_are_same_type(const SPIRBlock &block)
 		if (expected == 0)
 		{
 			expected = get<SPIRVariable>(var).basetype;
-			expected_flags = get_decoration_mask(var);
+			expected_flags = get_decoration_bitset(var);
 		}
 		else if (expected != get<SPIRVariable>(var).basetype)
 			return false;
 
 		// Precision flags and things like that must also match.
-		if (expected_flags != get_decoration_mask(var))
+		if (expected_flags != get_decoration_bitset(var))
 			return false;
 	}
 
@@ -8475,7 +9355,7 @@ bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method
 {
 	SPIRBlock::ContinueBlockType continue_type = continue_block_type(get<SPIRBlock>(block.continue_block));
 
-	if (method == SPIRBlock::MergeToSelectForLoop)
+	if (method == SPIRBlock::MergeToSelectForLoop || method == SPIRBlock::MergeToSelectContinueForLoop)
 	{
 		uint32_t current_count = statement_count;
 		// If we're trying to create a true for loop,
@@ -8499,8 +9379,13 @@ bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method
 				// emitting the continue block can invalidate the condition expression.
 				auto initializer = emit_for_loop_initializers(block);
 				auto condition = to_expression(block.condition);
-				auto continue_block = emit_continue_block(block.continue_block);
-				statement("for (", initializer, "; ", condition, "; ", continue_block, ")");
+				if (method != SPIRBlock::MergeToSelectContinueForLoop)
+				{
+					auto continue_block = emit_continue_block(block.continue_block);
+					statement("for (", initializer, "; ", condition, "; ", continue_block, ")");
+				}
+				else
+					statement("for (", initializer, "; ", condition, "; )");
 				break;
 			}
 
@@ -8596,6 +9481,28 @@ void CompilerGLSL::flush_undeclared_variables(SPIRBlock &block)
 	}
 }
 
+void CompilerGLSL::emit_hoisted_temporaries(vector<pair<uint32_t, uint32_t>> &temporaries)
+{
+	// Declare every hoisted temporary (a <type ID, temporary ID> pair) up front, e.g. before starting a loop header.
+	// Sort by temporary ID first (this reorders the caller's vector) to ensure that reference output is stable.
+	sort(begin(temporaries), end(temporaries),
+	     [](const pair<uint32_t, uint32_t> &a, const pair<uint32_t, uint32_t> &b) { return a.second < b.second; });
+
+	for (auto &tmp : temporaries)
+	{
+		add_local_variable_name(tmp.second);
+		auto flags = meta[tmp.second].decoration.decoration_flags;
+		auto &type = get<SPIRType>(tmp.first);
+		statement(flags_to_precision_qualifiers_glsl(type, flags), variable_decl(type, to_name(tmp.second)), ";");
+
+		hoisted_temporaries.insert(tmp.second);
+		forced_temporaries.insert(tmp.second);
+
+		// The temporary might be read before it is assigned, so register its named expression right away.
+		set<SPIRExpression>(tmp.second, to_name(tmp.second), tmp.first, true);
+	}
+}
+
 void CompilerGLSL::emit_block_chain(SPIRBlock &block)
 {
 	propagate_loop_dominators(block);
@@ -8603,21 +9510,9 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
 	bool select_branch_to_true_block = false;
 	bool skip_direct_branch = false;
 	bool emitted_for_loop_header = false;
+	bool force_complex_continue_block = false;
 
-	// If we need to force temporaries for certain IDs due to continue blocks, do it before starting loop header.
-	// Need to sort these to ensure that reference output is stable.
-	sort(begin(block.declare_temporary), end(block.declare_temporary),
-	     [](const pair<uint32_t, uint32_t> &a, const pair<uint32_t, uint32_t> &b) { return a.second < b.second; });
-
-	for (auto &tmp : block.declare_temporary)
-	{
-		auto flags = meta[tmp.second].decoration.decoration_flags;
-		auto &type = get<SPIRType>(tmp.first);
-		statement(flags_to_precision_qualifiers_glsl(type, flags), variable_decl(type, to_name(tmp.second)), ";");
-
-		// The temporary might be read from before it's assigned, set up the expression now.
-		set<SPIRExpression>(tmp.second, to_name(tmp.second), tmp.first, true);
-	}
+	emit_hoisted_temporaries(block.declare_temporary);
 
 	SPIRBlock::ContinueBlockType continue_type = SPIRBlock::ContinueNone;
 	if (block.continue_block)
@@ -8627,8 +9522,22 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
 	for (auto var : block.loop_variables)
 		get<SPIRVariable>(var).loop_variable_enable = true;
 
+	// This is the method often used by spirv-opt to implement loops.
+	// The loop header goes straight into the continue block.
+	// However, don't attempt this on ESSL 1.0, because there a loop variable which is updated in the continue block
+	// *MUST* be updated in the for-loop header itself. This method emits the continue block in the loop body instead.
+	if (!is_legacy_es() && block_is_loop_candidate(block, SPIRBlock::MergeToSelectContinueForLoop))
+	{
+		flush_undeclared_variables(block);
+		if (attempt_emit_loop_header(block, SPIRBlock::MergeToSelectContinueForLoop))
+		{
+			select_branch_to_true_block = true;
+			emitted_for_loop_header = true;
+			force_complex_continue_block = true;
+		}
+	}
 	// This is the older loop behavior in glslang which branches to loop body directly from the loop header.
-	if (block_is_loop_candidate(block, SPIRBlock::MergeToSelectForLoop))
+	else if (block_is_loop_candidate(block, SPIRBlock::MergeToSelectForLoop))
 	{
 		flush_undeclared_variables(block);
 		if (attempt_emit_loop_header(block, SPIRBlock::MergeToSelectForLoop))
@@ -8652,6 +9561,11 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
 	else if (continue_type == SPIRBlock::DoWhileLoop)
 	{
 		flush_undeclared_variables(block);
+		// We have some temporaries where the loop header is the dominator.
+		// We risk a case where we have code like:
+		// for (;;) { create-temporary; break; } consume-temporary;
+		// so force-declare temporaries here.
+		emit_hoisted_temporaries(block.potential_declare_temporary);
 		statement("do");
 		begin_scope();
 
@@ -8665,6 +9579,11 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
 		get<SPIRBlock>(block.continue_block).complex_continue = true;
 		continue_type = SPIRBlock::ComplexLoop;
 
+		// We have some temporaries where the loop header is the dominator.
+		// We risk a case where we have code like:
+		// for (;;) { create-temporary; break; } consume-temporary;
+		// so force-declare temporaries here.
+		emit_hoisted_temporaries(block.potential_declare_temporary);
 		statement("for (;;)");
 		begin_scope();
 
@@ -8709,9 +9628,23 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
 		break;
 
 	case SPIRBlock::Select:
-		// True if MergeToSelectForLoop succeeded.
+		// True if MergeToSelectForLoop or MergeToSelectContinueForLoop succeeded.
 		if (select_branch_to_true_block)
-			branch(block.self, block.true_block);
+		{
+			if (force_complex_continue_block)
+			{
+				assert(block.true_block == block.continue_block);
+
+				// We're going to emit a continue block directly here, so make sure it's marked as complex.
+				auto &complex_continue = get<SPIRBlock>(block.continue_block).complex_continue;
+				bool old_complex = complex_continue;
+				complex_continue = true;
+				branch(block.self, block.true_block);
+				complex_continue = old_complex;
+			}
+			else
+				branch(block.self, block.true_block);
+		}
 		else
 			branch(block.self, block.condition, block.true_block, block.false_block);
 		break;
@@ -8757,6 +9690,9 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
 	}
 
 	case SPIRBlock::Return:
+		for (auto &line : current_function->fixup_statements_out)
+			statement(line);
+
 		if (processing_entry_point)
 			emit_fixup();
 
@@ -8812,7 +9748,23 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
 		// that block after this. If we had selection merge, we already flushed phi variables.
 		if (block.merge != SPIRBlock::MergeSelection)
 			flush_phi(block.self, block.next_block);
-		emit_block_chain(get<SPIRBlock>(block.next_block));
+
+		// For merge selects we might have ignored the fact that a merge target
+		// could have been a break; or continue;
+		// We will need to deal with it here.
+		if (is_loop_break(block.next_block))
+		{
+			// Cannot check for just break, because switch statements will also use break.
+			assert(block.merge == SPIRBlock::MergeSelection);
+			statement("break;");
+		}
+		else if (is_continue(block.next_block))
+		{
+			assert(block.merge == SPIRBlock::MergeSelection);
+			branch_to_continue(block.self, block.next_block);
+		}
+		else
+			emit_block_chain(get<SPIRBlock>(block.next_block));
 	}
 
 	if (block.merge == SPIRBlock::MergeLoop)
@@ -8835,9 +9787,17 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
 		else
 			end_scope();
 
-		flush_phi(block.self, block.merge_block);
-		emit_block_chain(get<SPIRBlock>(block.merge_block));
+		// We cannot break out of two loops at once, so don't check for break; here.
+		// Using block.self as the "from" block isn't quite right, but it has the same scope
+		// and dominance structure, so it's fine.
+		if (is_continue(block.merge_block))
+			branch_to_continue(block.self, block.merge_block);
+		else
+			emit_block_chain(get<SPIRBlock>(block.merge_block));
 	}
+
+	// Forget about control dependent expressions now.
+	block.invalidate_expressions.clear();
 }
 
 void CompilerGLSL::begin_scope()
diff --git a/spirv_glsl.hpp b/spirv_glsl.hpp
index bfc8501946..e02ef34802 100644
--- a/spirv_glsl.hpp
+++ b/spirv_glsl.hpp
@@ -135,15 +135,32 @@ public:
 		init();
 	}
 
+	// Deprecate this interface because it doesn't overload properly with subclasses.
+	// Requires awkward static casting, which was a mistake.
+	SPIRV_CROSS_DEPRECATED("get_options() is obsolete, use get_common_options() instead.")
 	const Options &get_options() const
 	{
 		return options;
 	}
+
+	const Options &get_common_options() const
+	{
+		return options;
+	}
+
+	// Deprecate this interface because it doesn't overload properly with subclasses.
+	// Requires awkward static casting, which was a mistake.
+	SPIRV_CROSS_DEPRECATED("set_options() is obsolete, use set_common_options() instead.")
 	void set_options(Options &opts)
 	{
 		options = opts;
 	}
 
+	void set_common_options(const Options &opts)
+	{
+		options = opts;
+	}
+
 	std::string compile() override;
 
 	// Returns the current string held in the conversion buffer. Useful for
@@ -175,18 +192,18 @@ public:
 
 protected:
 	void reset();
-	void emit_function(SPIRFunction &func, uint64_t return_flags);
+	void emit_function(SPIRFunction &func, const Bitset &return_flags);
 
 	bool has_extension(const std::string &ext) const;
+	void require_extension_internal(const std::string &ext);
 
 	// Virtualize methods which need to be overridden by subclass targets like C++ and such.
-	virtual void emit_function_prototype(SPIRFunction &func, uint64_t return_flags);
+	virtual void emit_function_prototype(SPIRFunction &func, const Bitset &return_flags);
 
-	// Kinda ugly way to let opcodes peek at their neighbor instructions for trivial peephole scenarios.
-	const SPIRBlock *current_emitting_block = nullptr;
+	SPIRBlock *current_emitting_block = nullptr;
 
 	virtual void emit_instruction(const Instruction &instr);
-	void emit_block_instructions(const SPIRBlock &block);
+	void emit_block_instructions(SPIRBlock &block);
 	virtual void emit_glsl_op(uint32_t result_type, uint32_t result_id, uint32_t op, const uint32_t *args,
 	                          uint32_t count);
 	virtual void emit_spv_amd_shader_ballot_op(uint32_t result_type, uint32_t result_id, uint32_t op,
@@ -200,6 +217,7 @@ protected:
 	virtual void emit_header();
 	virtual void emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id);
 	virtual void emit_texture_op(const Instruction &i);
+	virtual void emit_subgroup_op(const Instruction &i);
 	virtual std::string type_to_glsl(const SPIRType &type, uint32_t id = 0);
 	virtual std::string builtin_to_glsl(spv::BuiltIn builtin, spv::StorageClass storage);
 	virtual void emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index,
@@ -319,6 +337,7 @@ protected:
 		bool long_long_literal_suffix = false;
 		const char *basic_int_type = "int";
 		const char *basic_uint_type = "uint";
+		const char *half_literal_suffix = "hf";
 		bool swizzle_is_function = false;
 		bool shared_is_implied = false;
 		bool flexible_member_array_supported = true;
@@ -336,6 +355,7 @@ protected:
 		bool can_return_array = true;
 		bool allow_truncated_access_chain = false;
 		bool supports_extensions = false;
+		bool supports_empty_struct = false;
 	} backend;
 
 	void emit_struct(SPIRType &type);
@@ -349,12 +369,15 @@ protected:
 	void emit_interface_block(const SPIRVariable &type);
 	void emit_flattened_io_block(const SPIRVariable &var, const char *qual);
 	void emit_block_chain(SPIRBlock &block);
+	void emit_hoisted_temporaries(std::vector<std::pair<uint32_t, uint32_t>> &temporaries);
 	void emit_specialization_constant(const SPIRConstant &constant);
+	void emit_specialization_constant_op(const SPIRConstantOp &constant);
 	std::string emit_continue_block(uint32_t continue_block);
 	bool attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method method);
 	void propagate_loop_dominators(const SPIRBlock &block);
 
 	void branch(uint32_t from, uint32_t to);
+	void branch_to_continue(uint32_t from, uint32_t to);
 	void branch(uint32_t from, uint32_t cond, uint32_t true_block, uint32_t false_block);
 	void flush_phi(uint32_t from, uint32_t to);
 	bool flush_phi_required(uint32_t from, uint32_t to);
@@ -413,6 +436,9 @@ protected:
 	void append_global_func_args(const SPIRFunction &func, uint32_t index, std::vector<std::string> &arglist);
 	std::string to_expression(uint32_t id);
 	std::string to_enclosed_expression(uint32_t id);
+	std::string to_unpacked_expression(uint32_t id);
+	std::string to_enclosed_unpacked_expression(uint32_t id);
+	std::string to_extract_component_expression(uint32_t id, uint32_t index);
 	std::string enclose_expression(const std::string &expr);
 	void strip_enclosed_expression(std::string &expr);
 	std::string to_member_name(const SPIRType &type, uint32_t index);
@@ -421,11 +447,11 @@ protected:
 	virtual std::string to_qualifiers_glsl(uint32_t id);
 	const char *to_precision_qualifiers_glsl(uint32_t id);
 	virtual const char *to_storage_qualifiers_glsl(const SPIRVariable &var);
-	const char *flags_to_precision_qualifiers_glsl(const SPIRType &type, uint64_t flags);
+	const char *flags_to_precision_qualifiers_glsl(const SPIRType &type, const Bitset &flags);
 	const char *format_to_glsl(spv::ImageFormat format);
 	virtual std::string layout_for_member(const SPIRType &type, uint32_t index);
-	virtual std::string to_interpolation_qualifiers(uint64_t flags);
-	uint64_t combined_decoration_for_member(const SPIRType &type, uint32_t index);
+	virtual std::string to_interpolation_qualifiers(const Bitset &flags);
+	Bitset combined_decoration_for_member(const SPIRType &type, uint32_t index);
 	std::string layout_for_variable(const SPIRVariable &variable);
 	std::string to_combined_image_sampler(uint32_t image_id, uint32_t samp_id);
 	virtual bool skip_argument(uint32_t id) const;
@@ -434,9 +460,9 @@ protected:
 	bool buffer_is_packing_standard(const SPIRType &type, BufferPackingStandard packing, uint32_t start_offset = 0,
 	                                uint32_t end_offset = std::numeric_limits<uint32_t>::max());
 	uint32_t type_to_packed_base_size(const SPIRType &type, BufferPackingStandard packing);
-	uint32_t type_to_packed_alignment(const SPIRType &type, uint64_t flags, BufferPackingStandard packing);
-	uint32_t type_to_packed_array_stride(const SPIRType &type, uint64_t flags, BufferPackingStandard packing);
-	uint32_t type_to_packed_size(const SPIRType &type, uint64_t flags, BufferPackingStandard packing);
+	uint32_t type_to_packed_alignment(const SPIRType &type, const Bitset &flags, BufferPackingStandard packing);
+	uint32_t type_to_packed_array_stride(const SPIRType &type, const Bitset &flags, BufferPackingStandard packing);
+	uint32_t type_to_packed_size(const SPIRType &type, const Bitset &flags, BufferPackingStandard packing);
 
 	std::string bitcast_glsl(const SPIRType &result_type, uint32_t arg);
 	virtual std::string bitcast_glsl_op(const SPIRType &result_type, const SPIRType &argument_type);
@@ -453,6 +479,7 @@ protected:
 	bool check_atomic_image(uint32_t id);
 
 	virtual void replace_illegal_names();
+	virtual void emit_entry_point_declarations();
 
 	void replace_fragment_output(SPIRVariable &var);
 	void replace_fragment_outputs();
@@ -498,6 +525,7 @@ protected:
 	bool args_will_forward(uint32_t id, const uint32_t *args, uint32_t num_args, bool pure);
 	void register_call_out_argument(uint32_t id);
 	void register_impure_function_call();
+	void register_control_dependent_expression(uint32_t expr);
 
 	// GL_EXT_shader_pixel_local_storage support.
 	std::vector<PlsRemap> pls_inputs;
@@ -515,7 +543,7 @@ protected:
 
 	std::string emit_for_loop_initializers(const SPIRBlock &block);
 	bool for_loop_initializers_are_same_type(const SPIRBlock &block);
-	bool optimize_read_modify_write(const std::string &lhs, const std::string &rhs);
+	bool optimize_read_modify_write(const SPIRType &type, const std::string &lhs, const std::string &rhs);
 	void fixup_image_load_store_access();
 
 	bool type_is_empty(const SPIRType &type);
@@ -524,13 +552,16 @@ protected:
 
 	static std::string sanitize_underscores(const std::string &str);
 
-	bool can_use_io_location(spv::StorageClass storage);
+	bool can_use_io_location(spv::StorageClass storage, bool block);
 	const Instruction *get_next_instruction_in_block(const Instruction &instr);
 	static uint32_t mask_relevant_memory_semantics(uint32_t semantics);
 
+	std::string convert_half_to_string(const SPIRConstant &value, uint32_t col, uint32_t row);
 	std::string convert_float_to_string(const SPIRConstant &value, uint32_t col, uint32_t row);
 	std::string convert_double_to_string(const SPIRConstant &value, uint32_t col, uint32_t row);
 
+	std::string convert_separate_image_to_combined(uint32_t id);
+
 private:
 	void init()
 	{
@@ -541,6 +572,6 @@ private:
 		}
 	}
 };
-}
+} // namespace spirv_cross
 
 #endif
diff --git a/spirv_hlsl.cpp b/spirv_hlsl.cpp
index a56865e78d..fffdf3129a 100644
--- a/spirv_hlsl.cpp
+++ b/spirv_hlsl.cpp
@@ -346,7 +346,7 @@ string CompilerHLSL::image_type_hlsl_legacy(const SPIRType &type)
 
 string CompilerHLSL::image_type_hlsl(const SPIRType &type)
 {
-	if (options.shader_model <= 30)
+	if (hlsl_options.shader_model <= 30)
 		return image_type_hlsl_legacy(type);
 	else
 		return image_type_hlsl_modern(type);
@@ -394,6 +394,8 @@ string CompilerHLSL::type_to_glsl(const SPIRType &type, uint32_t id)
 			return backend.basic_uint_type;
 		case SPIRType::AtomicCounter:
 			return "atomic_uint";
+		case SPIRType::Half:
+			return "min16float";
 		case SPIRType::Float:
 			return "float";
 		case SPIRType::Double:
@@ -416,6 +418,8 @@ string CompilerHLSL::type_to_glsl(const SPIRType &type, uint32_t id)
 			return join("int", type.vecsize);
 		case SPIRType::UInt:
 			return join("uint", type.vecsize);
+		case SPIRType::Half:
+			return join("min16float", type.vecsize);
 		case SPIRType::Float:
 			return join("float", type.vecsize);
 		case SPIRType::Double:
@@ -438,6 +442,8 @@ string CompilerHLSL::type_to_glsl(const SPIRType &type, uint32_t id)
 			return join("int", type.columns, "x", type.vecsize);
 		case SPIRType::UInt:
 			return join("uint", type.columns, "x", type.vecsize);
+		case SPIRType::Half:
+			return join("min16float", type.columns, "x", type.vecsize);
 		case SPIRType::Float:
 			return join("float", type.columns, "x", type.vecsize);
 		case SPIRType::Double:
@@ -468,7 +474,7 @@ void CompilerHLSL::emit_interface_block_globally(const SPIRVariable &var)
 	// These are emitted inside the interface structs.
 	auto &flags = meta[var.self].decoration.decoration_flags;
 	auto old_flags = flags;
-	flags = 0;
+	flags.reset();
 	statement("static ", variable_decl(var), ";");
 	flags = old_flags;
 }
@@ -488,12 +494,8 @@ const char *CompilerHLSL::to_storage_qualifiers_glsl(const SPIRVariable &var)
 
 void CompilerHLSL::emit_builtin_outputs_in_struct()
 {
-	bool legacy = options.shader_model <= 30;
-	for (uint32_t i = 0; i < 64; i++)
-	{
-		if (!(active_output_builtins & (1ull << i)))
-			continue;
-
+	bool legacy = hlsl_options.shader_model <= 30;
+	active_output_builtins.for_each_bit([&](uint32_t i) {
 		const char *type = nullptr;
 		const char *semantic = nullptr;
 		auto builtin = static_cast<BuiltIn>(i);
@@ -545,7 +547,7 @@ void CompilerHLSL::emit_builtin_outputs_in_struct()
 			// If point_size_compat is enabled, just ignore PointSize.
 			// PointSize does not exist in HLSL, but some code bases might want to be able to use these shaders,
 			// even if it means working around the missing feature.
-			if (options.point_size_compat)
+			if (hlsl_options.point_size_compat)
 				break;
 			else
 				SPIRV_CROSS_THROW("Unsupported builtin in HLSL.");
@@ -557,17 +559,13 @@ void CompilerHLSL::emit_builtin_outputs_in_struct()
 
 		if (type && semantic)
 			statement(type, " ", builtin_to_glsl(builtin, StorageClassOutput), " : ", semantic, ";");
-	}
+	});
 }
 
 void CompilerHLSL::emit_builtin_inputs_in_struct()
 {
-	bool legacy = options.shader_model <= 30;
-	for (uint32_t i = 0; i < 64; i++)
-	{
-		if (!(active_input_builtins & (1ull << i)))
-			continue;
-
+	bool legacy = hlsl_options.shader_model <= 30;
+	active_input_builtins.for_each_bit([&](uint32_t i) {
 		const char *type = nullptr;
 		const char *semantic = nullptr;
 		auto builtin = static_cast<BuiltIn>(i);
@@ -627,6 +625,13 @@ void CompilerHLSL::emit_builtin_inputs_in_struct()
 			break;
 
 		case BuiltInNumWorkgroups:
+		case BuiltInSubgroupSize:
+		case BuiltInSubgroupLocalInvocationId:
+		case BuiltInSubgroupEqMask:
+		case BuiltInSubgroupLtMask:
+		case BuiltInSubgroupLeMask:
+		case BuiltInSubgroupGtMask:
+		case BuiltInSubgroupGeMask:
 			// Handled specially.
 			break;
 
@@ -664,7 +669,7 @@ void CompilerHLSL::emit_builtin_inputs_in_struct()
 
 		case BuiltInPointCoord:
 			// PointCoord is not supported, but provide a way to just ignore that, similar to PointSize.
-			if (options.point_coord_compat)
+			if (hlsl_options.point_coord_compat)
 				break;
 			else
 				SPIRV_CROSS_THROW("Unsupported builtin in HLSL.");
@@ -676,7 +681,7 @@ void CompilerHLSL::emit_builtin_inputs_in_struct()
 
 		if (type && semantic)
 			statement(type, " ", builtin_to_glsl(builtin, StorageClassInput), " : ", semantic, ";");
-	}
+	});
 }
 
 uint32_t CompilerHLSL::type_to_consumed_locations(const SPIRType &type) const
@@ -704,22 +709,22 @@ uint32_t CompilerHLSL::type_to_consumed_locations(const SPIRType &type) const
 	return elements;
 }
 
-string CompilerHLSL::to_interpolation_qualifiers(uint64_t flags)
+string CompilerHLSL::to_interpolation_qualifiers(const Bitset &flags)
 {
 	string res;
 	//if (flags & (1ull << DecorationSmooth))
 	//    res += "linear ";
-	if (flags & (1ull << DecorationFlat))
+	if (flags.get(DecorationFlat))
 		res += "nointerpolation ";
-	if (flags & (1ull << DecorationNoPerspective))
+	if (flags.get(DecorationNoPerspective))
 		res += "noperspective ";
-	if (flags & (1ull << DecorationCentroid))
+	if (flags.get(DecorationCentroid))
 		res += "centroid ";
-	if (flags & (1ull << DecorationPatch))
+	if (flags.get(DecorationPatch))
 		res += "patch "; // Seems to be different in actual HLSL.
-	if (flags & (1ull << DecorationSample))
+	if (flags.get(DecorationSample))
 		res += "sample ";
-	if (flags & (1ull << DecorationInvariant))
+	if (flags.get(DecorationInvariant))
 		res += "invariant "; // Not supported?
 
 	return res;
@@ -765,7 +770,7 @@ void CompilerHLSL::emit_io_block(const SPIRVariable &var)
 		add_member_name(type, i);
 
 		auto &membertype = get<SPIRType>(type.member_types[i]);
-		statement(to_interpolation_qualifiers(get_member_decoration_mask(type.self, i)),
+		statement(to_interpolation_qualifiers(get_member_decoration_bitset(type.self, i)),
 		          variable_decl(membertype, to_member_name(type, i)), semantic, ";");
 	}
 
@@ -783,7 +788,7 @@ void CompilerHLSL::emit_interface_block_in_struct(const SPIRVariable &var, unord
 
 	string binding;
 	bool use_location_number = true;
-	bool legacy = options.shader_model <= 30;
+	bool legacy = hlsl_options.shader_model <= 30;
 	if (execution.model == ExecutionModelFragment && var.storage == StorageClassOutput)
 	{
 		binding = join(legacy ? "COLOR" : "SV_Target", get_decoration(var.self, DecorationLocation));
@@ -807,7 +812,7 @@ void CompilerHLSL::emit_interface_block_in_struct(const SPIRVariable &var, unord
 
 		// If an explicit location exists, use it with TEXCOORD[N] semantic.
 		// Otherwise, pick a vacant location.
-		if (m.decoration_flags & (1ull << DecorationLocation))
+		if (m.decoration_flags.get(DecorationLocation))
 			location_number = m.location;
 		else
 			location_number = get_vacant_location();
@@ -825,14 +830,14 @@ void CompilerHLSL::emit_interface_block_in_struct(const SPIRVariable &var, unord
 			{
 				SPIRType newtype = type;
 				newtype.columns = 1;
-				statement(to_interpolation_qualifiers(get_decoration_mask(var.self)),
+				statement(to_interpolation_qualifiers(get_decoration_bitset(var.self)),
 				          variable_decl(newtype, join(name, "_", i)), " : ", semantic, "_", i, ";");
 				active_locations.insert(location_number++);
 			}
 		}
 		else
 		{
-			statement(to_interpolation_qualifiers(get_decoration_mask(var.self)), variable_decl(type, name), " : ",
+			statement(to_interpolation_qualifiers(get_decoration_bitset(var.self)), variable_decl(type, name), " : ",
 			          semantic, ";");
 
 			// Structs and arrays should consume more locations.
@@ -866,6 +871,11 @@ std::string CompilerHLSL::builtin_to_glsl(spv::BuiltIn builtin, spv::StorageClas
 	case BuiltInPointCoord:
 		// Crude hack, but there is no real alternative. This path is only enabled if point_coord_compat is set.
 		return "float2(0.5f, 0.5f)";
+	case BuiltInSubgroupLocalInvocationId:
+		return "WaveGetLaneIndex()";
+	case BuiltInSubgroupSize:
+		return "WaveGetLaneCount()";
+
 	default:
 		return CompilerGLSL::builtin_to_glsl(builtin, storage);
 	}
@@ -873,12 +883,11 @@ std::string CompilerHLSL::builtin_to_glsl(spv::BuiltIn builtin, spv::StorageClas
 
 void CompilerHLSL::emit_builtin_variables()
 {
-	// Emit global variables for the interface variables which are statically used by the shader.
-	for (uint32_t i = 0; i < 64; i++)
-	{
-		if (!((active_input_builtins | active_output_builtins) & (1ull << i)))
-			continue;
+	Bitset builtins = active_input_builtins;
+	builtins.merge_or(active_output_builtins);
 
+	// Emit global variables for the interface variables which are statically used by the shader.
+	builtins.for_each_bit([&](uint32_t i) {
 		const char *type = nullptr;
 		auto builtin = static_cast<BuiltIn>(i);
 		uint32_t array_size = 0;
@@ -903,7 +912,7 @@ void CompilerHLSL::emit_builtin_variables()
 			break;
 
 		case BuiltInPointSize:
-			if (options.point_size_compat)
+			if (hlsl_options.point_size_compat)
 			{
 				// Just emit the global variable, it will be ignored.
 				type = "float";
@@ -931,6 +940,22 @@ void CompilerHLSL::emit_builtin_variables()
 			// Handled specially.
 			break;
 
+		case BuiltInSubgroupLocalInvocationId:
+		case BuiltInSubgroupSize:
+			if (hlsl_options.shader_model < 60)
+				SPIRV_CROSS_THROW("Need SM 6.0 for Wave ops.");
+			break;
+
+		case BuiltInSubgroupEqMask:
+		case BuiltInSubgroupLtMask:
+		case BuiltInSubgroupLeMask:
+		case BuiltInSubgroupGtMask:
+		case BuiltInSubgroupGeMask:
+			if (hlsl_options.shader_model < 60)
+				SPIRV_CROSS_THROW("Need SM 6.0 for Wave ops.");
+			type = "uint4";
+			break;
+
 		case BuiltInClipDistance:
 			array_size = clip_distance_count;
 			type = "float";
@@ -943,10 +968,9 @@ void CompilerHLSL::emit_builtin_variables()
 
 		default:
 			SPIRV_CROSS_THROW(join("Unsupported builtin in HLSL: ", unsigned(builtin)));
-			break;
 		}
 
-		StorageClass storage = (active_input_builtins & (1ull << i)) != 0 ? StorageClassInput : StorageClassOutput;
+		StorageClass storage = active_input_builtins.get(i) ? StorageClassInput : StorageClassOutput;
 		// FIXME: SampleMask can be both in and out with sample builtin,
 		// need to distinguish that when we add support for that.
 
@@ -957,7 +981,7 @@ void CompilerHLSL::emit_builtin_variables()
 			else
 				statement("static ", type, " ", builtin_to_glsl(builtin, storage), ";");
 		}
-	}
+	});
 }
 
 void CompilerHLSL::emit_composite_constants()
@@ -1010,6 +1034,13 @@ void CompilerHLSL::emit_specialization_constants()
 			statement("static const ", variable_decl(type, name), " = ", constant_expression(c), ";");
 			emitted = true;
 		}
+		else if (id.get_type() == TypeConstantOp)
+		{
+			auto &c = id.get<SPIRConstantOp>();
+			auto &type = get<SPIRType>(c.basetype);
+			auto name = to_name(c.self);
+			statement("static const ", variable_decl(type, name), " = ", constant_op_expression(c), ";");
+		}
 	}
 
 	if (workgroup_size_id)
@@ -1023,6 +1054,30 @@ void CompilerHLSL::emit_specialization_constants()
 		statement("");
 }
 
+void CompilerHLSL::replace_illegal_names()
+{
+	static const unordered_set<string> keywords = {
+		// Additional HLSL specific keywords. Non-hidden variables aliased to one of these get a "_" prefix below.
+		"line", "linear", "matrix", "point", "row_major", "sampler",
+	};
+
+	for (auto &id : ids)
+	{
+		if (id.get_type() == TypeVariable)
+		{
+			auto &var = id.get<SPIRVariable>();
+			if (!is_hidden_variable(var))
+			{
+				auto &m = meta[var.self].decoration;
+				if (keywords.find(m.alias) != end(keywords))
+					m.alias = join("_", m.alias);
+			}
+		}
+	}
+
+	CompilerGLSL::replace_illegal_names();
+}
+
 void CompilerHLSL::emit_resources()
 {
 	auto &execution = get_entry_point();
@@ -1039,8 +1094,8 @@ void CompilerHLSL::emit_resources()
 		{
 			auto &type = id.get<SPIRType>();
 			if (type.basetype == SPIRType::Struct && type.array.empty() && !type.pointer &&
-			    (meta[type.self].decoration.decoration_flags &
-			     ((1ull << DecorationBlock) | (1ull << DecorationBufferBlock))) == 0)
+			    (!meta[type.self].decoration.decoration_flags.get(DecorationBlock) &&
+			     !meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock)))
 			{
 				emit_struct(type);
 			}
@@ -1060,8 +1115,8 @@ void CompilerHLSL::emit_resources()
 			auto &type = get<SPIRType>(var.basetype);
 
 			bool is_block_storage = type.storage == StorageClassStorageBuffer || type.storage == StorageClassUniform;
-			bool has_block_flags = (meta[type.self].decoration.decoration_flags &
-			                        ((1ull << DecorationBlock) | (1ull << DecorationBufferBlock))) != 0;
+			bool has_block_flags = meta[type.self].decoration.decoration_flags.get(DecorationBlock) ||
+			                       meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
 
 			if (var.storage != StorageClassFunction && type.pointer && is_block_storage && !is_hidden_variable(var) &&
 			    has_block_flags)
@@ -1088,7 +1143,7 @@ void CompilerHLSL::emit_resources()
 		}
 	}
 
-	if (execution.model == ExecutionModelVertex && options.shader_model <= 30)
+	if (execution.model == ExecutionModelVertex && hlsl_options.shader_model <= 30)
 	{
 		statement("uniform float4 gl_HalfPixel;");
 		emitted = true;
@@ -1125,7 +1180,7 @@ void CompilerHLSL::emit_resources()
 		{
 			auto &var = id.get<SPIRVariable>();
 			auto &type = get<SPIRType>(var.basetype);
-			bool block = (meta[type.self].decoration.decoration_flags & (1ull << DecorationBlock)) != 0;
+			bool block = meta[type.self].decoration.decoration_flags.get(DecorationBlock);
 
 			// Do not emit I/O blocks here.
 			// I/O blocks can be arrayed, so we must deal with them separately to support geometry shaders
@@ -1157,7 +1212,7 @@ void CompilerHLSL::emit_resources()
 		{
 			auto &var = id.get<SPIRVariable>();
 			auto &type = get<SPIRType>(var.basetype);
-			bool block = (meta[type.self].decoration.decoration_flags & (1ull << DecorationBlock)) != 0;
+			bool block = meta[type.self].decoration.decoration_flags.get(DecorationBlock);
 
 			if (var.storage != StorageClassInput && var.storage != StorageClassOutput)
 				continue;
@@ -1225,8 +1280,18 @@ void CompilerHLSL::emit_resources()
 		return name1.compare(name2) < 0;
 	};
 
-	static const uint64_t implicit_builtins = (1ull << BuiltInNumWorkgroups) | (1ull << BuiltInPointCoord);
-	if (!input_variables.empty() || (active_input_builtins & ~implicit_builtins))
+	auto input_builtins = active_input_builtins;
+	input_builtins.clear(BuiltInNumWorkgroups);
+	input_builtins.clear(BuiltInPointCoord);
+	input_builtins.clear(BuiltInSubgroupSize);
+	input_builtins.clear(BuiltInSubgroupLocalInvocationId);
+	input_builtins.clear(BuiltInSubgroupEqMask);
+	input_builtins.clear(BuiltInSubgroupLtMask);
+	input_builtins.clear(BuiltInSubgroupLeMask);
+	input_builtins.clear(BuiltInSubgroupGtMask);
+	input_builtins.clear(BuiltInSubgroupGeMask);
+
+	if (!input_variables.empty() || !input_builtins.empty())
 	{
 		require_input = true;
 		statement("struct SPIRV_Cross_Input");
@@ -1240,7 +1305,7 @@ void CompilerHLSL::emit_resources()
 		statement("");
 	}
 
-	if (!output_variables.empty() || active_output_builtins)
+	if (!output_variables.empty() || !active_output_builtins.empty())
 	{
 		require_output = true;
 		statement("struct SPIRV_Cross_Output");
@@ -1305,7 +1370,7 @@ void CompilerHLSL::emit_resources()
 
 	if (requires_textureProj)
 	{
-		if (options.shader_model >= 40)
+		if (hlsl_options.shader_model >= 40)
 		{
 			statement("float SPIRV_Cross_projectTextureCoordinate(float2 coord)");
 			begin_scope();
@@ -1427,6 +1492,23 @@ void CompilerHLSL::emit_resources()
 		statement("");
 	}
 
+	if (requires_explicit_fp16_packing)
+	{
+		// HLSL does not pack into a single word sadly :(
+		statement("uint SPIRV_Cross_packFloat2x16(min16float2 value)");
+		begin_scope();
+		statement("uint2 Packed = f32tof16(value);");
+		statement("return Packed.x | (Packed.y << 16);");
+		end_scope();
+		statement("");
+
+		statement("min16float2 SPIRV_Cross_unpackFloat2x16(uint value)");
+		begin_scope();
+		statement("return min16float2(f16tof32(uint2(value & 0xffff, value >> 16)));");
+		end_scope();
+		statement("");
+	}
+
 	// HLSL does not seem to have builtins for these operation, so roll them by hand ...
 	if (requires_unorm8_packing)
 	{
@@ -1696,19 +1778,16 @@ void CompilerHLSL::emit_resources()
 
 string CompilerHLSL::layout_for_member(const SPIRType &type, uint32_t index)
 {
-	auto flags = combined_decoration_for_member(type, index);
+	auto &flags = get_member_decoration_bitset(type.self, index);
 
-	bool is_block = (meta[type.self].decoration.decoration_flags &
-	                 ((1ull << DecorationBlock) | (1ull << DecorationBufferBlock))) != 0;
-
-	if (!is_block)
-		return "";
+	// HLSL can emit row_major or column_major decoration in any struct.
+	// Do not try to merge combined decorations for children like in GLSL.
 
 	// Flip the convention. HLSL is a bit odd in that the memory layout is column major ... but the language API is "row-major".
 	// The way to deal with this is to multiply everything in inverse order, and reverse the memory layout.
-	if (flags & (1ull << DecorationColMajor))
+	if (flags.get(DecorationColMajor))
 		return "row_major ";
-	else if (flags & (1ull << DecorationRowMajor))
+	else if (flags.get(DecorationRowMajor))
 		return "column_major ";
 
 	return "";
@@ -1719,14 +1798,15 @@ void CompilerHLSL::emit_struct_member(const SPIRType &type, uint32_t member_type
 {
 	auto &membertype = get<SPIRType>(member_type_id);
 
-	uint64_t memberflags = 0;
+	Bitset memberflags;
 	auto &memb = meta[type.self].members;
 	if (index < memb.size())
 		memberflags = memb[index].decoration_flags;
 
 	string qualifiers;
-	bool is_block = (meta[type.self].decoration.decoration_flags &
-	                 ((1ull << DecorationBlock) | (1ull << DecorationBufferBlock))) != 0;
+	bool is_block = meta[type.self].decoration.decoration_flags.get(DecorationBlock) ||
+	                meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
+
 	if (is_block)
 		qualifiers = to_interpolation_qualifiers(memberflags);
 
@@ -1756,8 +1836,8 @@ void CompilerHLSL::emit_buffer_block(const SPIRVariable &var)
 
 	if (is_uav)
 	{
-		uint64_t flags = get_buffer_block_flags(var);
-		bool is_readonly = (flags & (1ull << DecorationNonWritable)) != 0;
+		Bitset flags = get_buffer_block_flags(var);
+		bool is_readonly = flags.get(DecorationNonWritable);
 		add_resource_name(var.self);
 		statement(is_readonly ? "ByteAddressBuffer " : "RWByteAddressBuffer ", to_name(var.self),
 		          type_to_array_glsl(type), to_resource_binding(var), ";");
@@ -1796,7 +1876,7 @@ void CompilerHLSL::emit_buffer_block(const SPIRVariable &var)
 		}
 		else
 		{
-			if (options.shader_model < 51)
+			if (hlsl_options.shader_model < 51)
 				SPIRV_CROSS_THROW(
 				    "Need ConstantBuffer<T> to use arrays of UBOs, but this is only supported in SM 5.1.");
 
@@ -1857,7 +1937,7 @@ void CompilerHLSL::emit_push_constant_block(const SPIRVariable &var)
 					auto backup_name = get_member_name(type.self, i);
 					auto member_name = to_member_name(type, i);
 					set_member_name(type.self, constant_index,
-					                sanitize_underscores(join(to_name(type.self), "_", member_name)));
+					                sanitize_underscores(join(to_name(var.self), "_", member_name)));
 					emit_struct_member(type, member, i, "", layout.start);
 					set_member_name(type.self, constant_index, backup_name);
 
@@ -1894,7 +1974,7 @@ string CompilerHLSL::to_func_call_arg(uint32_t id)
 {
 	string arg_str = CompilerGLSL::to_func_call_arg(id);
 
-	if (options.shader_model <= 30)
+	if (hlsl_options.shader_model <= 30)
 		return arg_str;
 
 	// Manufacture automatic sampler arg if the arg is a SampledImage texture and we're in modern HLSL.
@@ -1909,7 +1989,7 @@ string CompilerHLSL::to_func_call_arg(uint32_t id)
 	return arg_str;
 }
 
-void CompilerHLSL::emit_function_prototype(SPIRFunction &func, uint64_t return_flags)
+void CompilerHLSL::emit_function_prototype(SPIRFunction &func, const Bitset &return_flags)
 {
 	if (func.self != entry_point)
 		add_function_overload(func);
@@ -1974,12 +2054,13 @@ void CompilerHLSL::emit_function_prototype(SPIRFunction &func, uint64_t return_f
 
 		// Flatten a combined sampler to two separate arguments in modern HLSL.
 		auto &arg_type = get<SPIRType>(arg.type);
-		if (options.shader_model > 30 && arg_type.basetype == SPIRType::SampledImage && arg_type.image.dim != DimBuffer)
+		if (hlsl_options.shader_model > 30 && arg_type.basetype == SPIRType::SampledImage &&
+		    arg_type.image.dim != DimBuffer)
 		{
 			// Manufacture automatic sampler arg for SampledImage texture
 			decl += ", ";
-			decl +=
-			    join(arg_type.image.depth ? "SamplerComparisonState " : "SamplerState ", to_sampler_expression(arg.id));
+			decl += join(arg_type.image.depth ? "SamplerComparisonState " : "SamplerState ",
+			             to_sampler_expression(arg.id), type_to_array_glsl(arg_type));
 		}
 
 		if (&arg != &func.arguments.back())
@@ -2009,7 +2090,7 @@ void CompilerHLSL::emit_hlsl_entry_point()
 		{
 			auto &var = id.get<SPIRVariable>();
 			auto &type = get<SPIRType>(var.basetype);
-			bool block = (meta[type.self].decoration.decoration_flags & (1ull << DecorationBlock)) != 0;
+			bool block = meta[type.self].decoration.decoration_flags.get(DecorationBlock);
 
 			if (var.storage != StorageClassInput && var.storage != StorageClassOutput)
 				continue;
@@ -2052,7 +2133,7 @@ void CompilerHLSL::emit_hlsl_entry_point()
 		break;
 	}
 	case ExecutionModelFragment:
-		if (execution.flags & (1ull << ExecutionModeEarlyFragmentTests))
+		if (execution.flags.get(ExecutionModeEarlyFragmentTests))
 			statement("[earlydepthstencil]");
 		break;
 	default:
@@ -2061,14 +2142,10 @@ void CompilerHLSL::emit_hlsl_entry_point()
 
 	statement(require_output ? "SPIRV_Cross_Output " : "void ", "main(", merge(arguments), ")");
 	begin_scope();
-	bool legacy = options.shader_model <= 30;
+	bool legacy = hlsl_options.shader_model <= 30;
 
 	// Copy builtins from entry point arguments to globals.
-	for (uint32_t i = 0; i < 64; i++)
-	{
-		if (!(active_input_builtins & (1ull << i)))
-			continue;
-
+	active_input_builtins.for_each_bit([&](uint32_t i) {
 		auto builtin = builtin_to_glsl(static_cast<BuiltIn>(i), StorageClassInput);
 		switch (static_cast<BuiltIn>(i))
 		{
@@ -2092,6 +2169,70 @@ void CompilerHLSL::emit_hlsl_entry_point()
 
 		case BuiltInNumWorkgroups:
 		case BuiltInPointCoord:
+		case BuiltInSubgroupSize:
+		case BuiltInSubgroupLocalInvocationId:
+			break;
+
+		case BuiltInSubgroupEqMask:
+			// Emulate these ...
+			// No 64-bit in HLSL, so have to do it in 32-bit and unroll.
+			statement("gl_SubgroupEqMask = 1u << (WaveGetLaneIndex() - uint4(0, 32, 64, 96));");
+			statement("if (WaveGetLaneIndex() >= 32) gl_SubgroupEqMask.x = 0;");
+			statement("if (WaveGetLaneIndex() >= 64 || WaveGetLaneIndex() < 32) gl_SubgroupEqMask.y = 0;");
+			statement("if (WaveGetLaneIndex() >= 96 || WaveGetLaneIndex() < 64) gl_SubgroupEqMask.z = 0;");
+			statement("if (WaveGetLaneIndex() < 96) gl_SubgroupEqMask.w = 0;");
+			break;
+
+		case BuiltInSubgroupGeMask:
+			// Emulate these ...
+			// No 64-bit in HLSL, so have to do it in 32-bit and unroll.
+			statement("gl_SubgroupGeMask = ~((1u << (WaveGetLaneIndex() - uint4(0, 32, 64, 96))) - 1u);");
+			statement("if (WaveGetLaneIndex() >= 32) gl_SubgroupGeMask.x = 0u;");
+			statement("if (WaveGetLaneIndex() >= 64) gl_SubgroupGeMask.y = 0u;");
+			statement("if (WaveGetLaneIndex() >= 96) gl_SubgroupGeMask.z = 0u;");
+			statement("if (WaveGetLaneIndex() < 32) gl_SubgroupGeMask.y = ~0u;");
+			statement("if (WaveGetLaneIndex() < 64) gl_SubgroupGeMask.z = ~0u;");
+			statement("if (WaveGetLaneIndex() < 96) gl_SubgroupGeMask.w = ~0u;");
+			break;
+
+		case BuiltInSubgroupGtMask:
+			// Emulate these ...
+			// No 64-bit in HLSL, so have to do it in 32-bit and unroll.
+			statement("uint gt_lane_index = WaveGetLaneIndex() + 1;");
+			statement("gl_SubgroupGtMask = ~((1u << (gt_lane_index - uint4(0, 32, 64, 96))) - 1u);");
+			statement("if (gt_lane_index >= 32) gl_SubgroupGtMask.x = 0u;");
+			statement("if (gt_lane_index >= 64) gl_SubgroupGtMask.y = 0u;");
+			statement("if (gt_lane_index >= 96) gl_SubgroupGtMask.z = 0u;");
+			statement("if (gt_lane_index >= 128) gl_SubgroupGtMask.w = 0u;");
+			statement("if (gt_lane_index < 32) gl_SubgroupGtMask.y = ~0u;");
+			statement("if (gt_lane_index < 64) gl_SubgroupGtMask.z = ~0u;");
+			statement("if (gt_lane_index < 96) gl_SubgroupGtMask.w = ~0u;");
+			break;
+
+		case BuiltInSubgroupLeMask:
+			// Emulate these ...
+			// No 64-bit in HLSL, so have to do it in 32-bit and unroll.
+			statement("uint le_lane_index = WaveGetLaneIndex() + 1;");
+			statement("gl_SubgroupLeMask = (1u << (le_lane_index - uint4(0, 32, 64, 96))) - 1u;");
+			statement("if (le_lane_index >= 32) gl_SubgroupLeMask.x = ~0u;");
+			statement("if (le_lane_index >= 64) gl_SubgroupLeMask.y = ~0u;");
+			statement("if (le_lane_index >= 96) gl_SubgroupLeMask.z = ~0u;");
+			statement("if (le_lane_index >= 128) gl_SubgroupLeMask.w = ~0u;");
+			statement("if (le_lane_index < 32) gl_SubgroupLeMask.y = 0u;");
+			statement("if (le_lane_index < 64) gl_SubgroupLeMask.z = 0u;");
+			statement("if (le_lane_index < 96) gl_SubgroupLeMask.w = 0u;");
+			break;
+
+		case BuiltInSubgroupLtMask:
+			// Emulate these ...
+			// No 64-bit in HLSL, so have to do it in 32-bit and unroll.
+			statement("gl_SubgroupLtMask = (1u << (WaveGetLaneIndex() - uint4(0, 32, 64, 96))) - 1u;");
+			statement("if (WaveGetLaneIndex() >= 32) gl_SubgroupLtMask.x = ~0u;");
+			statement("if (WaveGetLaneIndex() >= 64) gl_SubgroupLtMask.y = ~0u;");
+			statement("if (WaveGetLaneIndex() >= 96) gl_SubgroupLtMask.z = ~0u;");
+			statement("if (WaveGetLaneIndex() < 32) gl_SubgroupLtMask.y = 0u;");
+			statement("if (WaveGetLaneIndex() < 64) gl_SubgroupLtMask.z = 0u;");
+			statement("if (WaveGetLaneIndex() < 96) gl_SubgroupLtMask.w = 0u;");
 			break;
 
 		case BuiltInClipDistance:
@@ -2110,7 +2251,7 @@ void CompilerHLSL::emit_hlsl_entry_point()
 			statement(builtin, " = stage_input.", builtin, ";");
 			break;
 		}
-	}
+	});
 
 	// Copy from stage input struct to globals.
 	for (auto &id : ids)
@@ -2119,7 +2260,7 @@ void CompilerHLSL::emit_hlsl_entry_point()
 		{
 			auto &var = id.get<SPIRVariable>();
 			auto &type = get<SPIRType>(var.basetype);
-			bool block = (meta[type.self].decoration.decoration_flags & (1ull << DecorationBlock)) != 0;
+			bool block = meta[type.self].decoration.decoration_flags.get(DecorationBlock);
 
 			if (var.storage != StorageClassInput)
 				continue;
@@ -2169,7 +2310,7 @@ void CompilerHLSL::emit_hlsl_entry_point()
 		{
 			auto &var = id.get<SPIRVariable>();
 			auto &type = get<SPIRType>(var.basetype);
-			bool block = (meta[type.self].decoration.decoration_flags & (1ull << DecorationBlock)) != 0;
+			bool block = meta[type.self].decoration.decoration_flags.get(DecorationBlock);
 
 			if (var.storage != StorageClassOutput)
 				continue;
@@ -2189,14 +2330,10 @@ void CompilerHLSL::emit_hlsl_entry_point()
 		statement("SPIRV_Cross_Output stage_output;");
 
 		// Copy builtins from globals to return struct.
-		for (uint32_t i = 0; i < 64; i++)
-		{
-			if (!(active_output_builtins & (1ull << i)))
-				continue;
-
+		active_output_builtins.for_each_bit([&](uint32_t i) {
 			// PointSize doesn't exist in HLSL.
 			if (i == BuiltInPointSize)
-				continue;
+				return;
 
 			switch (static_cast<BuiltIn>(i))
 			{
@@ -2219,7 +2356,7 @@ void CompilerHLSL::emit_hlsl_entry_point()
 				break;
 			}
 			}
-		}
+		});
 
 		for (auto &id : ids)
 		{
@@ -2227,7 +2364,7 @@ void CompilerHLSL::emit_hlsl_entry_point()
 			{
 				auto &var = id.get<SPIRVariable>();
 				auto &type = get<SPIRType>(var.basetype);
-				bool block = (meta[type.self].decoration.decoration_flags & (1ull << DecorationBlock)) != 0;
+				bool block = meta[type.self].decoration.decoration_flags.get(DecorationBlock);
 
 				if (var.storage != StorageClassOutput)
 					continue;
@@ -2252,7 +2389,7 @@ void CompilerHLSL::emit_fixup()
 	if (get_entry_point().model == ExecutionModelVertex)
 	{
 		// Do various mangling on the gl_Position.
-		if (options.shader_model <= 30)
+		if (hlsl_options.shader_model <= 30)
 		{
 			statement("gl_Position.x = gl_Position.x - gl_HalfPixel.x * "
 			          "gl_Position.w;");
@@ -2260,9 +2397,9 @@ void CompilerHLSL::emit_fixup()
 			          "gl_Position.w;");
 		}
 
-		if (CompilerGLSL::options.vertex.flip_vert_y)
+		if (options.vertex.flip_vert_y)
 			statement("gl_Position.y = -gl_Position.y;");
-		if (CompilerGLSL::options.vertex.fixup_clipspace)
+		if (options.vertex.fixup_clipspace)
 			statement("gl_Position.z = (gl_Position.z + gl_Position.w) * 0.5;");
 	}
 }
@@ -2276,6 +2413,8 @@ void CompilerHLSL::emit_texture_op(const Instruction &i)
 	if (i.offset + length > spirv.size())
 		SPIRV_CROSS_THROW("Compiler::parse() opcode out of range.");
 
+	vector<uint32_t> inherited_expressions;
+
 	uint32_t result_type = ops[0];
 	uint32_t id = ops[1];
 	uint32_t img = ops[2];
@@ -2288,6 +2427,8 @@ void CompilerHLSL::emit_texture_op(const Instruction &i)
 	auto *combined_image = maybe_get<SPIRCombinedImageSampler>(img);
 	auto img_expr = to_expression(combined_image ? combined_image->image : img);
 
+	inherited_expressions.push_back(coord);
+
 	switch (op)
 	{
 	case OpImageSampleDrefImplicitLod:
@@ -2361,6 +2502,9 @@ void CompilerHLSL::emit_texture_op(const Instruction &i)
 		break;
 	}
 
+	if (dref)
+		inherited_expressions.push_back(dref);
+
 	if (proj)
 		coord_components++;
 	if (imgtype.image.arrayed)
@@ -2387,6 +2531,7 @@ void CompilerHLSL::emit_texture_op(const Instruction &i)
 		if (length && (flags & flag))
 		{
 			v = *opt++;
+			inherited_expressions.push_back(v);
 			length--;
 		}
 	};
@@ -2405,7 +2550,7 @@ void CompilerHLSL::emit_texture_op(const Instruction &i)
 
 	if (op == OpImageFetch)
 	{
-		if (options.shader_model < 40)
+		if (hlsl_options.shader_model < 40)
 		{
 			SPIRV_CROSS_THROW("texelFetch is not supported in HLSL shader model 2/3.");
 		}
@@ -2425,7 +2570,7 @@ void CompilerHLSL::emit_texture_op(const Instruction &i)
 			SPIRV_CROSS_THROW("Sampling non-float textures is not supported in HLSL.");
 		}
 
-		if (options.shader_model >= 40)
+		if (hlsl_options.shader_model >= 40)
 		{
 			texop += img_expr;
 
@@ -2446,7 +2591,7 @@ void CompilerHLSL::emit_texture_op(const Instruction &i)
 			else if (gather)
 			{
 				uint32_t comp_num = get<SPIRConstant>(comp).scalar();
-				if (options.shader_model >= 50)
+				if (hlsl_options.shader_model >= 50)
 				{
 					switch (comp_num)
 					{
@@ -2524,7 +2669,7 @@ void CompilerHLSL::emit_texture_op(const Instruction &i)
 
 	expr += texop;
 	expr += "(";
-	if (options.shader_model < 40)
+	if (hlsl_options.shader_model < 40)
 	{
 		if (combined_image)
 			SPIRV_CROSS_THROW("Separate images/samplers are not supported in HLSL shader model 2/3.");
@@ -2572,7 +2717,7 @@ void CompilerHLSL::emit_texture_op(const Instruction &i)
 		coord_expr = "SPIRV_Cross_projectTextureCoordinate(" + coord_expr + ")";
 	}
 
-	if (options.shader_model < 40 && lod)
+	if (hlsl_options.shader_model < 40 && lod)
 	{
 		auto &coordtype = expression_type(coord);
 		string coord_filler;
@@ -2583,7 +2728,7 @@ void CompilerHLSL::emit_texture_op(const Instruction &i)
 		coord_expr = "float4(" + coord_expr + coord_filler + ", " + to_expression(lod) + ")";
 	}
 
-	if (options.shader_model < 40 && bias)
+	if (hlsl_options.shader_model < 40 && bias)
 	{
 		auto &coordtype = expression_type(coord);
 		string coord_filler;
@@ -2597,8 +2742,9 @@ void CompilerHLSL::emit_texture_op(const Instruction &i)
 	if (op == OpImageFetch)
 	{
 		auto &coordtype = expression_type(coord);
-		if (imgtype.image.dim != DimBuffer)
-			coord_expr = join("int", coordtype.vecsize + 1, "(", coord_expr, ", ", to_expression(lod), ")");
+		if (imgtype.image.dim != DimBuffer && !imgtype.image.ms)
+			coord_expr =
+			    join("int", coordtype.vecsize + 1, "(", coord_expr, ", ", lod ? to_expression(lod) : string("0"), ")");
 	}
 	else
 		expr += ", ";
@@ -2621,14 +2767,14 @@ void CompilerHLSL::emit_texture_op(const Instruction &i)
 		expr += to_expression(grad_y);
 	}
 
-	if (!dref && lod && options.shader_model >= 40 && op != OpImageFetch)
+	if (!dref && lod && hlsl_options.shader_model >= 40 && op != OpImageFetch)
 	{
 		forward = forward && should_forward(lod);
 		expr += ", ";
 		expr += to_expression(lod);
 	}
 
-	if (!dref && bias && options.shader_model >= 40)
+	if (!dref && bias && hlsl_options.shader_model >= 40)
 	{
 		forward = forward && should_forward(bias);
 		expr += ", ";
@@ -2671,6 +2817,23 @@ void CompilerHLSL::emit_texture_op(const Instruction &i)
 	{
 		emit_op(result_type, id, expr, forward, false);
 	}
+
+	for (auto &inherit : inherited_expressions)
+		inherit_expression_dependencies(id, inherit);
+
+	switch (op)
+	{
+	case OpImageSampleDrefImplicitLod:
+	case OpImageSampleImplicitLod:
+	case OpImageSampleProjImplicitLod:
+	case OpImageSampleProjDrefImplicitLod:
+	case OpImageQueryLod:
+		register_control_dependent_expression(id);
+		break;
+
+	default:
+		break;
+	}
 }
 
 string CompilerHLSL::to_resource_binding(const SPIRVariable &var)
@@ -2708,8 +2871,8 @@ string CompilerHLSL::to_resource_binding(const SPIRVariable &var)
 		{
 			if (has_decoration(type.self, DecorationBufferBlock))
 			{
-				uint64_t flags = get_buffer_block_flags(var);
-				bool is_readonly = (flags & (1ull << DecorationNonWritable)) != 0;
+				Bitset flags = get_buffer_block_flags(var);
+				bool is_readonly = flags.get(DecorationNonWritable);
 				space = is_readonly ? 't' : 'u'; // UAV
 			}
 			else if (has_decoration(type.self, DecorationBlock))
@@ -2745,7 +2908,7 @@ string CompilerHLSL::to_resource_binding_sampler(const SPIRVariable &var)
 
 string CompilerHLSL::to_resource_register(char space, uint32_t binding, uint32_t space_set)
 {
-	if (options.shader_model >= 51)
+	if (hlsl_options.shader_model >= 51)
 		return join(" : register(", space, binding, ", space", space_set, ")");
 	else
 		return join(" : register(", space, binding, ")");
@@ -2807,7 +2970,7 @@ void CompilerHLSL::emit_legacy_uniform(const SPIRVariable &var)
 void CompilerHLSL::emit_uniform(const SPIRVariable &var)
 {
 	add_resource_name(var.self);
-	if (options.shader_model >= 40)
+	if (hlsl_options.shader_model >= 40)
 		emit_modern_uniform(var);
 	else
 		emit_legacy_uniform(var);
@@ -2839,6 +3002,24 @@ string CompilerHLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &i
 		return "asdouble";
 	else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::UInt64)
 		return "asdouble";
+	else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1)
+	{
+		if (!requires_explicit_fp16_packing)
+		{
+			requires_explicit_fp16_packing = true;
+			force_recompile = true;
+		}
+		return "SPIRV_Cross_unpackFloat2x16";
+	}
+	else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Half && in_type.vecsize == 2)
+	{
+		if (!requires_explicit_fp16_packing)
+		{
+			requires_explicit_fp16_packing = true;
+			force_recompile = true;
+		}
+		return "SPIRV_Cross_packFloat2x16";
+	}
 	else
 		return "";
 }
@@ -2857,6 +3038,14 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
 		emit_unary_func_op(result_type, id, args[0], "frac");
 		break;
 
+	case GLSLstd450RoundEven:
+		SPIRV_CROSS_THROW("roundEven is not supported on HLSL.");
+
+	case GLSLstd450Acosh:
+	case GLSLstd450Asinh:
+	case GLSLstd450Atanh:
+		SPIRV_CROSS_THROW("Inverse hyperbolics are not supported on HLSL.");
+
 	case GLSLstd450FMix:
 	case GLSLstd450IMix:
 		emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "lerp");
@@ -3467,6 +3656,180 @@ void CompilerHLSL::emit_atomic(const uint32_t *ops, uint32_t length, spv::Op op)
 	register_read(ops[1], ops[2], should_forward(ops[2]));
 }
 
+void CompilerHLSL::emit_subgroup_op(const Instruction &i)
+{
+	if (hlsl_options.shader_model < 60)
+		SPIRV_CROSS_THROW("Wave ops requires SM 6.0 or higher.");
+
+	const uint32_t *ops = stream(i);
+	auto op = static_cast<Op>(i.op);
+
+	uint32_t result_type = ops[0];
+	uint32_t id = ops[1];
+
+	auto scope = static_cast<Scope>(get<SPIRConstant>(ops[2]).scalar());
+	if (scope != ScopeSubgroup)
+		SPIRV_CROSS_THROW("Only subgroup scope is supported.");
+
+	const auto make_inclusive_Sum = [&](const string &expr) -> string {
+		return join(expr, " + ", to_expression(ops[4]));
+	};
+
+	const auto make_inclusive_Product = [&](const string &expr) -> string {
+		return join(expr, " * ", to_expression(ops[4]));
+	};
+
+#define make_inclusive_BitAnd(expr) ""
+#define make_inclusive_BitOr(expr) ""
+#define make_inclusive_BitXor(expr) ""
+#define make_inclusive_Min(expr) ""
+#define make_inclusive_Max(expr) ""
+
+	switch (op)
+	{
+	case OpGroupNonUniformElect:
+		emit_op(result_type, id, "WaveIsFirstLane()", true);
+		break;
+
+	case OpGroupNonUniformBroadcast:
+		emit_binary_func_op(result_type, id, ops[3], ops[4], "WaveReadLaneAt");
+		break;
+
+	case OpGroupNonUniformBroadcastFirst:
+		emit_unary_func_op(result_type, id, ops[3], "WaveReadLaneFirst");
+		break;
+
+	case OpGroupNonUniformBallot:
+		emit_unary_func_op(result_type, id, ops[3], "WaveActiveBallot");
+		break;
+
+	case OpGroupNonUniformInverseBallot:
+		SPIRV_CROSS_THROW("Cannot trivially implement InverseBallot in HLSL.");
+		break;
+
+	case OpGroupNonUniformBallotBitExtract:
+		SPIRV_CROSS_THROW("Cannot trivially implement BallotBitExtract in HLSL.");
+		break;
+
+	case OpGroupNonUniformBallotFindLSB:
+		SPIRV_CROSS_THROW("Cannot trivially implement BallotFindLSB in HLSL.");
+		break;
+
+	case OpGroupNonUniformBallotFindMSB:
+		SPIRV_CROSS_THROW("Cannot trivially implement BallotFindMSB in HLSL.");
+		break;
+
+	case OpGroupNonUniformBallotBitCount:
+	{
+		auto operation = static_cast<GroupOperation>(ops[3]);
+		if (operation == GroupOperationReduce)
+		{
+			bool forward = should_forward(ops[4]);
+			auto left = join("countbits(", to_enclosed_expression(ops[4]), ".x) + countbits(",
+			                 to_enclosed_expression(ops[4]), ".y)");
+			auto right = join("countbits(", to_enclosed_expression(ops[4]), ".z) + countbits(",
+			                  to_enclosed_expression(ops[4]), ".w)");
+			emit_op(result_type, id, join(left, " + ", right), forward);
+			inherit_expression_dependencies(id, ops[4]);
+		}
+		else if (operation == GroupOperationInclusiveScan)
+			SPIRV_CROSS_THROW("Cannot trivially implement BallotBitCount Inclusive Scan in HLSL.");
+		else if (operation == GroupOperationExclusiveScan)
+			SPIRV_CROSS_THROW("Cannot trivially implement BallotBitCount Exclusive Scan in HLSL.");
+		else
+			SPIRV_CROSS_THROW("Invalid BitCount operation.");
+		break;
+	}
+
+	case OpGroupNonUniformShuffle:
+		SPIRV_CROSS_THROW("Cannot trivially implement Shuffle in HLSL.");
+	case OpGroupNonUniformShuffleXor:
+		SPIRV_CROSS_THROW("Cannot trivially implement ShuffleXor in HLSL.");
+	case OpGroupNonUniformShuffleUp:
+		SPIRV_CROSS_THROW("Cannot trivially implement ShuffleUp in HLSL.");
+	case OpGroupNonUniformShuffleDown:
+		SPIRV_CROSS_THROW("Cannot trivially implement ShuffleDown in HLSL.");
+
+	case OpGroupNonUniformAll:
+		emit_unary_func_op(result_type, id, ops[3], "WaveActiveAllTrue");
+		break;
+
+	case OpGroupNonUniformAny:
+		emit_unary_func_op(result_type, id, ops[3], "WaveActiveAnyTrue");
+		break;
+
+	case OpGroupNonUniformAllEqual:
+	{
+		auto &type = get<SPIRType>(result_type);
+		emit_unary_func_op(result_type, id, ops[3],
+		                   type.basetype == SPIRType::Boolean ? "WaveActiveAllEqualBool" : "WaveActiveAllEqual");
+		break;
+	}
+
+	// clang-format off
+#define GROUP_OP(op, hlsl_op, supports_scan) \
+case OpGroupNonUniform##op: \
+	{ \
+		auto operation = static_cast<GroupOperation>(ops[3]); \
+		if (operation == GroupOperationReduce) \
+			emit_unary_func_op(result_type, id, ops[4], "WaveActive" #hlsl_op); \
+		else if (operation == GroupOperationInclusiveScan && supports_scan) \
+        { \
+			bool forward = should_forward(ops[4]); \
+			emit_op(result_type, id, make_inclusive_##hlsl_op (join("WavePrefix" #hlsl_op, "(", to_expression(ops[4]), ")")), forward); \
+			inherit_expression_dependencies(id, ops[4]); \
+        } \
+		else if (operation == GroupOperationExclusiveScan && supports_scan) \
+			emit_unary_func_op(result_type, id, ops[4], "WavePrefix" #hlsl_op); \
+		else if (operation == GroupOperationClusteredReduce) \
+			SPIRV_CROSS_THROW("Cannot trivially implement ClusteredReduce in HLSL."); \
+		else \
+			SPIRV_CROSS_THROW("Invalid group operation."); \
+		break; \
+	}
+	GROUP_OP(FAdd, Sum, true)
+	GROUP_OP(FMul, Product, true)
+	GROUP_OP(FMin, Min, false)
+	GROUP_OP(FMax, Max, false)
+	GROUP_OP(IAdd, Sum, true)
+	GROUP_OP(IMul, Product, true)
+	GROUP_OP(SMin, Min, false)
+	GROUP_OP(SMax, Max, false)
+	GROUP_OP(UMin, Min, false)
+	GROUP_OP(UMax, Max, false)
+	GROUP_OP(BitwiseAnd, BitAnd, false)
+	GROUP_OP(BitwiseOr, BitOr, false)
+	GROUP_OP(BitwiseXor, BitXor, false)
+#undef GROUP_OP
+		// clang-format on
+
+	case OpGroupNonUniformQuadSwap:
+	{
+		uint32_t direction = get<SPIRConstant>(ops[4]).scalar();
+		if (direction == 0)
+			emit_unary_func_op(result_type, id, ops[3], "QuadReadAcrossX");
+		else if (direction == 1)
+			emit_unary_func_op(result_type, id, ops[3], "QuadReadAcrossY");
+		else if (direction == 2)
+			emit_unary_func_op(result_type, id, ops[3], "QuadReadAcrossDiagonal");
+		else
+			SPIRV_CROSS_THROW("Invalid quad swap direction.");
+		break;
+	}
+
+	case OpGroupNonUniformQuadBroadcast:
+	{
+		emit_binary_func_op(result_type, id, ops[3], ops[4], "QuadReadLaneAt");
+		break;
+	}
+
+	default:
+		SPIRV_CROSS_THROW("Invalid opcode for subgroup.");
+	}
+
+	register_control_dependent_expression(id);
+}
+
 void CompilerHLSL::emit_instruction(const Instruction &instruction)
 {
 	auto ops = stream(instruction);
@@ -3552,26 +3915,39 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 
 	case OpDPdx:
 		UFOP(ddx);
+		register_control_dependent_expression(ops[1]);
 		break;
 
 	case OpDPdy:
 		UFOP(ddy);
+		register_control_dependent_expression(ops[1]);
 		break;
 
 	case OpDPdxFine:
 		UFOP(ddx_fine);
+		register_control_dependent_expression(ops[1]);
 		break;
 
 	case OpDPdyFine:
 		UFOP(ddy_fine);
+		register_control_dependent_expression(ops[1]);
 		break;
 
 	case OpDPdxCoarse:
 		UFOP(ddx_coarse);
+		register_control_dependent_expression(ops[1]);
 		break;
 
 	case OpDPdyCoarse:
 		UFOP(ddy_coarse);
+		register_control_dependent_expression(ops[1]);
+		break;
+
+	case OpFwidth:
+	case OpFwidthCoarse:
+	case OpFwidthFine:
+		UFOP(fwidth);
+		register_control_dependent_expression(ops[1]);
 		break;
 
 	case OpLogicalNot:
@@ -3815,7 +4191,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 
 		if (subpass_data)
 		{
-			if (options.shader_model < 40)
+			if (hlsl_options.shader_model < 40)
 				SPIRV_CROSS_THROW("Subpass loads are not supported in HLSL shader model 2/3.");
 
 			// Similar to GLSL, implement subpass loads using texelFetch.
@@ -3856,6 +4232,10 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 		}
 		else
 			emit_op(result_type, id, imgexpr, false);
+
+		inherit_expression_dependencies(id, ops[2]);
+		if (type.image.ms)
+			inherit_expression_dependencies(id, ops[5]);
 		break;
 	}
 
@@ -3926,6 +4306,12 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 			semantics = get<SPIRConstant>(ops[2]).scalar();
 		}
 
+		if (memory == ScopeSubgroup)
+		{
+			// No Wave-barriers in HLSL.
+			break;
+		}
+
 		// We only care about these flags, acquire/release and friends are not relevant to GLSL.
 		semantics = mask_relevant_memory_semantics(semantics);
 
@@ -3974,7 +4360,11 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 		// We are synchronizing some memory or syncing execution,
 		// so we cannot forward any loads beyond the memory barrier.
 		if (semantics || opcode == OpControlBarrier)
+		{
+			assert(current_emitting_block);
+			flush_control_dependent_expressions(current_emitting_block->self);
 			flush_all_active_variables();
+		}
 
 		if (opcode == OpControlBarrier)
 		{
@@ -4116,7 +4506,7 @@ uint32_t CompilerHLSL::remap_num_workgroups_builtin()
 {
 	update_active_builtins();
 
-	if ((active_input_builtins & (1ull << BuiltInNumWorkgroups)) == 0)
+	if (!active_input_builtins.get(BuiltInNumWorkgroups))
 		return 0;
 
 	// Create a new, fake UBO.
@@ -4161,11 +4551,12 @@ uint32_t CompilerHLSL::remap_num_workgroups_builtin()
 string CompilerHLSL::compile()
 {
 	// Do not deal with ES-isms like precision, older extensions and such.
-	CompilerGLSL::options.es = false;
-	CompilerGLSL::options.version = 450;
-	CompilerGLSL::options.vulkan_semantics = true;
+	options.es = false;
+	options.version = 450;
+	options.vulkan_semantics = true;
 	backend.float_literal_suffix = true;
 	backend.double_literal_suffix = false;
+	backend.half_literal_suffix = nullptr;
 	backend.long_long_literal_suffix = true;
 	backend.uint32_t_literal_suffix = true;
 	backend.basic_int_type = "int";
@@ -4187,7 +4578,7 @@ string CompilerHLSL::compile()
 
 	// Subpass input needs SV_Position.
 	if (need_subpass_input)
-		active_input_builtins |= 1ull << BuiltInFragCoord;
+		active_input_builtins.set(BuiltInFragCoord);
 
 	uint32_t pass_count = 0;
 	do
@@ -4203,7 +4594,7 @@ string CompilerHLSL::compile()
 		emit_header();
 		emit_resources();
 
-		emit_function(get<SPIRFunction>(entry_point), 0);
+		emit_function(get<SPIRFunction>(entry_point), Bitset());
 		emit_hlsl_entry_point();
 
 		pass_count++;
diff --git a/spirv_hlsl.hpp b/spirv_hlsl.hpp
index 0de72408e0..df330d0588 100644
--- a/spirv_hlsl.hpp
+++ b/spirv_hlsl.hpp
@@ -66,14 +66,26 @@ public:
 	{
 	}
 
+	SPIRV_CROSS_DEPRECATED("CompilerHLSL::get_options() is obsolete, use get_hlsl_options() instead.")
 	const Options &get_options() const
 	{
-		return options;
+		return hlsl_options;
 	}
 
+	const Options &get_hlsl_options() const
+	{
+		return hlsl_options;
+	}
+
+	SPIRV_CROSS_DEPRECATED("CompilerHLSL::set_options() is obsolete, use set_hlsl_options() instead.")
 	void set_options(Options &opts)
 	{
-		options = opts;
+		hlsl_options = opts;
+	}
+
+	void set_hlsl_options(const Options &opts)
+	{
+		hlsl_options = opts;
 	}
 
 	// Optionally specify a custom root constant layout.
@@ -109,7 +121,7 @@ private:
 	std::string image_type_hlsl(const SPIRType &type);
 	std::string image_type_hlsl_modern(const SPIRType &type);
 	std::string image_type_hlsl_legacy(const SPIRType &type);
-	void emit_function_prototype(SPIRFunction &func, uint64_t return_flags) override;
+	void emit_function_prototype(SPIRFunction &func, const Bitset &return_flags) override;
 	void emit_hlsl_entry_point();
 	void emit_header() override;
 	void emit_resources();
@@ -131,7 +143,7 @@ private:
 	void emit_fixup() override;
 	std::string builtin_to_glsl(spv::BuiltIn builtin, spv::StorageClass storage) override;
 	std::string layout_for_member(const SPIRType &type, uint32_t index) override;
-	std::string to_interpolation_qualifiers(uint64_t flags) override;
+	std::string to_interpolation_qualifiers(const Bitset &flags) override;
 	std::string bitcast_glsl_op(const SPIRType &result_type, const SPIRType &argument_type) override;
 	std::string to_func_call_arg(uint32_t id) override;
 	std::string to_sampler_expression(uint32_t id);
@@ -145,16 +157,19 @@ private:
 	void write_access_chain(const SPIRAccessChain &chain, uint32_t value);
 	void emit_store(const Instruction &instruction);
 	void emit_atomic(const uint32_t *ops, uint32_t length, spv::Op op);
+	void emit_subgroup_op(const Instruction &i) override;
 
 	void emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, const std::string &qualifier,
 	                        uint32_t base_offset = 0) override;
 
 	const char *to_storage_qualifiers_glsl(const SPIRVariable &var) override;
+	void replace_illegal_names() override;
 
-	Options options;
+	Options hlsl_options;
 	bool requires_op_fmod = false;
 	bool requires_textureProj = false;
 	bool requires_fp16_packing = false;
+	bool requires_explicit_fp16_packing = false;
 	bool requires_unorm8_packing = false;
 	bool requires_snorm8_packing = false;
 	bool requires_unorm16_packing = false;
@@ -206,6 +221,6 @@ private:
 	// when translating push constant ranges.
 	std::vector<RootConstants> root_constants_layout;
 };
-}
+} // namespace spirv_cross
 
 #endif
diff --git a/spirv_msl.cpp b/spirv_msl.cpp
index a8910bf1b1..984323b0d8 100644
--- a/spirv_msl.cpp
+++ b/spirv_msl.cpp
@@ -25,7 +25,7 @@ using namespace spv;
 using namespace spirv_cross;
 using namespace std;
 
-static const uint32_t k_unknown_location = ~0;
+static const uint32_t k_unknown_location = ~0u;
 
 CompilerMSL::CompilerMSL(vector<uint32_t> spirv_, vector<MSLVertexAttr> *p_vtx_attrs,
                          vector<MSLResourceBinding> *p_res_bindings)
@@ -104,16 +104,163 @@ void CompilerMSL::build_implicit_builtins()
 	}
 }
 
+static string create_sampler_address(const char *prefix, MSLSamplerAddress addr)
+{
+	switch (addr) // Result is "<prefix>address::<mode>"; unknown modes hit SPIRV_CROSS_THROW.
+	{
+	case MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE:
+		return join(prefix, "address::clamp_to_edge");
+	case MSL_SAMPLER_ADDRESS_CLAMP_TO_ZERO:
+		return join(prefix, "address::clamp_to_zero");
+	case MSL_SAMPLER_ADDRESS_CLAMP_TO_BORDER:
+		return join(prefix, "address::clamp_to_border");
+	case MSL_SAMPLER_ADDRESS_REPEAT:
+		return join(prefix, "address::repeat");
+	case MSL_SAMPLER_ADDRESS_MIRRORED_REPEAT:
+		return join(prefix, "address::mirrored_repeat");
+	default:
+		SPIRV_CROSS_THROW("Invalid sampler addressing mode.");
+	}
+}
+
+void CompilerMSL::emit_entry_point_declarations()
+{
+	// FIXME: This path has no test coverage yet.
+
+	// Emit one "constexpr sampler" statement per entry in constexpr_samplers (keyed by variable ID).
+	for (auto &samp : constexpr_samplers)
+	{
+		auto &var = get<SPIRVariable>(samp.first);
+		auto &type = get<SPIRType>(var.basetype);
+		if (type.basetype == SPIRType::Sampler)
+			add_resource_name(samp.first);
+
+		vector<string> args;
+		auto &s = samp.second;
+
+		if (s.coord != MSL_SAMPLER_COORD_NORMALIZED) // Normalized coordinates emit no argument.
+			args.push_back("coord::pixel");
+
+		if (s.min_filter == s.mag_filter) // Same filter both ways: use the combined filter:: form.
+		{
+			if (s.min_filter != MSL_SAMPLER_FILTER_NEAREST) // NEAREST emits no argument.
+				args.push_back("filter::linear");
+		}
+		else
+		{
+			if (s.min_filter != MSL_SAMPLER_FILTER_NEAREST)
+				args.push_back("min_filter::linear");
+			if (s.mag_filter != MSL_SAMPLER_FILTER_NEAREST)
+				args.push_back("mag_filter::linear");
+		}
+
+		switch (s.mip_filter)
+		{
+		case MSL_SAMPLER_MIP_FILTER_NONE:
+			// Default: emit no mip_filter argument.
+			break;
+		case MSL_SAMPLER_MIP_FILTER_NEAREST:
+			args.push_back("mip_filter::nearest");
+			break;
+		case MSL_SAMPLER_MIP_FILTER_LINEAR:
+			args.push_back("mip_filter::linear");
+			break;
+		default:
+			SPIRV_CROSS_THROW("Invalid mip filter.");
+		}
+
+		if (s.s_address == s.t_address && s.s_address == s.r_address) // All axes agree: unprefixed address:: form.
+		{
+			if (s.s_address != MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE) // CLAMP_TO_EDGE emits no argument.
+				args.push_back(create_sampler_address("", s.s_address));
+		}
+		else
+		{
+			if (s.s_address != MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE)
+				args.push_back(create_sampler_address("s_", s.s_address));
+			if (s.t_address != MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE)
+				args.push_back(create_sampler_address("t_", s.t_address));
+			if (s.r_address != MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE)
+				args.push_back(create_sampler_address("r_", s.r_address));
+		}
+
+		if (s.compare_enable)
+		{
+			switch (s.compare_func)
+			{
+			case MSL_SAMPLER_COMPARE_FUNC_ALWAYS:
+				args.push_back("compare_func::always");
+				break;
+			case MSL_SAMPLER_COMPARE_FUNC_NEVER:
+				args.push_back("compare_func::never");
+				break;
+			case MSL_SAMPLER_COMPARE_FUNC_EQUAL:
+				args.push_back("compare_func::equal");
+				break;
+			case MSL_SAMPLER_COMPARE_FUNC_NOT_EQUAL:
+				args.push_back("compare_func::not_equal");
+				break;
+			case MSL_SAMPLER_COMPARE_FUNC_LESS:
+				args.push_back("compare_func::less");
+				break;
+			case MSL_SAMPLER_COMPARE_FUNC_LESS_EQUAL:
+				args.push_back("compare_func::less_equal");
+				break;
+			case MSL_SAMPLER_COMPARE_FUNC_GREATER:
+				args.push_back("compare_func::greater");
+				break;
+			case MSL_SAMPLER_COMPARE_FUNC_GREATER_EQUAL:
+				args.push_back("compare_func::greater_equal");
+				break;
+			default:
+				SPIRV_CROSS_THROW("Invalid sampler compare function.");
+			}
+		}
+
+		if (s.s_address == MSL_SAMPLER_ADDRESS_CLAMP_TO_BORDER || s.t_address == MSL_SAMPLER_ADDRESS_CLAMP_TO_BORDER ||
+		    s.r_address == MSL_SAMPLER_ADDRESS_CLAMP_TO_BORDER)
+		{
+			switch (s.border_color) // Only reached when at least one axis clamps to border.
+			{
+			case MSL_SAMPLER_BORDER_COLOR_OPAQUE_BLACK:
+				args.push_back("border_color::opaque_black");
+				break;
+			case MSL_SAMPLER_BORDER_COLOR_OPAQUE_WHITE:
+				args.push_back("border_color::opaque_white");
+				break;
+			case MSL_SAMPLER_BORDER_COLOR_TRANSPARENT_BLACK:
+				args.push_back("border_color::transparent_black");
+				break;
+			default:
+				SPIRV_CROSS_THROW("Invalid sampler border color.");
+			}
+		}
+
+		if (s.anisotropy_enable)
+			args.push_back(join("max_anisotropy(", s.max_anisotropy, ")"));
+		if (s.lod_clamp_enable)
+		{
+			args.push_back(
+			    join("lod_clamp(", convert_to_string(s.lod_clamp_min), ", ", convert_to_string(s.lod_clamp_max), ")"));
+		}
+
+		statement("constexpr sampler ", // SampledImage types are named via to_sampler_expression().
+		          type.basetype == SPIRType::SampledImage ? to_sampler_expression(samp.first) : to_name(samp.first),
+		          "(", merge(args), ");");
+	}
+}
+
 string CompilerMSL::compile()
 {
 	// Force a classic "C" locale, reverts when function returns
 	ClassicLocale classic_locale;
 
 	// Do not deal with GLES-isms like precision, older extensions and such.
-	CompilerGLSL::options.vulkan_semantics = true;
-	CompilerGLSL::options.es = false;
-	CompilerGLSL::options.version = 450;
+	options.vulkan_semantics = true;
+	options.es = false;
+	options.version = 450;
 	backend.float_literal_suffix = false;
+	backend.half_literal_suffix = "h";
 	backend.uint32_t_literal_suffix = true;
 	backend.basic_int_type = "int";
 	backend.basic_uint_type = "uint";
@@ -131,7 +278,6 @@ string CompilerMSL::compile()
 
 	replace_illegal_names();
 
-	non_stage_in_input_var_ids.clear();
 	struct_member_padding.clear();
 
 	update_active_builtins();
@@ -145,10 +291,11 @@ string CompilerMSL::compile()
 	// Preprocess OpCodes to extract the need to output additional header content
 	preprocess_op_codes();
 
-	// Create structs to hold input, output and uniform variables
+	// Create structs to hold input, output and uniform variables.
+	// Do output first to ensure out. is declared at top of entry function.
 	qual_pos_var_name = "";
-	stage_in_var_id = add_interface_block(StorageClassInput);
 	stage_out_var_id = add_interface_block(StorageClassOutput);
+	stage_in_var_id = add_interface_block(StorageClassInput);
 	stage_uniforms_var_id = add_interface_block(StorageClassUniformConstant);
 
 	// Convert the use of global variables to recursively-passed function parameters
@@ -160,7 +307,7 @@ string CompilerMSL::compile()
 
 	// Metal does not allow dynamic array lengths.
 	// Resolve any specialization constants that are used for array lengths.
-	if (options.resolve_specialized_array_lengths)
+	if (msl_options.resolve_specialized_array_lengths)
 		resolve_specialized_array_lengths();
 
 	uint32_t pass_count = 0;
@@ -180,7 +327,7 @@ string CompilerMSL::compile()
 		emit_specialization_constants();
 		emit_resources();
 		emit_custom_functions();
-		emit_function(get<SPIRFunction>(entry_point), 0);
+		emit_function(get<SPIRFunction>(entry_point), Bitset());
 
 		pass_count++;
 	} while (force_recompile);
@@ -210,15 +357,13 @@ string CompilerMSL::compile(vector<MSLVertexAttr> *p_vtx_attrs, vector<MSLResour
 string CompilerMSL::compile(MSLConfiguration &msl_cfg, vector<MSLVertexAttr> *p_vtx_attrs,
                             vector<MSLResourceBinding> *p_res_bindings)
 {
-	options = msl_cfg;
+	msl_options = msl_cfg;
 	return compile(p_vtx_attrs, p_res_bindings);
 }
 
 // Register the need to output any custom functions.
 void CompilerMSL::preprocess_op_codes()
 {
-	spv_function_implementations.clear();
-
 	OpCodePreprocessor preproc(*this);
 	traverse_all_reachable_opcodes(get<SPIRFunction>(entry_point), preproc);
 
@@ -244,7 +389,6 @@ void CompilerMSL::localize_global_variables()
 		auto &var = get<SPIRVariable>(v_id);
 		if (var.storage == StorageClassPrivate || var.storage == StorageClassWorkgroup)
 		{
-			var.storage = StorageClassFunction;
 			entry_func.add_local_variable(v_id);
 			iter = global_variables.erase(iter);
 		}
@@ -272,7 +416,6 @@ void CompilerMSL::resolve_specialized_array_lengths()
 // extract that variable and add it as an argument to that function.
 void CompilerMSL::extract_global_variables_from_functions()
 {
-
 	// Uniforms
 	unordered_set<uint32_t> global_var_ids;
 	for (auto &id : ids)
@@ -280,19 +423,20 @@ void CompilerMSL::extract_global_variables_from_functions()
 		if (id.get_type() == TypeVariable)
 		{
 			auto &var = id.get<SPIRVariable>();
-			if (var.storage == StorageClassInput || var.storage == StorageClassUniform ||
-			    var.storage == StorageClassUniformConstant || var.storage == StorageClassPushConstant ||
-			    var.storage == StorageClassStorageBuffer)
+			if (var.storage == StorageClassInput || var.storage == StorageClassOutput ||
+			    var.storage == StorageClassUniform || var.storage == StorageClassUniformConstant ||
+			    var.storage == StorageClassPushConstant || var.storage == StorageClassStorageBuffer)
 			{
 				global_var_ids.insert(var.self);
 			}
 		}
 	}
 
-	// Local vars that are declared in the main function and accessed directy by a function
+	// Local vars that are declared in the main function and accessed directly by a function
 	auto &entry_func = get<SPIRFunction>(entry_point);
 	for (auto &var : entry_func.local_variables)
-		global_var_ids.insert(var);
+		if (get<SPIRVariable>(var).storage != StorageClassFunction)
+			global_var_ids.insert(var);
 
 	std::set<uint32_t> added_arg_ids;
 	unordered_set<uint32_t> processed_func_ids;
@@ -347,6 +491,7 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std::
 
 				break;
 			}
+
 			case OpFunctionCall:
 			{
 				// First see if any of the function call args are globals
@@ -366,9 +511,21 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std::
 				break;
 			}
 
+			case OpStore:
+			{
+				uint32_t base_id = ops[0];
+				if (global_var_ids.find(base_id) != global_var_ids.end())
+					added_arg_ids.insert(base_id);
+				break;
+			}
+
 			default:
 				break;
 			}
+
+			// TODO: Add all other operations which can affect memory.
+			// We should consider a more unified system here to reduce boiler-plate.
+			// This kind of analysis is done in several places ...
 		}
 	}
 
@@ -438,6 +595,11 @@ void CompilerMSL::mark_as_packable(SPIRType &type)
 			uint32_t mbr_type_id = type.member_types[mbr_idx];
 			auto &mbr_type = get<SPIRType>(mbr_type_id);
 			mark_as_packable(mbr_type);
+			if (mbr_type.type_alias)
+			{
+				auto &mbr_type_alias = get<SPIRType>(mbr_type.type_alias);
+				mark_as_packable(mbr_type_alias);
+			}
 		}
 	}
 }
@@ -502,9 +664,9 @@ uint32_t CompilerMSL::add_interface_block(StorageClass storage)
 	{
 		ib_var_ref = stage_out_var_name;
 
-		// Add the output interface struct as a local variable to the entry function,
-		// and force the entry function to return the output interface struct from
-		// any blocks that perform a function return.
+		// Add the output interface struct as a local variable to the entry function, force
+		// the entry function to return the output interface struct from any blocks that perform
+		// a function return, and indicate the output var requires early initialization
 		auto &entry_func = get<SPIRFunction>(entry_point);
 		entry_func.add_local_variable(ib_var_id);
 		for (auto &blk_id : entry_func.blocks)
@@ -513,6 +675,7 @@ uint32_t CompilerMSL::add_interface_block(StorageClass storage)
 			if (blk.terminator == SPIRBlock::Return)
 				blk.return_value = ib_var_id;
 		}
+		vars_needing_early_declaration.push_back(ib_var_id);
 		break;
 	}
 
@@ -527,7 +690,7 @@ uint32_t CompilerMSL::add_interface_block(StorageClass storage)
 		break;
 	}
 
-	set_name(ib_type_id, get_entry_point_name() + "_" + ib_var_ref);
+	set_name(ib_type_id, to_name(entry_point) + "_" + ib_var_ref);
 	set_name(ib_var_id, ib_var_ref);
 
 	for (auto p_var : vars)
@@ -543,10 +706,7 @@ uint32_t CompilerMSL::add_interface_block(StorageClass storage)
 				BuiltIn builtin;
 				bool is_builtin = is_member_builtin(type, mbr_idx, &builtin);
 
-				if (should_move_to_input_buffer(mbr_type_id, is_builtin, storage))
-					move_member_to_input_buffer(type, mbr_idx);
-
-				else if (!is_builtin || has_active_builtin(builtin, storage))
+				if (!is_builtin || has_active_builtin(builtin, storage))
 				{
 					// Add a reference to the member to the interface struct.
 					uint32_t ib_mbr_idx = uint32_t(ib_type.member_types.size());
@@ -592,203 +752,135 @@ uint32_t CompilerMSL::add_interface_block(StorageClass storage)
 		else if (type.basetype == SPIRType::Boolean || type.basetype == SPIRType::Char ||
 		         type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt ||
 		         type.basetype == SPIRType::Int64 || type.basetype == SPIRType::UInt64 ||
-		         type.basetype == SPIRType::Float || type.basetype == SPIRType::Double ||
-		         type.basetype == SPIRType::Boolean)
+		         type_is_floating_point(type) || type.basetype == SPIRType::Boolean)
 		{
 			bool is_builtin = is_builtin_variable(*p_var);
 			BuiltIn builtin = BuiltIn(get_decoration(p_var->self, DecorationBuiltIn));
 
-			if (should_move_to_input_buffer(type_id, is_builtin, storage))
-				move_to_input_buffer(*p_var);
-
-			else if (!is_builtin || has_active_builtin(builtin, storage))
+			if (!is_builtin || has_active_builtin(builtin, storage))
 			{
-				// Add a reference to the variable type to the interface struct.
-				uint32_t ib_mbr_idx = uint32_t(ib_type.member_types.size());
-				type_id = ensure_correct_builtin_type(type_id, builtin);
-				p_var->basetype = type_id;
-				ib_type.member_types.push_back(type_id);
-
-				// Give the member a name
-				string mbr_name = ensure_valid_name(to_expression(p_var->self), "m");
-				set_member_name(ib_type_id, ib_mbr_idx, mbr_name);
-
-				// Update the original variable reference to include the structure reference
-				string qual_var_name = ib_var_ref + "." + mbr_name;
-				meta[p_var->self].decoration.qualified_alias = qual_var_name;
-
-				// Copy the variable location from the original variable to the member
-				if (get_decoration_mask(p_var->self) & (1ull << DecorationLocation))
+					// MSL does not allow matrices or arrays in input or output variables, so we need to handle them specially.
+				if (!is_builtin && (storage == StorageClassInput || storage == StorageClassOutput) &&
+				    (is_matrix(type) || is_array(type)))
 				{
-					uint32_t locn = get_decoration(p_var->self, DecorationLocation);
-					set_member_decoration(ib_type_id, ib_mbr_idx, DecorationLocation, locn);
-					mark_location_as_used_by_shader(locn, storage);
+					uint32_t elem_cnt = 0;
+
+					if (is_matrix(type))
+					{
+						if (is_array(type))
+							SPIRV_CROSS_THROW("MSL cannot emit arrays-of-matrices in input and output variables.");
+
+						elem_cnt = type.columns;
+					}
+					else if (is_array(type))
+					{
+						if (type.array.size() != 1)
+							SPIRV_CROSS_THROW("MSL cannot emit arrays-of-arrays in input and output variables.");
+
+						elem_cnt = type.array_size_literal.back() ? type.array.back() :
+						                                            get<SPIRConstant>(type.array.back()).scalar();
+					}
+
+					auto *usable_type = &type;
+					while (is_array(*usable_type) || is_matrix(*usable_type))
+						usable_type = &get<SPIRType>(usable_type->parent_type);
+
+					auto &entry_func = get<SPIRFunction>(entry_point);
+					entry_func.add_local_variable(p_var->self);
+
+					// We need to declare the variable early and at entry-point scope.
+					vars_needing_early_declaration.push_back(p_var->self);
+
+					for (uint32_t i = 0; i < elem_cnt; i++)
+					{
+						// Add a reference to the variable type to the interface struct.
+						uint32_t ib_mbr_idx = uint32_t(ib_type.member_types.size());
+						ib_type.member_types.push_back(usable_type->self);
+
+						// Give the member a name
+						string mbr_name = ensure_valid_name(join(to_expression(p_var->self), "_", i), "m");
+						set_member_name(ib_type_id, ib_mbr_idx, mbr_name);
+
+						// There is no qualified alias since we need to flatten the internal array on return.
+						if (get_decoration_bitset(p_var->self).get(DecorationLocation))
+						{
+							uint32_t locn = get_decoration(p_var->self, DecorationLocation) + i;
+							set_member_decoration(ib_type_id, ib_mbr_idx, DecorationLocation, locn);
+							mark_location_as_used_by_shader(locn, storage);
+						}
+
+						if (get_decoration_bitset(p_var->self).get(DecorationIndex))
+						{
+							uint32_t index = get_decoration(p_var->self, DecorationIndex);
+							set_member_decoration(ib_type_id, ib_mbr_idx, DecorationIndex, index);
+						}
+
+						switch (storage)
+						{
+						case StorageClassInput:
+							entry_func.fixup_statements_in.push_back(
+							    join(to_name(p_var->self), "[", i, "] = ", ib_var_ref, ".", mbr_name, ";"));
+							break;
+
+						case StorageClassOutput:
+							entry_func.fixup_statements_out.push_back(
+							    join(ib_var_ref, ".", mbr_name, " = ", to_name(p_var->self), "[", i, "];"));
+							break;
+
+						default:
+							break;
+						}
+					}
 				}
-
-				// Mark the member as builtin if needed
-				if (is_builtin)
+				else
 				{
-					set_member_decoration(ib_type_id, ib_mbr_idx, DecorationBuiltIn, builtin);
-					if (builtin == BuiltInPosition)
-						qual_pos_var_name = qual_var_name;
+					// Add a reference to the variable type to the interface struct.
+					uint32_t ib_mbr_idx = uint32_t(ib_type.member_types.size());
+					type_id = ensure_correct_builtin_type(type_id, builtin);
+					p_var->basetype = type_id;
+					ib_type.member_types.push_back(type_id);
+
+					// Give the member a name
+					string mbr_name = ensure_valid_name(to_expression(p_var->self), "m");
+					set_member_name(ib_type_id, ib_mbr_idx, mbr_name);
+
+					// Update the original variable reference to include the structure reference
+					string qual_var_name = ib_var_ref + "." + mbr_name;
+					meta[p_var->self].decoration.qualified_alias = qual_var_name;
+
+					// Copy the variable location from the original variable to the member
+					if (get_decoration_bitset(p_var->self).get(DecorationLocation))
+					{
+						uint32_t locn = get_decoration(p_var->self, DecorationLocation);
+						set_member_decoration(ib_type_id, ib_mbr_idx, DecorationLocation, locn);
+						mark_location_as_used_by_shader(locn, storage);
+					}
+
+					if (get_decoration_bitset(p_var->self).get(DecorationIndex))
+					{
+						uint32_t index = get_decoration(p_var->self, DecorationIndex);
+						set_member_decoration(ib_type_id, ib_mbr_idx, DecorationIndex, index);
+					}
+
+					// Mark the member as builtin if needed
+					if (is_builtin)
+					{
+						set_member_decoration(ib_type_id, ib_mbr_idx, DecorationBuiltIn, builtin);
+						if (builtin == BuiltInPosition)
+							qual_pos_var_name = qual_var_name;
+					}
 				}
 			}
 		}
 	}
 
 	// Sort the members of the structure by their locations.
-	// Oddly, Metal handles inputs better if they are sorted in reverse order.
-	MemberSorter::SortAspect sort_aspect =
-	    (storage == StorageClassInput) ? MemberSorter::LocationReverse : MemberSorter::Location;
-	MemberSorter member_sorter(ib_type, meta[ib_type_id], sort_aspect);
+	MemberSorter member_sorter(ib_type, meta[ib_type_id], MemberSorter::Location);
 	member_sorter.sort();
 
 	return ib_var_id;
 }
 
-// Returns whether a variable of type and storage class should be moved from an interface
-// block to a secondary input buffer block.
-// This is the case for matrixes and arrays that appear in the stage_in interface block
-// of a vertex function, and true is returned.
-// Other types do not need to move, and false is returned.
-// Matrices and arrays are not permitted in the output of a vertex function or the input
-// or output of a fragment function, and in those cases, an exception is thrown.
-bool CompilerMSL::should_move_to_input_buffer(uint32_t type_id, bool is_builtin, StorageClass storage)
-{
-	auto &type = get<SPIRType>(type_id);
-
-	if ((is_matrix(type) || is_array(type)) && !is_builtin)
-	{
-		auto &execution = get_entry_point();
-
-		if (execution.model == ExecutionModelVertex)
-		{
-			if (storage == StorageClassInput)
-				return true;
-
-			if (storage == StorageClassOutput)
-				SPIRV_CROSS_THROW("The vertex function output structure may not include a matrix or array.");
-		}
-		else if (execution.model == ExecutionModelFragment)
-		{
-			if (storage == StorageClassInput)
-				SPIRV_CROSS_THROW("The fragment function stage_in structure may not include a matrix or array.");
-
-			if (storage == StorageClassOutput)
-				SPIRV_CROSS_THROW("The fragment function output structure may not include a matrix or array.");
-		}
-	}
-
-	return false;
-}
-
-// Excludes the specified variable from an interface block structure.
-// Instead, for the variable is added to a block variable corresponding to a secondary MSL buffer.
-// The use case for this is when a vertex stage_in variable contains a matrix or array.
-void CompilerMSL::move_to_input_buffer(SPIRVariable &var)
-{
-	uint32_t var_id = var.self;
-
-	if (!has_decoration(var_id, DecorationLocation))
-		return;
-
-	uint32_t mbr_type_id = var.basetype;
-	string mbr_name = ensure_valid_name(to_expression(var_id), "m");
-	uint32_t mbr_locn = get_decoration(var_id, DecorationLocation);
-	meta[var_id].decoration.qualified_alias = add_input_buffer_block_member(mbr_type_id, mbr_name, mbr_locn);
-}
-
-// Excludes the specified type member from the stage_in block structure.
-// Instead, for the variable is added to a block variable corresponding to a secondary MSL buffer.
-// The use case for this is when a vertex stage_in variable contains a matrix or array.
-void CompilerMSL::move_member_to_input_buffer(const SPIRType &type, uint32_t index)
-{
-	uint32_t type_id = type.self;
-
-	if (!has_member_decoration(type_id, index, DecorationLocation))
-		return;
-
-	uint32_t mbr_type_id = type.member_types[index];
-	string mbr_name = ensure_valid_name(to_qualified_member_name(type, index), "m");
-	uint32_t mbr_locn = get_member_decoration(type_id, index, DecorationLocation);
-	string qual_name = add_input_buffer_block_member(mbr_type_id, mbr_name, mbr_locn);
-	set_member_qualified_name(type_id, index, qual_name);
-}
-
-// Adds a member to the input buffer block that corresponds to the MTLBuffer used by an attribute location
-string CompilerMSL::add_input_buffer_block_member(uint32_t mbr_type_id, string mbr_name, uint32_t mbr_locn)
-{
-	mark_location_as_used_by_shader(mbr_locn, StorageClassInput);
-
-	MSLVertexAttr *p_va = vtx_attrs_by_location[mbr_locn];
-	if (!p_va)
-		return "";
-
-	if (p_va->per_instance)
-		needs_instance_idx_arg = true;
-	else
-		needs_vertex_idx_arg = true;
-
-	// The variable that is the block struct.
-	// Record the stride of this struct in its offset decoration.
-	uint32_t ib_var_id = get_input_buffer_block_var_id(p_va->msl_buffer);
-	auto &ib_var = get<SPIRVariable>(ib_var_id);
-	uint32_t ib_type_id = ib_var.basetype;
-	auto &ib_type = get<SPIRType>(ib_type_id);
-	set_decoration(ib_type_id, DecorationOffset, p_va->msl_stride);
-
-	// Add a reference to the variable type to the interface struct.
-	uint32_t ib_mbr_idx = uint32_t(ib_type.member_types.size());
-	ib_type.member_types.push_back(mbr_type_id);
-
-	// Give the member a name
-	set_member_name(ib_type_id, ib_mbr_idx, mbr_name);
-
-	// Set MSL buffer and offset decorations, and indicate no valid attribute location
-	set_member_decoration(ib_type_id, ib_mbr_idx, DecorationBinding, p_va->msl_buffer);
-	set_member_decoration(ib_type_id, ib_mbr_idx, DecorationOffset, p_va->msl_offset);
-	set_member_decoration(ib_type_id, ib_mbr_idx, DecorationLocation, k_unknown_location);
-
-	// Update the original variable reference to include the structure and index reference
-	string idx_var_name =
-	    builtin_to_glsl(p_va->per_instance ? BuiltInInstanceIndex : BuiltInVertexIndex, StorageClassInput);
-	return get_name(ib_var_id) + "[" + idx_var_name + "]." + mbr_name;
-}
-
-// Returns the ID of the input block that will use the specified MSL buffer index,
-// lazily creating an input block variable and type if needed.
-//
-// The use of this block applies only to input variables that have been excluded from the stage_in
-// block, which typically only occurs if an attempt to pass a matrix in the stage_in block.
-uint32_t CompilerMSL::get_input_buffer_block_var_id(uint32_t msl_buffer)
-{
-	uint32_t ib_var_id = non_stage_in_input_var_ids[msl_buffer];
-	if (!ib_var_id)
-	{
-		// No interface block exists yet. Create a new typed variable for this interface block.
-		// The initializer expression is allocated here, but populated when the function
-		// declaraion is emitted, because it is cleared after each compilation pass.
-		uint32_t next_id = increase_bound_by(3);
-		uint32_t ib_type_id = next_id++;
-		auto &ib_type = set<SPIRType>(ib_type_id);
-		ib_type.basetype = SPIRType::Struct;
-		ib_type.storage = StorageClassInput;
-		set_decoration(ib_type_id, DecorationBlock);
-
-		ib_var_id = next_id++;
-		auto &var = set<SPIRVariable>(ib_var_id, ib_type_id, StorageClassInput, 0);
-		var.initializer = next_id++;
-
-		string ib_var_name = stage_in_var_name + convert_to_string(msl_buffer);
-		set_name(ib_var_id, ib_var_name);
-		set_name(ib_type_id, get_entry_point_name() + "_" + ib_var_name);
-
-		// Add the variable to the map of buffer blocks, accessed by the Metal buffer index.
-		non_stage_in_input_var_ids[msl_buffer] = ib_var_id;
-	}
-	return ib_var_id;
-}
-
 // Ensure that the type is compatible with the builtin.
 // If it is, simply return the given type ID.
 // Otherwise, create a new type, and return it's ID.
@@ -991,7 +1083,7 @@ void CompilerMSL::emit_custom_functions()
 			statement("template<typename T>");
 			statement("T radians(T d)");
 			begin_scope();
-			statement("return d * 0.01745329251;");
+			statement("return d * T(0.01745329251);");
 			end_scope();
 			statement("");
 			break;
@@ -1001,7 +1093,7 @@ void CompilerMSL::emit_custom_functions()
 			statement("template<typename T>");
 			statement("T degrees(T r)");
 			begin_scope();
-			statement("return r * 57.2957795131;");
+			statement("return r * T(57.2957795131);");
 			end_scope();
 			statement("");
 			break;
@@ -1342,46 +1434,57 @@ void CompilerMSL::emit_resources()
 	declare_undefined_values();
 
 	// Output interface structs.
-	emit_interface_block(stage_in_var_id);
-	for (auto &nsi_var : non_stage_in_input_var_ids)
-		emit_interface_block(nsi_var.second);
-
 	emit_interface_block(stage_out_var_id);
+	emit_interface_block(stage_in_var_id);
 	emit_interface_block(stage_uniforms_var_id);
 }
 
 // Emit declarations for the specialization Metal function constants
 void CompilerMSL::emit_specialization_constants()
 {
-	const vector<SpecializationConstant> spec_consts = get_specialization_constants();
-
 	SpecializationConstant wg_x, wg_y, wg_z;
 	uint32_t workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);
+	bool emitted = false;
 
-	for (auto &sc : spec_consts)
+	for (auto &id : ids)
 	{
-		// If WorkGroupSize is a specialization constant, it will be declared explicitly below.
-		if (sc.id == workgroup_size_id)
-			continue;
-
-		auto &type = expression_type(sc.id);
-		string sc_type_name = type_to_glsl(type);
-		string sc_name = to_name(sc.id);
-		string sc_tmp_name = to_name(sc.id) + "_tmp";
-
-		if (type.vecsize == 1 && type.columns == 1 && type.basetype != SPIRType::Struct && type.array.empty())
+		if (id.get_type() == TypeConstant)
 		{
-			// Only scalar, non-composite values can be function constants.
-			statement("constant ", sc_type_name, " ", sc_tmp_name, " [[function_constant(",
-			          convert_to_string(sc.constant_id), ")]];");
-			statement("constant ", sc_type_name, " ", sc_name, " = is_function_constant_defined(", sc_tmp_name, ") ? ",
-			          sc_tmp_name, " : ", constant_expression(get<SPIRConstant>(sc.id)), ";");
+			auto &c = id.get<SPIRConstant>();
+			if (!c.specialization)
+				continue;
+
+			// If WorkGroupSize is a specialization constant, it will be declared explicitly below.
+			if (c.self == workgroup_size_id)
+				continue;
+
+			auto &type = get<SPIRType>(c.constant_type);
+			string sc_type_name = type_to_glsl(type);
+			string sc_name = to_name(c.self);
+			string sc_tmp_name = sc_name + "_tmp";
+
+			if (has_decoration(c.self, DecorationSpecId))
+			{
+				uint32_t constant_id = get_decoration(c.self, DecorationSpecId);
+				// Only scalar, non-composite values can be function constants.
+				statement("constant ", sc_type_name, " ", sc_tmp_name, " [[function_constant(", constant_id, ")]];");
+				statement("constant ", sc_type_name, " ", sc_name, " = is_function_constant_defined(", sc_tmp_name,
+				          ") ? ", sc_tmp_name, " : ", constant_expression(c), ";");
+			}
+			else
+			{
+				// Composite specialization constants must be built from other specialization constants.
+				statement("constant ", sc_type_name, " ", sc_name, " = ", constant_expression(c), ";");
+			}
+			emitted = true;
 		}
-		else
+		else if (id.get_type() == TypeConstantOp)
 		{
-			// Composite specialization constants must be built from other specialization constants.
-			statement("constant ", sc_type_name, " ", sc_name, " = ", constant_expression(get<SPIRConstant>(sc.id)),
-			          ";");
+			auto &c = id.get<SPIRConstantOp>();
+			auto &type = get<SPIRType>(c.basetype);
+			auto name = to_name(c.self);
+			statement("constant ", variable_decl(type, name), " = ", constant_op_expression(c), ";");
+			emitted = true;
 		}
 	}
 
@@ -1389,10 +1492,13 @@ void CompilerMSL::emit_specialization_constants()
 	// the work group size at compile time in SPIR-V, and [[threads_per_threadgroup]] would need to be passed around as a global.
 	// The work group size may be a specialization constant.
 	if (workgroup_size_id)
+	{
 		statement("constant uint3 ", builtin_to_glsl(BuiltInWorkgroupSize, StorageClassWorkgroup), " = ",
 		          constant_expression(get<SPIRConstant>(workgroup_size_id)), ";");
+		emitted = true;
+	}
 
-	if (!spec_consts.empty() || workgroup_size_id)
+	if (emitted)
 		statement("");
 }
 
@@ -1459,12 +1565,21 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
 	case OpDPdxFine:
 	case OpDPdxCoarse:
 		UFOP(dfdx);
+		register_control_dependent_expression(ops[1]);
 		break;
 
 	case OpDPdy:
 	case OpDPdyFine:
 	case OpDPdyCoarse:
 		UFOP(dfdy);
+		register_control_dependent_expression(ops[1]);
+		break;
+
+	case OpFwidth:
+	case OpFwidthCoarse:
+	case OpFwidthFine:
+		UFOP(fwidth);
+		register_control_dependent_expression(ops[1]);
 		break;
 
 	// Bitfield
@@ -1502,7 +1617,6 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
 	}
 
 	case OpAtomicCompareExchange:
-	case OpAtomicCompareExchangeWeak:
 	{
 		uint32_t result_type = ops[0];
 		uint32_t id = ops[1];
@@ -1516,6 +1630,9 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
 		break;
 	}
 
+	case OpAtomicCompareExchangeWeak:
+		SPIRV_CROSS_THROW("OpAtomicCompareExchangeWeak is only supported in kernel profile.");
+
 	case OpAtomicLoad:
 	{
 		uint32_t result_type = ops[0];
@@ -1796,7 +1913,16 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
 		if (e && e->need_transpose && (t.columns == t.vecsize || is_packed))
 		{
 			e->need_transpose = false;
+
+			// This is important for matrices. Packed matrices
+			// are generally transposed, so unpacking using a constructor argument
+			// will result in an error.
+			// The simplest solution for now is to just avoid unpacking the matrix in this operation.
+			unset_decoration(mtx_id, DecorationCPacked);
+
 			emit_binary_op(ops[0], ops[1], ops[3], ops[2], "*");
+			if (is_packed)
+				set_decoration(mtx_id, DecorationCPacked);
 			e->need_transpose = true;
 		}
 		else
@@ -1833,7 +1959,7 @@ void CompilerMSL::emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uin
 	else
 		bar_stmt += "mem_none";
 
-	if (options.is_ios() && options.supports_msl_version(2))
+	if (msl_options.is_ios() && msl_options.supports_msl_version(2))
 	{
 		bar_stmt += ", ";
 
@@ -1863,6 +1989,10 @@ void CompilerMSL::emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uin
 	bar_stmt += ");";
 
 	statement(bar_stmt);
+
+	assert(current_emitting_block);
+	flush_control_dependent_expressions(current_emitting_block->self);
+	flush_all_active_variables();
 }
 
 // Since MSL does not allow structs to be nested within the stage_in struct, the original input
@@ -1971,52 +2101,62 @@ void CompilerMSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id,
 {
 	forced_temporaries.insert(result_id);
 
-	bool fwd_obj = should_forward(obj);
-	bool fwd_op1 = op1 ? should_forward(op1) : true;
-	bool fwd_op2 = op2 ? should_forward(op2) : true;
-
-	bool forward = fwd_obj && fwd_op1 && fwd_op2;
-
 	string exp = string(op) + "(";
 
 	auto &type = expression_type(obj);
 	exp += "(volatile ";
-	exp += "device";
+	auto *var = maybe_get_backing_variable(obj);
+	if (!var)
+		SPIRV_CROSS_THROW("No backing variable for atomic operation.");
+	exp += get_argument_address_space(*var);
 	exp += " atomic_";
 	exp += type_to_glsl(type);
 	exp += "*)";
 
-	exp += "&(";
-	exp += to_expression(obj);
-	exp += ")";
+	exp += "&";
+	exp += to_enclosed_expression(obj);
 
-	if (op1)
+	bool is_atomic_compare_exchange_strong = op1_is_pointer && op1;
+
+	if (is_atomic_compare_exchange_strong)
 	{
-		if (op1_is_pointer)
-		{
-			statement(declare_temporary(expression_type(op2).self, op1), to_expression(op1), ";");
-			exp += ", &(" + to_name(op1) + ")";
-		}
-		else
-			exp += ", " + to_expression(op1);
+		assert(strcmp(op, "atomic_compare_exchange_weak_explicit") == 0);
+		assert(op2);
+		assert(has_mem_order_2);
+		exp += ", &";
+		exp += to_name(result_id);
+		exp += ", ";
+		exp += to_expression(op2);
+		exp += ", ";
+		exp += get_memory_order(mem_order_1);
+		exp += ", ";
+		exp += get_memory_order(mem_order_2);
+		exp += ")";
+
+		// MSL only supports the weak atomic compare exchange,
+		// so emit a CAS loop here.
+		statement(variable_decl(type, to_name(result_id)), ";");
+		statement("do");
+		begin_scope();
+		statement(to_name(result_id), " = ", to_expression(op1), ";");
+		end_scope_decl(join("while (!", exp, ")"));
+		set<SPIRExpression>(result_id, to_name(result_id), result_type, true);
 	}
+	else
+	{
+		assert(strcmp(op, "atomic_compare_exchange_weak_explicit") != 0);
+		if (op1)
+			exp += ", " + to_expression(op1);
+		if (op2)
+			exp += ", " + to_expression(op2);
 
-	if (op2)
-		exp += ", " + to_expression(op2);
+		exp += string(", ") + get_memory_order(mem_order_1);
+		if (has_mem_order_2)
+			exp += string(", ") + get_memory_order(mem_order_2);
 
-	exp += string(", ") + get_memory_order(mem_order_1);
-
-	if (has_mem_order_2)
-		exp += string(", ") + get_memory_order(mem_order_2);
-
-	exp += ")";
-	emit_op(result_type, result_id, exp, forward);
-
-	inherit_expression_dependencies(result_id, obj);
-	if (op1)
-		inherit_expression_dependencies(result_id, op1);
-	if (op2)
-		inherit_expression_dependencies(result_id, op2);
+		exp += ")";
+		emit_op(result_type, result_id, exp, false);
+	}
 
 	flush_all_atomic_capable_variables();
 }
@@ -2063,9 +2203,14 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
 	case GLSLstd450PackUnorm2x16:
 		emit_unary_func_op(result_type, id, args[0], "pack_float_to_unorm2x16");
 		break;
+
 	case GLSLstd450PackHalf2x16:
-		emit_unary_func_op(result_type, id, args[0], "unsupported_GLSLstd450PackHalf2x16"); // Currently unsupported
+	{
+		auto expr = join("as_type<uint>(half2(", to_expression(args[0]), "))");
+		emit_op(result_type, id, expr, should_forward(args[0]));
+		inherit_expression_dependencies(id, args[0]);
 		break;
+	}
 
 	case GLSLstd450UnpackSnorm4x8:
 		emit_unary_func_op(result_type, id, args[0], "unpack_snorm4x8_to_float");
@@ -2079,9 +2224,14 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
 	case GLSLstd450UnpackUnorm2x16:
 		emit_unary_func_op(result_type, id, args[0], "unpack_unorm2x16_to_float");
 		break;
+
 	case GLSLstd450UnpackHalf2x16:
-		emit_unary_func_op(result_type, id, args[0], "unsupported_GLSLstd450UnpackHalf2x16"); // Currently unsupported
+	{
+		auto expr = join("float2(as_type<half2>(", to_expression(args[0]), "))");
+		emit_op(result_type, id, expr, should_forward(args[0]));
+		inherit_expression_dependencies(id, args[0]);
 		break;
+	}
 
 	case GLSLstd450PackDouble2x32:
 		emit_unary_func_op(result_type, id, args[0], "unsupported_GLSLstd450PackDouble2x32"); // Currently unsupported
@@ -2136,7 +2286,7 @@ void CompilerMSL::emit_interface_block(uint32_t ib_var_id)
 
 // Emits the declaration signature of the specified function.
 // If this is the entry point function, Metal-specific return value and function arguments are added.
-void CompilerMSL::emit_function_prototype(SPIRFunction &func, uint64_t)
+void CompilerMSL::emit_function_prototype(SPIRFunction &func, const Bitset &)
 {
 	if (func.self != entry_point)
 		add_function_overload(func);
@@ -2177,14 +2327,17 @@ void CompilerMSL::emit_function_prototype(SPIRFunction &func, uint64_t)
 	{
 		decl += entry_point_args(!func.arguments.empty());
 
-		// If entry point function has a output interface struct, set its initializer.
-		// This is done at this late stage because the initialization expression is
-		// cleared after each compilation pass.
-		if (stage_out_var_id)
+		// If entry point function has variables that require early declaration,
+		// ensure they each have an empty initializer, creating one if needed.
+		// This is done at this late stage because the initialization expression
+		// is cleared after each compilation pass.
+		for (auto var_id : vars_needing_early_declaration)
 		{
-			auto &so_var = get<SPIRVariable>(stage_out_var_id);
-			auto &so_type = get<SPIRType>(so_var.basetype);
-			set<SPIRExpression>(so_var.initializer, "{}", so_type.self, true);
+			auto &ed_var = get<SPIRVariable>(var_id);
+			if (!ed_var.initializer)
+				ed_var.initializer = increase_bound_by(1);
+
+			set<SPIRExpression>(ed_var.initializer, "{}", ed_var.basetype, true);
 		}
 	}
 
@@ -2207,7 +2360,7 @@ void CompilerMSL::emit_function_prototype(SPIRFunction &func, uint64_t)
 		// Manufacture automatic sampler arg for SampledImage texture
 		auto &arg_type = get<SPIRType>(arg.type);
 		if (arg_type.basetype == SPIRType::SampledImage && arg_type.image.dim != DimBuffer)
-			decl += ", thread const sampler& " + to_sampler_expression(arg.id);
+			decl += join(", thread const ", sampler_type(arg_type), " ", to_sampler_expression(arg.id));
 
 		if (&arg != &func.arguments.back())
 			decl += ", ";
@@ -2252,7 +2405,7 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool
 	bool forward = should_forward(coord);
 	auto coord_expr = to_enclosed_expression(coord);
 	auto &coord_type = expression_type(coord);
-	bool coord_is_fp = (coord_type.basetype == SPIRType::Float) || (coord_type.basetype == SPIRType::Double);
+	bool coord_is_fp = type_is_floating_point(coord_type);
 	bool is_cube_fetch = false;
 
 	string tex_coords = coord_expr;
@@ -2343,11 +2496,23 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool
 
 	// If fetch from cube, add face explicitly
 	if (is_cube_fetch)
-		farg_str += ", uint(" + round_fp_tex_coords(coord_expr + ".z", coord_is_fp) + ")";
+	{
+		// Special case for cube arrays, face and layer are packed in one dimension.
+		if (imgtype.image.arrayed)
+			farg_str += ", uint(" + join(coord_expr, ".z) % 6u");
+		else
+			farg_str += ", uint(" + round_fp_tex_coords(coord_expr + ".z", coord_is_fp) + ")";
+	}
 
 	// If array, use alt coord
 	if (imgtype.image.arrayed)
-		farg_str += ", uint(" + round_fp_tex_coords(coord_expr + alt_coord, coord_is_fp) + ")";
+	{
+		// Special case for cube arrays, face and layer are packed in one dimension.
+		if (imgtype.image.dim == DimCube && is_fetch)
+			farg_str += ", uint(" + join(coord_expr, ".z) / 6u");
+		else
+			farg_str += ", uint(" + round_fp_tex_coords(coord_expr + alt_coord, coord_is_fp) + ")";
+	}
 
 	// Depth compare reference value
 	if (dref)
@@ -2358,13 +2523,15 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool
 	}
 
 	// LOD Options
-	if (bias)
+	// Metal does not support LOD for 1D textures.
+	if (bias && imgtype.image.dim != Dim1D)
 	{
 		forward = forward && should_forward(bias);
 		farg_str += ", bias(" + to_expression(bias) + ")";
 	}
 
-	if (lod)
+	// Metal does not support LOD for 1D textures.
+	if (lod && imgtype.image.dim != Dim1D)
 	{
 		forward = forward && should_forward(lod);
 		if (is_fetch)
@@ -2376,8 +2543,16 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool
 			farg_str += ", level(" + to_expression(lod) + ")";
 		}
 	}
+	else if (is_fetch && !lod && imgtype.image.dim != Dim1D && imgtype.image.dim != DimBuffer && !imgtype.image.ms &&
+	         imgtype.image.sampled != 2)
+	{
+		// Lod argument is optional in OpImageFetch, but we require a LOD value, pick 0 as the default.
+		// Check for sampled type as well, because is_fetch is also used for OpImageRead in MSL.
+		farg_str += ", 0";
+	}
 
-	if (grad_x || grad_y)
+	// Metal does not support LOD for 1D textures.
+	if ((grad_x || grad_y) && imgtype.image.dim != Dim1D)
 	{
 		forward = forward && should_forward(grad_x);
 		forward = forward && should_forward(grad_y);
@@ -2419,14 +2594,14 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool
 		{
 		case Dim2D:
 			if (coord_type.vecsize > 2)
-				offset_expr += ".xy";
+				offset_expr = enclose_expression(offset_expr) + ".xy";
 
 			farg_str += ", " + offset_expr;
 			break;
 
 		case Dim3D:
 			if (coord_type.vecsize > 3)
-				offset_expr += ".xyz";
+				offset_expr = enclose_expression(offset_expr) + ".xyz";
 
 			farg_str += ", " + offset_expr;
 			break;
@@ -2438,6 +2613,10 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool
 
 	if (comp)
 	{
+		// If 2D has gather component, ensure it also has an offset arg
+		if (imgtype.image.dim == Dim2D && offset_expr.empty())
+			farg_str += ", int2(0)";
+
 		forward = forward && should_forward(comp);
 		farg_str += ", " + to_component_argument(comp);
 	}
@@ -2502,14 +2681,9 @@ string CompilerMSL::to_func_call_arg(uint32_t id)
 	string arg_str = CompilerGLSL::to_func_call_arg(id);
 
 	// Manufacture automatic sampler arg if the arg is a SampledImage texture.
-	Variant &id_v = ids[id];
-	if (id_v.get_type() == TypeVariable)
-	{
-		auto &var = id_v.get<SPIRVariable>();
-		auto &type = get<SPIRType>(var.basetype);
-		if (type.basetype == SPIRType::SampledImage && type.image.dim != DimBuffer)
-			arg_str += ", " + to_sampler_expression(id);
-	}
+	auto &type = expression_type(id);
+	if (type.basetype == SPIRType::SampledImage && type.image.dim != DimBuffer)
+		arg_str += ", " + to_sampler_expression(id);
 
 	return arg_str;
 }
@@ -2519,8 +2693,18 @@ string CompilerMSL::to_func_call_arg(uint32_t id)
 // by appending a suffix to the expression constructed from the ID.
 string CompilerMSL::to_sampler_expression(uint32_t id)
 {
+	auto expr = to_expression(id);
+	auto index = expr.find_first_of('['); // Offset of the first '[' in the expression text, or npos if there is none.
 	uint32_t samp_id = meta[id].sampler;
-	return samp_id ? to_expression(samp_id) : to_expression(id) + sampler_name_suffix;
+
+	if (index == string::npos) // No subscript: use the recorded sampler ID if set, otherwise "<expr><suffix>".
+		return samp_id ? to_expression(samp_id) : expr + sampler_name_suffix;
+	else
+	{
+		auto image_expr = expr.substr(0, index); // Text before the first '['.
+		auto array_expr = expr.substr(index); // The first '[' and everything after it.
+		return samp_id ? to_expression(samp_id) : (image_expr + sampler_name_suffix + array_expr); // Suffix is inserted ahead of the subscript.
+	}
 }
 
 // Checks whether the ID is a row_major matrix that requires conversion before use
@@ -2531,7 +2715,7 @@ bool CompilerMSL::is_non_native_row_major_matrix(uint32_t id)
 		return false;
 
 	// Non-matrix or column-major matrix types do not need to be converted.
-	if (!(meta[id].decoration.decoration_flags & (1ull << DecorationRowMajor)))
+	if (!meta[id].decoration.decoration_flags.get(DecorationRowMajor))
 		return false;
 
 	// Generate a function that will swap matrix elements from row-major to column-major.
@@ -2553,7 +2737,7 @@ bool CompilerMSL::member_is_non_native_row_major_matrix(const SPIRType &type, ui
 		return false;
 
 	// Non-matrix or column-major matrix types do not need to be converted.
-	if (!has_member_decoration(type.self, index, DecorationRowMajor))
+	if (!combined_decoration_for_member(type, index).get(DecorationRowMajor))
 		return false;
 
 	// Generate a function that will swap matrix elements from row-major to column-major.
@@ -2590,7 +2774,10 @@ void CompilerMSL::add_convert_row_major_matrix_function(uint32_t cols, uint32_t
 
 	auto rslt = spv_function_implementations.insert(spv_func);
 	if (rslt.second)
+	{
 		add_pragma_line("#pragma clang diagnostic ignored \"-Wmissing-prototypes\"");
+		force_recompile = true;
+	}
 }
 
 // Wraps the expression string in a function call that converts the
@@ -2617,11 +2804,11 @@ void CompilerMSL::emit_fixup()
 
 	if ((execution.model == ExecutionModelVertex) && stage_out_var_id && !qual_pos_var_name.empty())
 	{
-		if (CompilerGLSL::options.vertex.fixup_clipspace)
+		if (options.vertex.fixup_clipspace)
 			statement(qual_pos_var_name, ".z = (", qual_pos_var_name, ".z + ", qual_pos_var_name,
 			          ".w) * 0.5;       // Adjust clip-space for Metal");
 
-		if (CompilerGLSL::options.vertex.flip_vert_y)
+		if (options.vertex.flip_vert_y)
 			statement(qual_pos_var_name, ".y = -(", qual_pos_var_name, ".y);", "    // Invert Y-axis for Metal");
 	}
 }
@@ -2706,7 +2893,7 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in
 				// Some shaders may include a PointSize builtin even when used to render
 				// non-point topologies, and Metal will reject this builtin when compiling
 				// the shader into a render pipeline that uses a non-point topology.
-				return options.enable_point_size_builtin ? (string(" [[") + builtin_qualifier(builtin) + "]]") : "";
+				return msl_options.enable_point_size_builtin ? (string(" [[") + builtin_qualifier(builtin) + "]]") : "";
 
 			case BuiltInPosition:
 			case BuiltInLayer:
@@ -2762,8 +2949,15 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in
 			}
 		}
 		uint32_t locn = get_ordered_member_location(type.self, index);
-		if (locn != k_unknown_location)
-			return string(" [[color(") + convert_to_string(locn) + ")]]";
+		if (locn != k_unknown_location && has_member_decoration(type.self, index, DecorationIndex))
+			return join(" [[color(", locn, "), index(", get_member_decoration(type.self, index, DecorationIndex),
+			            ")]]");
+		else if (locn != k_unknown_location)
+			return join(" [[color(", locn, ")]]");
+		else if (has_member_decoration(type.self, index, DecorationIndex))
+			return join(" [[index(", get_member_decoration(type.self, index, DecorationIndex), ")]]");
+		else
+			return "";
 	}
 
 	// Compute function inputs
@@ -2799,7 +2993,7 @@ uint32_t CompilerMSL::get_ordered_member_location(uint32_t type_id, uint32_t ind
 	if (index < m.members.size())
 	{
 		auto &dec = m.members[index];
-		if (dec.decoration_flags & (1ull << DecorationLocation))
+		if (dec.decoration_flags.get(DecorationLocation))
 			return dec.location;
 	}
 
@@ -2865,9 +3059,8 @@ string CompilerMSL::func_type_decl(SPIRType &type)
 		entry_type = "vertex";
 		break;
 	case ExecutionModelFragment:
-		entry_type = (execution.flags & (1ull << ExecutionModeEarlyFragmentTests)) ?
-		                 "fragment [[ early_fragment_tests ]]" :
-		                 "fragment";
+		entry_type =
+		    execution.flags.get(ExecutionModeEarlyFragmentTests) ? "fragment [[ early_fragment_tests ]]" : "fragment";
 		break;
 	case ExecutionModelGLCompute:
 	case ExecutionModelKernel:
@@ -2892,17 +3085,25 @@ string CompilerMSL::get_argument_address_space(const SPIRVariable &argument)
 		return "threadgroup";
 
 	case StorageClassStorageBuffer:
-		return "device";
+	{
+		auto flags = get_buffer_block_flags(argument);
+		return flags.get(DecorationNonWritable) ? "const device" : "device";
+	}
 
 	case StorageClassUniform:
 	case StorageClassUniformConstant:
 	case StorageClassPushConstant:
 		if (type.basetype == SPIRType::Struct)
-			return ((meta[type.self].decoration.decoration_flags & (1ull << DecorationBufferBlock)) != 0 &&
-			        (meta[argument.self].decoration.decoration_flags & (1ull << DecorationNonWritable)) == 0) ?
-			           "device" :
-			           "constant";
-
+		{
+			bool ssbo = has_decoration(type.self, DecorationBufferBlock);
+			if (!ssbo)
+				return "constant";
+			else
+			{
+				bool readonly = get_buffer_block_flags(argument).get(DecorationNonWritable);
+				return readonly ? "const device" : "device";
+			}
+		}
 		break;
 
 	default:
@@ -2929,19 +3130,6 @@ string CompilerMSL::entry_point_args(bool append_comma)
 		ep_args += type_to_glsl(type) + " " + to_name(var.self) + " [[stage_in]]";
 	}
 
-	// Non-stage-in vertex attribute structures
-	for (auto &nsi_var : non_stage_in_input_var_ids)
-	{
-		auto &var = get<SPIRVariable>(nsi_var.second);
-		auto &type = get<SPIRType>(var.basetype);
-
-		if (!ep_args.empty())
-			ep_args += ", ";
-
-		ep_args += "device " + type_to_glsl(type) + "* " + to_name(var.self) + " [[buffer(" +
-		           convert_to_string(nsi_var.first) + ")]]";
-	}
-
 	// Output resources, sorted by resource index & type
 	// We need to sort to work around a bug on macOS 10.13 with NVidia drivers where switching between shaders
 	// with different order of buffers can result in issues with buffer assignments inside the driver.
@@ -2973,12 +3161,15 @@ string CompilerMSL::entry_point_args(bool append_comma)
 					resources.push_back(
 					    { &id, to_name(var_id), SPIRType::Image, get_metal_resource_index(var, SPIRType::Image) });
 
-					if (type.image.dim != DimBuffer)
+					if (type.image.dim != DimBuffer && constexpr_samplers.count(var_id) == 0)
+					{
 						resources.push_back({ &id, to_sampler_expression(var_id), SPIRType::Sampler,
 						                      get_metal_resource_index(var, SPIRType::Sampler) });
+					}
 				}
-				else
+				else if (constexpr_samplers.count(var_id) == 0)
 				{
+					// constexpr samplers are not declared as resources.
 					resources.push_back(
 					    { &id, to_name(var_id), type.basetype, get_metal_resource_index(var, type.basetype) });
 				}
@@ -3013,13 +3204,13 @@ string CompilerMSL::entry_point_args(bool append_comma)
 		case SPIRType::Sampler:
 			if (!ep_args.empty())
 				ep_args += ", ";
-			ep_args += "sampler " + r.name;
+			ep_args += sampler_type(type) + " " + r.name;
 			ep_args += " [[sampler(" + convert_to_string(r.index) + ")]]";
 			break;
 		case SPIRType::Image:
 			if (!ep_args.empty())
 				ep_args += ", ";
-			ep_args += type_to_glsl(type, var_id) + " " + r.name;
+			ep_args += image_type_glsl(type, var_id) + " " + r.name;
 			ep_args += " [[texture(" + convert_to_string(r.index) + ")]]";
 			break;
 		default:
@@ -3092,24 +3283,36 @@ uint32_t CompilerMSL::get_metal_resource_index(SPIRVariable &var, SPIRType::Base
 		}
 	}
 
+	// If there is no explicit mapping of bindings to MSL, use the declared binding.
+	if (has_decoration(var.self, DecorationBinding))
+		return get_decoration(var.self, DecorationBinding);
+
+	uint32_t binding_stride = 1;
+	auto &type = get<SPIRType>(var.basetype);
+	for (uint32_t i = 0; i < uint32_t(type.array.size()); i++)
+		binding_stride *= type.array_size_literal[i] ? type.array[i] : get<SPIRConstant>(type.array[i]).scalar();
+
 	// If a binding has not been specified, revert to incrementing resource indices
+	uint32_t resource_index;
 	switch (basetype)
 	{
 	case SPIRType::Struct:
-		return next_metal_resource_index.msl_buffer++;
+		resource_index = next_metal_resource_index.msl_buffer;
+		next_metal_resource_index.msl_buffer += binding_stride;
+		break;
 	case SPIRType::Image:
-		return next_metal_resource_index.msl_texture++;
+		resource_index = next_metal_resource_index.msl_texture;
+		next_metal_resource_index.msl_texture += binding_stride;
+		break;
 	case SPIRType::Sampler:
-		return next_metal_resource_index.msl_sampler++;
+		resource_index = next_metal_resource_index.msl_sampler;
+		next_metal_resource_index.msl_sampler += binding_stride;
+		break;
 	default:
-		return 0;
+		resource_index = 0;
+		break;
 	}
-}
-
-// Returns the name of the entry point of this shader
-string CompilerMSL::get_entry_point_name()
-{
-	return to_name(entry_point);
+	return resource_index;
 }
 
 string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg)
@@ -3118,6 +3321,13 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg)
 	auto &type = expression_type(arg.id);
 	bool constref = !arg.alias_global_variable && (!type.pointer || arg.write_count == 0);
 
+	bool type_is_image = type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage ||
+	                     type.basetype == SPIRType::Sampler;
+
+	// Arrays of images/samplers in MSL are always const.
+	if (!type.array.empty() && type_is_image)
+		constref = true;
+
 	// TODO: Check if this arg is an uniform pointer
 	bool pointer = type.storage == StorageClassUniformConstant;
 
@@ -3130,7 +3340,8 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg)
 	else
 		decl += type_to_glsl(type, arg.id);
 
-	if (is_array(type))
+	// Arrays of images and samplers are special cased.
+	if (is_array(type) && !type_is_image)
 	{
 		decl += " (&";
 		decl += to_expression(var.self);
@@ -3190,9 +3401,10 @@ string CompilerMSL::ensure_valid_name(string name, string pfx)
 // Replace all names that match MSL keywords or Metal Standard Library functions.
 void CompilerMSL::replace_illegal_names()
 {
+	// FIXME: MSL and GLSL are doing two different things here.
+	// Agree on convention and remove this override.
 	static const unordered_set<string> keywords = {
-		"kernel",
-		"bias",
+		"kernel", "vertex", "fragment", "compute", "bias",
 	};
 
 	static const unordered_set<string> illegal_func_names = {
@@ -3246,6 +3458,8 @@ void CompilerMSL::replace_illegal_names()
 		// Always write this because entry point might have been renamed earlier.
 		meta[entry.first].decoration.alias = ep_name;
 	}
+
+	CompilerGLSL::replace_illegal_names();
 }
 
 string CompilerMSL::to_qualifiers_glsl(uint32_t id)
@@ -3279,7 +3493,7 @@ string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id)
 		return image_type_glsl(type, id);
 
 	case SPIRType::Sampler:
-		return "sampler";
+		return sampler_type(type);
 
 	case SPIRType::Void:
 		return "void";
@@ -3306,8 +3520,11 @@ string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id)
 	case SPIRType::UInt64:
 		type_name = "size_t";
 		break;
+	case SPIRType::Half:
+		type_name = "half";
+		break;
 	case SPIRType::Float:
-		type_name = (type.width == 16 ? "half" : "float");
+		type_name = "float";
 		break;
 	case SPIRType::Double:
 		type_name = "double"; // Currently unsupported
@@ -3328,9 +3545,59 @@ string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id)
 	return type_name;
 }
 
+std::string CompilerMSL::sampler_type(const SPIRType &type) // Returns "sampler", or "array<..., N>" when the type has array dimensions.
+{
+	if (!type.array.empty())
+	{
+		if (!msl_options.supports_msl_version(2)) // Sampler arrays are rejected unless MSL version 2 is supported.
+			SPIRV_CROSS_THROW("MSL 2.0 or greater is required for arrays of samplers.");
+
+		// Arrays of samplers in MSL must be declared with a special array<T, N> syntax, a la C++11 std::array.
+		auto *parent = &type;
+		while (parent->pointer) // Walk up through parent types for as long as the current one is a pointer.
+			parent = &get<SPIRType>(parent->parent_type);
+		parent = &get<SPIRType>(parent->parent_type); // One further step up; this parent becomes the element type T below.
+
+		uint32_t array_size = // Taken from the last entry of type.array: the literal itself, or the scalar of the constant it names.
+		    type.array_size_literal.back() ? type.array.back() : get<SPIRConstant>(type.array.back()).scalar();
+
+		if (array_size == 0) // A size of zero is reported as an unsized array.
+			SPIRV_CROSS_THROW("Unsized array of samplers is not supported in MSL.");
+		return join("array<", sampler_type(*parent), ", ", array_size, ">"); // Recurses on the parent type to build the element type string.
+	}
+	else
+		return "sampler";
+}
+
 // Returns an MSL string describing  the SPIR-V image type
 string CompilerMSL::image_type_glsl(const SPIRType &type, uint32_t id)
 {
+	auto *var = maybe_get<SPIRVariable>(id);
+	if (var && var->basevariable)
+	{
+		// For comparison images, check against the base variable,
+		// and not the fake ID which might have been generated for this variable.
+		id = var->basevariable;
+	}
+
+	if (!type.array.empty())
+	{
+		if (!msl_options.supports_msl_version(2))
+			SPIRV_CROSS_THROW("MSL 2.0 or greater is required for arrays of textures.");
+
+		// Arrays of images in MSL must be declared with a special array<T, N> syntax ala C++11 std::array.
+		auto *parent = &type;
+		while (parent->pointer)
+			parent = &get<SPIRType>(parent->parent_type);
+		parent = &get<SPIRType>(parent->parent_type);
+
+		uint32_t array_size =
+		    type.array_size_literal.back() ? type.array.back() : get<SPIRConstant>(type.array.back()).scalar();
+		if (array_size == 0)
+			SPIRV_CROSS_THROW("Unsized array of images is not supported in MSL.");
+		return join("array<", image_type_glsl(*parent, id), ", ", array_size, ">");
+	}
+
 	string img_type_name;
 
 	// Bypass pointers because we need the real image struct
@@ -3444,7 +3711,9 @@ string CompilerMSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in
 	    (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Double) ||
 	    (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::Double) ||
 	    (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::Int64) ||
-	    (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::UInt64))
+	    (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::UInt64) ||
+	    (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UInt) ||
+	    (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Half))
 		return "as_type<" + type_to_glsl(out_type) + ">";
 
 	return "";
@@ -3529,9 +3798,9 @@ string CompilerMSL::builtin_qualifier(BuiltIn builtin)
 
 	// Fragment function out
 	case BuiltInFragDepth:
-		if (execution.flags & (1ull << ExecutionModeDepthGreater))
+		if (execution.flags.get(ExecutionModeDepthGreater))
 			return "depth(greater)";
-		else if (execution.flags & (1ull << ExecutionModeDepthLess))
+		else if (execution.flags.get(ExecutionModeDepthLess))
 			return "depth(less)";
 		else
 			return "depth(any)";
@@ -3900,3 +4169,13 @@ CompilerMSL::MemberSorter::MemberSorter(SPIRType &t, Meta &m, SortAspect sa)
 	// Ensure enough meta info is available
 	meta.members.resize(max(type.member_types.size(), meta.members.size()));
 }
+
+void CompilerMSL::remap_constexpr_sampler(uint32_t id, const spirv_cross::MSLConstexprSampler &sampler)
+{
+	auto &type = get<SPIRType>(get<SPIRVariable>(id).basetype);
+	if (type.basetype != SPIRType::SampledImage && type.basetype != SPIRType::Sampler)
+		SPIRV_CROSS_THROW("Can only remap SampledImage and Sampler type.");
+	if (!type.array.empty())
+		SPIRV_CROSS_THROW("Can not remap array of samplers.");
+	constexpr_samplers[id] = sampler;
+}
diff --git a/spirv_msl.hpp b/spirv_msl.hpp
index 8e6a1130b4..61f5eb2995 100644
--- a/spirv_msl.hpp
+++ b/spirv_msl.hpp
@@ -59,6 +59,73 @@ struct MSLResourceBinding
 	bool used_by_shader = false;
 };
 
+enum MSLSamplerCoord
+{
+	MSL_SAMPLER_COORD_NORMALIZED,
+	MSL_SAMPLER_COORD_PIXEL
+};
+
+enum MSLSamplerFilter
+{
+	MSL_SAMPLER_FILTER_NEAREST,
+	MSL_SAMPLER_FILTER_LINEAR
+};
+
+enum MSLSamplerMipFilter
+{
+	MSL_SAMPLER_MIP_FILTER_NONE,
+	MSL_SAMPLER_MIP_FILTER_NEAREST,
+	MSL_SAMPLER_MIP_FILTER_LINEAR,
+};
+
+enum MSLSamplerAddress
+{
+	MSL_SAMPLER_ADDRESS_CLAMP_TO_ZERO,
+	MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE,
+	MSL_SAMPLER_ADDRESS_CLAMP_TO_BORDER,
+	MSL_SAMPLER_ADDRESS_REPEAT,
+	MSL_SAMPLER_ADDRESS_MIRRORED_REPEAT
+};
+
+enum MSLSamplerCompareFunc
+{
+	MSL_SAMPLER_COMPARE_FUNC_NEVER,
+	MSL_SAMPLER_COMPARE_FUNC_LESS,
+	MSL_SAMPLER_COMPARE_FUNC_LESS_EQUAL,
+	MSL_SAMPLER_COMPARE_FUNC_GREATER,
+	MSL_SAMPLER_COMPARE_FUNC_GREATER_EQUAL,
+	MSL_SAMPLER_COMPARE_FUNC_EQUAL,
+	MSL_SAMPLER_COMPARE_FUNC_NOT_EQUAL,
+	MSL_SAMPLER_COMPARE_FUNC_ALWAYS
+};
+
+enum MSLSamplerBorderColor
+{
+	MSL_SAMPLER_BORDER_COLOR_TRANSPARENT_BLACK,
+	MSL_SAMPLER_BORDER_COLOR_OPAQUE_BLACK,
+	MSL_SAMPLER_BORDER_COLOR_OPAQUE_WHITE
+};
+
+struct MSLConstexprSampler
+{
+	MSLSamplerCoord coord = MSL_SAMPLER_COORD_NORMALIZED;
+	MSLSamplerFilter min_filter = MSL_SAMPLER_FILTER_NEAREST;
+	MSLSamplerFilter mag_filter = MSL_SAMPLER_FILTER_NEAREST;
+	MSLSamplerMipFilter mip_filter = MSL_SAMPLER_MIP_FILTER_NONE;
+	MSLSamplerAddress s_address = MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE;
+	MSLSamplerAddress t_address = MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE;
+	MSLSamplerAddress r_address = MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE;
+	MSLSamplerCompareFunc compare_func = MSL_SAMPLER_COMPARE_FUNC_NEVER;
+	MSLSamplerBorderColor border_color = MSL_SAMPLER_BORDER_COLOR_TRANSPARENT_BLACK;
+	float lod_clamp_min = 0.0f;
+	float lod_clamp_max = 1000.0f;
+	int max_anisotropy = 1;
+
+	bool compare_enable = false;
+	bool lod_clamp_enable = false;
+	bool anisotropy_enable = false;
+};
+
 // Tracks the type ID and member index of a struct member
 using MSLStructMemberKey = uint64_t;
 
@@ -77,7 +144,8 @@ public:
 	// Options for compiling to Metal Shading Language
 	struct Options
 	{
-		typedef enum {
+		typedef enum
+		{
 			iOS,
 			macOS,
 		} Platform;
@@ -113,14 +181,26 @@ public:
 		}
 	};
 
+	SPIRV_CROSS_DEPRECATED("CompilerMSL::get_options() is obsolete, use get_msl_options() instead.")
 	const Options &get_options() const
 	{
-		return options;
+		return msl_options;
 	}
 
+	const Options &get_msl_options() const
+	{
+		return msl_options;
+	}
+
+	SPIRV_CROSS_DEPRECATED("CompilerMSL::set_options() is obsolete, use set_msl_options() instead.")
 	void set_options(Options &opts)
 	{
-		options = opts;
+		msl_options = opts;
+	}
+
+	void set_msl_options(const Options &opts)
+	{
+		msl_options = opts;
 	}
 
 	// An enum of SPIR-V functions that are implemented in additional
@@ -174,22 +254,31 @@ public:
 
 	// This legacy method is deprecated.
 	typedef Options MSLConfiguration;
-	SPIRV_CROSS_DEPRECATED("Please use get_options() and set_options() instead.")
+	SPIRV_CROSS_DEPRECATED("Please use get_msl_options() and set_msl_options() instead.")
 	std::string compile(MSLConfiguration &msl_cfg, std::vector<MSLVertexAttr> *p_vtx_attrs = nullptr,
 	                    std::vector<MSLResourceBinding> *p_res_bindings = nullptr);
 
+	// Remap a sampler with ID to a constexpr sampler.
+	// Older iOS targets must use constexpr samplers in certain cases (PCF),
+	// so a static sampler must be used.
+	// The sampler will not consume a binding, but be declared in the entry point as a constexpr sampler.
+	// This can be used on both combined image/samplers (sampler2D) and standalone samplers.
+	// The remapped sampler must not be an array of samplers.
+	void remap_constexpr_sampler(uint32_t id, const MSLConstexprSampler &sampler);
+
 protected:
 	void emit_instruction(const Instruction &instr) override;
 	void emit_glsl_op(uint32_t result_type, uint32_t result_id, uint32_t op, const uint32_t *args,
 	                  uint32_t count) override;
 	void emit_header() override;
-	void emit_function_prototype(SPIRFunction &func, uint64_t return_flags) override;
+	void emit_function_prototype(SPIRFunction &func, const Bitset &return_flags) override;
 	void emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id) override;
 	void emit_fixup() override;
 	void emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index,
 	                        const std::string &qualifier = "", uint32_t base_offset = 0) override;
 	std::string type_to_glsl(const SPIRType &type, uint32_t id = 0) override;
 	std::string image_type_glsl(const SPIRType &type, uint32_t id = 0) override;
+	std::string sampler_type(const SPIRType &type);
 	std::string builtin_to_glsl(spv::BuiltIn builtin, spv::StorageClass storage) override;
 	std::string constant_expression(const SPIRConstant &c) override;
 	size_t get_declared_struct_member_size(const SPIRType &struct_type, uint32_t index) const override;
@@ -238,7 +327,6 @@ protected:
 
 	std::string func_type_decl(SPIRType &type);
 	std::string entry_point_args(bool append_comma);
-	std::string get_entry_point_name();
 	std::string to_qualified_member_name(const SPIRType &type, uint32_t index);
 	std::string ensure_valid_name(std::string name, std::string pfx);
 	std::string to_sampler_expression(uint32_t id);
@@ -252,11 +340,6 @@ protected:
 	uint32_t get_ordered_member_location(uint32_t type_id, uint32_t index);
 	size_t get_declared_struct_member_alignment(const SPIRType &struct_type, uint32_t index) const;
 	std::string to_component_argument(uint32_t id);
-	bool should_move_to_input_buffer(uint32_t type_id, bool is_builtin, spv::StorageClass storage);
-	void move_to_input_buffer(SPIRVariable &var);
-	void move_member_to_input_buffer(const SPIRType &type, uint32_t index);
-	std::string add_input_buffer_block_member(uint32_t mbr_type_id, std::string mbr_name, uint32_t mbr_locn);
-	uint32_t get_input_buffer_block_var_id(uint32_t msl_buffer);
 	void align_struct(SPIRType &ib_type);
 	bool is_member_packable(SPIRType &ib_type, uint32_t index);
 	MSLStructMemberKey get_struct_member_key(uint32_t type_id, uint32_t index);
@@ -270,15 +353,16 @@ protected:
 	void emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uint32_t id_mem_sem);
 	void emit_array_copy(const std::string &lhs, uint32_t rhs_id) override;
 	void build_implicit_builtins();
+	void emit_entry_point_declarations() override;
 	uint32_t builtin_frag_coord_id = 0;
 
-	Options options;
+	Options msl_options;
 	std::set<SPVFuncImpl> spv_function_implementations;
 	std::unordered_map<uint32_t, MSLVertexAttr *> vtx_attrs_by_location;
-	std::map<uint32_t, uint32_t> non_stage_in_input_var_ids;
 	std::unordered_map<MSLStructMemberKey, uint32_t> struct_member_padding;
 	std::set<std::string> pragma_lines;
 	std::set<std::string> typedef_lines;
+	std::vector<uint32_t> vars_needing_early_declaration;
 	std::vector<MSLResourceBinding *> resource_bindings;
 	MSLResourceBinding next_metal_resource_index;
 	uint32_t stage_in_var_id = 0;
@@ -293,6 +377,8 @@ protected:
 	std::string sampler_name_suffix = "Smplr";
 	spv::Op previous_instruction_opcode = spv::OpNop;
 
+	std::unordered_map<uint32_t, MSLConstexprSampler> constexpr_samplers;
+
 	// OpcodeHandler that handles several MSL preprocessing operations.
 	struct OpCodePreprocessor : OpcodeHandler
 	{
@@ -333,6 +419,6 @@ protected:
 		SortAspect sort_aspect;
 	};
 };
-}
+} // namespace spirv_cross
 
 #endif
diff --git a/test_shaders.py b/test_shaders.py
index 1ca8f9af5f..3efcf923d1 100755
--- a/test_shaders.py
+++ b/test_shaders.py
@@ -14,6 +14,16 @@ import codecs
 
 force_no_external_validation = False
 
+def remove_file(path):
+    #print('Removing file:', path)
+    os.remove(path)
+
+def create_temporary(suff = ''):
+    f, path = tempfile.mkstemp(suffix = suff)
+    os.close(f)
+    #print('Creating temporary:', path)
+    return path
+
 def parse_stats(stats):
     m = re.search('([0-9]+) work registers', stats)
     registers = int(m.group(1)) if m else 0
@@ -49,12 +59,11 @@ def get_shader_type(shader):
         return ''
 
 def get_shader_stats(shader):
-    f, path = tempfile.mkstemp()
+    path = create_temporary()
 
-    os.close(f)
     p = subprocess.Popen(['malisc', get_shader_type(shader), '--core', 'Mali-T760', '-V', shader], stdout = subprocess.PIPE, stderr = subprocess.PIPE)
     stdout, stderr = p.communicate()
-    os.remove(path)
+    remove_file(path)
 
     if p.returncode != 0:
         print(stderr.decode('utf-8'))
@@ -74,10 +83,11 @@ def print_msl_compiler_version():
 
 def validate_shader_msl(shader, opt):
     msl_path = reference_path(shader[0], shader[1], opt)
+    msl2 = '.msl2.' in msl_path
     try:
         msl_os = 'macosx'
 #        msl_os = 'iphoneos'
-        subprocess.check_call(['xcrun', '--sdk', msl_os, 'metal', '-x', 'metal', '-std=osx-metal1.2', '-Werror', '-Wno-unused-variable', msl_path])
+        subprocess.check_call(['xcrun', '--sdk', msl_os, 'metal', '-x', 'metal', '-std=osx-metal{}'.format('2.0' if msl2 else '1.2'), '-Werror', '-Wno-unused-variable', msl_path])
         print('Compiled Metal shader: ' + msl_path)   # display after so xcrun FNF is silent
     except OSError as oe:
         if (oe.errno != os.errno.ENOENT):   # Ignore xcrun not found error
@@ -87,22 +97,30 @@ def validate_shader_msl(shader, opt):
         sys.exit(1)
 
 def cross_compile_msl(shader, spirv, opt):
-    spirv_f, spirv_path = tempfile.mkstemp()
-    msl_f, msl_path = tempfile.mkstemp(suffix = os.path.basename(shader))
-    os.close(spirv_f)
-    os.close(msl_f)
+    msl2 = '.msl2.' in shader
+    spirv_path = create_temporary()
+    msl_path = create_temporary(os.path.basename(shader))
 
     if spirv:
         subprocess.check_call(['spirv-as', '-o', spirv_path, shader])
     else:
-        subprocess.check_call(['glslangValidator', '-V', '-o', spirv_path, shader])
+        subprocess.check_call(['glslangValidator', '--target-env', 'vulkan1.1', '-V', '-o', spirv_path, shader])
 
     if opt:
         subprocess.check_call(['spirv-opt', '-O', '-o', spirv_path, spirv_path])
 
     spirv_cross_path = './spirv-cross'
-    subprocess.check_call([spirv_cross_path, '--entry', 'main', '--output', msl_path, spirv_path, '--msl'])
-    subprocess.check_call(['spirv-val', spirv_path])
+
+    msl_args = [spirv_cross_path, '--entry', 'main', '--output', msl_path, spirv_path, '--msl']
+    if msl2:
+        msl_args.append('--msl-version')
+        msl_args.append('20000')
+
+    subprocess.check_call(msl_args)
+
+    if not shader_is_invalid_spirv(msl_path):
+        subprocess.check_call(['spirv-val', '--target-env', 'vulkan1.1', spirv_path])
+
     return (spirv_path, msl_path)
 
 def shader_model_hlsl(shader):
@@ -129,22 +147,28 @@ def shader_to_win_path(shader):
 
     return shader
 
+ignore_fxc = False
 def validate_shader_hlsl(shader):
-    subprocess.check_call(['glslangValidator', '-e', 'main', '-D', '-V', shader])
+    subprocess.check_call(['glslangValidator', '-e', 'main', '-D', '--target-env', 'vulkan1.1', '-V', shader])
     is_no_fxc = '.nofxc.' in shader
-    if (not force_no_external_validation) and (not is_no_fxc):
+    global ignore_fxc
+    if (not ignore_fxc) and (not force_no_external_validation) and (not is_no_fxc):
         try:
             win_path = shader_to_win_path(shader)
             subprocess.check_call(['fxc', '-nologo', shader_model_hlsl(shader), win_path])
         except OSError as oe:
             if (oe.errno != os.errno.ENOENT): # Ignore not found errors
                 raise
+            else:
+                ignore_fxc = True
         except subprocess.CalledProcessError:
             print('Failed compiling HLSL shader:', shader, 'with FXC.')
             sys.exit(1)
 
 def shader_to_sm(shader):
-    if '.sm51.' in shader:
+    if '.sm60.' in shader:
+        return '60'
+    elif '.sm51.' in shader:
         return '51'
     elif '.sm20.' in shader:
         return '20'
@@ -152,15 +176,13 @@ def shader_to_sm(shader):
         return '50'
 
 def cross_compile_hlsl(shader, spirv, opt):
-    spirv_f, spirv_path = tempfile.mkstemp()
-    hlsl_f, hlsl_path = tempfile.mkstemp(suffix = os.path.basename(shader))
-    os.close(spirv_f)
-    os.close(hlsl_f)
+    spirv_path = create_temporary()
+    hlsl_path = create_temporary(os.path.basename(shader))
 
     if spirv:
         subprocess.check_call(['spirv-as', '-o', spirv_path, shader])
     else:
-        subprocess.check_call(['glslangValidator', '-V', '-o', spirv_path, shader])
+        subprocess.check_call(['glslangValidator', '--target-env', 'vulkan1.1', '-V', '-o', spirv_path, shader])
 
     if opt:
         subprocess.check_call(['spirv-opt', '-O', '-o', spirv_path, spirv_path])
@@ -169,7 +191,9 @@ def cross_compile_hlsl(shader, spirv, opt):
 
     sm = shader_to_sm(shader)
     subprocess.check_call([spirv_cross_path, '--entry', 'main', '--output', hlsl_path, spirv_path, '--hlsl-enable-compat', '--hlsl', '--shader-model', sm])
-    subprocess.check_call(['spirv-val', spirv_path])
+
+    if not shader_is_invalid_spirv(hlsl_path):
+        subprocess.check_call(['spirv-val', '--target-env', 'vulkan1.1', spirv_path])
 
     validate_shader_hlsl(hlsl_path)
     
@@ -177,30 +201,27 @@ def cross_compile_hlsl(shader, spirv, opt):
 
 def validate_shader(shader, vulkan):
     if vulkan:
-        subprocess.check_call(['glslangValidator', '-V', shader])
+        subprocess.check_call(['glslangValidator', '--target-env', 'vulkan1.1', '-V', shader])
     else:
         subprocess.check_call(['glslangValidator', shader])
 
 def cross_compile(shader, vulkan, spirv, invalid_spirv, eliminate, is_legacy, flatten_ubo, sso, flatten_dim, opt):
-    spirv_f, spirv_path = tempfile.mkstemp()
-    glsl_f, glsl_path = tempfile.mkstemp(suffix = os.path.basename(shader))
-    os.close(spirv_f)
-    os.close(glsl_f)
+    spirv_path = create_temporary()
+    glsl_path = create_temporary(os.path.basename(shader))
 
     if vulkan or spirv:
-        vulkan_glsl_f, vulkan_glsl_path = tempfile.mkstemp(suffix = os.path.basename(shader))
-        os.close(vulkan_glsl_f)
+        vulkan_glsl_path = create_temporary('vk' + os.path.basename(shader))
 
     if spirv:
         subprocess.check_call(['spirv-as', '-o', spirv_path, shader])
     else:
-        subprocess.check_call(['glslangValidator', '-V', '-o', spirv_path, shader])
+        subprocess.check_call(['glslangValidator', '--target-env', 'vulkan1.1', '-V', '-o', spirv_path, shader])
 
     if opt and (not invalid_spirv):
         subprocess.check_call(['spirv-opt', '-O', '-o', spirv_path, spirv_path])
 
     if not invalid_spirv:
-        subprocess.check_call(['spirv-val', spirv_path])
+        subprocess.check_call(['spirv-val', '--target-env', 'vulkan1.1', spirv_path])
 
     extra_args = []
     if eliminate:
@@ -215,15 +236,21 @@ def cross_compile(shader, vulkan, spirv, invalid_spirv, eliminate, is_legacy, fl
         extra_args += ['--flatten-multidimensional-arrays']
 
     spirv_cross_path = './spirv-cross'
-    subprocess.check_call([spirv_cross_path, '--entry', 'main', '--output', glsl_path, spirv_path] + extra_args)
 
     # A shader might not be possible to make valid GLSL from, skip validation for this case.
-    if (not ('nocompat' in glsl_path)) and (not spirv):
+    if not ('nocompat' in glsl_path):
+        subprocess.check_call([spirv_cross_path, '--entry', 'main', '--output', glsl_path, spirv_path] + extra_args)
         validate_shader(glsl_path, False)
+    else:
+        remove_file(glsl_path)
+        glsl_path = None
 
     if vulkan or spirv:
         subprocess.check_call([spirv_cross_path, '--entry', 'main', '--vulkan-semantics', '--output', vulkan_glsl_path, spirv_path] + extra_args)
         validate_shader(vulkan_glsl_path, True)
+        # SPIR-V shaders might just want to validate Vulkan GLSL output; we don't always care about the output.
+        if not vulkan:
+            remove_file(vulkan_glsl_path)
 
     return (spirv_path, glsl_path, vulkan_glsl_path if vulkan else None)
 
@@ -261,7 +288,7 @@ def regression_check(shader, glsl, update, keep, opt):
                 print('Generated source code has changed for {}!'.format(reference))
                 # If we expect changes, update the reference file.
                 if os.path.exists(reference):
-                    os.remove(reference)
+                    remove_file(reference)
                 make_reference_dir(reference)
                 shutil.move(glsl, reference)
             else:
@@ -276,10 +303,10 @@ def regression_check(shader, glsl, update, keep, opt):
 
                 # Otherwise, fail the test. Keep the shader file around so we can inspect.
                 if not keep:
-                    os.remove(glsl)
+                    remove_file(glsl)
                 sys.exit(1)
         else:
-            os.remove(glsl)
+            remove_file(glsl)
     else:
         print('Found new shader {}. Placing generated source code in {}'.format(joined_path, reference))
         make_reference_dir(reference)
@@ -335,10 +362,12 @@ def test_shader(stats, shader, update, keep, opt):
     if stats and (not vulkan) and (not is_spirv) and (not desktop):
         cross_stats = get_shader_stats(glsl)
 
-    regression_check(shader, glsl, update, keep, opt)
+    if glsl:
+        regression_check(shader, glsl, update, keep, opt)
     if vulkan_glsl:
         regression_check((shader[0], shader[1] + '.vk'), vulkan_glsl, update, keep, opt)
-    os.remove(spirv)
+
+    remove_file(spirv)
 
     if stats and (not vulkan) and (not is_spirv) and (not desktop):
         pristine_stats = get_shader_stats(joined_path)
@@ -370,16 +399,16 @@ def test_shader_msl(stats, shader, update, keep, opt):
     if not force_no_external_validation:
         validate_shader_msl(shader, opt)
 
-    os.remove(spirv)
+    remove_file(spirv)
 
 def test_shader_hlsl(stats, shader, update, keep, opt):
     joined_path = os.path.join(shader[0], shader[1])
     print('Testing HLSL shader:', joined_path)
     is_spirv = shader_is_spirv(shader[1])
     noopt = shader_is_noopt(shader[1])
-    spirv, msl = cross_compile_hlsl(joined_path, is_spirv, opt and (not noopt))
-    regression_check(shader, msl, update, keep, opt)
-    os.remove(spirv)
+    spirv, hlsl = cross_compile_hlsl(joined_path, is_spirv, opt and (not noopt))
+    regression_check(shader, hlsl, update, keep, opt)
+    remove_file(spirv)
 
 def test_shaders_helper(stats, shader_dir, update, malisc, keep, opt, backend):
     for root, dirs, files in os.walk(os.path.join(shader_dir)):
diff --git a/test_shaders.sh b/test_shaders.sh
index a3608730b9..e96978a1e6 100755
--- a/test_shaders.sh
+++ b/test_shaders.sh
@@ -9,9 +9,11 @@ echo "Using spirv-opt in: $(which spirv-opt)."
 
 ./test_shaders.py shaders || exit 1
 ./test_shaders.py shaders --opt || exit 1
+./test_shaders.py shaders-no-opt || exit 1
 ./test_shaders.py shaders-msl --msl || exit 1
 ./test_shaders.py shaders-msl --msl --opt || exit 1
 ./test_shaders.py shaders-msl-no-opt --msl || exit 1
 ./test_shaders.py shaders-hlsl --hlsl || exit 1
 ./test_shaders.py shaders-hlsl --hlsl --opt || exit 1
+./test_shaders.py shaders-hlsl-no-opt --hlsl || exit 1
 
diff --git a/tests-other/hlsl_wave_mask.cpp b/tests-other/hlsl_wave_mask.cpp
new file mode 100644
index 0000000000..de11dd9fe0
--- /dev/null
+++ b/tests-other/hlsl_wave_mask.cpp
@@ -0,0 +1,73 @@
+// Ad-hoc test that the wave op masks work as expected.
+#include <glm/glm.hpp>
+#include <assert.h>
+
+using namespace glm;
+
+static uvec4 gl_SubgroupEqMask;
+static uvec4 gl_SubgroupGeMask;
+static uvec4 gl_SubgroupGtMask;
+static uvec4 gl_SubgroupLeMask;
+static uvec4 gl_SubgroupLtMask;
+using uint4 = uvec4;
+
+static void test_main(unsigned wave_index)
+{    
+	const auto WaveGetLaneIndex = [&]() { return wave_index; };
+
+	gl_SubgroupEqMask = 1u << (WaveGetLaneIndex() - uint4(0, 32, 64, 96));
+	if (WaveGetLaneIndex() >= 32) gl_SubgroupEqMask.x = 0;
+	if (WaveGetLaneIndex() >= 64 || WaveGetLaneIndex() < 32) gl_SubgroupEqMask.y = 0;
+	if (WaveGetLaneIndex() >= 96 || WaveGetLaneIndex() < 64) gl_SubgroupEqMask.z = 0;
+	if (WaveGetLaneIndex() < 96) gl_SubgroupEqMask.w = 0;
+	gl_SubgroupGeMask = ~((1u << (WaveGetLaneIndex() - uint4(0, 32, 64, 96))) - 1u);
+	if (WaveGetLaneIndex() >= 32) gl_SubgroupGeMask.x = 0u;
+	if (WaveGetLaneIndex() >= 64) gl_SubgroupGeMask.y = 0u;
+	if (WaveGetLaneIndex() >= 96) gl_SubgroupGeMask.z = 0u;
+	if (WaveGetLaneIndex() < 32) gl_SubgroupGeMask.y = ~0u;
+	if (WaveGetLaneIndex() < 64) gl_SubgroupGeMask.z = ~0u;
+	if (WaveGetLaneIndex() < 96) gl_SubgroupGeMask.w = ~0u;
+	uint gt_lane_index = WaveGetLaneIndex() + 1;
+	gl_SubgroupGtMask = ~((1u << (gt_lane_index - uint4(0, 32, 64, 96))) - 1u);
+	if (gt_lane_index >= 32) gl_SubgroupGtMask.x = 0u;
+	if (gt_lane_index >= 64) gl_SubgroupGtMask.y = 0u;
+	if (gt_lane_index >= 96) gl_SubgroupGtMask.z = 0u;
+	if (gt_lane_index >= 128) gl_SubgroupGtMask.w = 0u;
+	if (gt_lane_index < 32) gl_SubgroupGtMask.y = ~0u;
+	if (gt_lane_index < 64) gl_SubgroupGtMask.z = ~0u;
+	if (gt_lane_index < 96) gl_SubgroupGtMask.w = ~0u;
+	uint le_lane_index = WaveGetLaneIndex() + 1;
+	gl_SubgroupLeMask = (1u << (le_lane_index - uint4(0, 32, 64, 96))) - 1u;
+	if (le_lane_index >= 32) gl_SubgroupLeMask.x = ~0u;
+	if (le_lane_index >= 64) gl_SubgroupLeMask.y = ~0u;
+	if (le_lane_index >= 96) gl_SubgroupLeMask.z = ~0u;
+	if (le_lane_index >= 128) gl_SubgroupLeMask.w = ~0u;
+	if (le_lane_index < 32) gl_SubgroupLeMask.y = 0u;
+	if (le_lane_index < 64) gl_SubgroupLeMask.z = 0u;
+	if (le_lane_index < 96) gl_SubgroupLeMask.w = 0u;
+	gl_SubgroupLtMask = (1u << (WaveGetLaneIndex() - uint4(0, 32, 64, 96))) - 1u;
+	if (WaveGetLaneIndex() >= 32) gl_SubgroupLtMask.x = ~0u;
+	if (WaveGetLaneIndex() >= 64) gl_SubgroupLtMask.y = ~0u;
+	if (WaveGetLaneIndex() >= 96) gl_SubgroupLtMask.z = ~0u;
+	if (WaveGetLaneIndex() < 32) gl_SubgroupLtMask.y = 0u;
+	if (WaveGetLaneIndex() < 64) gl_SubgroupLtMask.z = 0u;
+	if (WaveGetLaneIndex() < 96) gl_SubgroupLtMask.w = 0u;
+}
+
+int main()
+{
+	for (unsigned subgroup_id = 0; subgroup_id < 128; subgroup_id++)
+	{
+		test_main(subgroup_id);
+
+		for (unsigned bit = 0; bit < 128; bit++)
+		{
+			assert(bool(gl_SubgroupEqMask[bit / 32] & (1u << (bit & 31))) == (bit == subgroup_id));
+			assert(bool(gl_SubgroupGtMask[bit / 32] & (1u << (bit & 31))) == (bit > subgroup_id));
+			assert(bool(gl_SubgroupGeMask[bit / 32] & (1u << (bit & 31))) == (bit >= subgroup_id));
+			assert(bool(gl_SubgroupLtMask[bit / 32] & (1u << (bit & 31))) == (bit < subgroup_id));
+			assert(bool(gl_SubgroupLeMask[bit / 32] & (1u << (bit & 31))) == (bit <= subgroup_id));
+		}
+	}
+}
+
diff --git a/update_test_shaders.sh b/update_test_shaders.sh
index 712c3eec5d..4bc87a1564 100755
--- a/update_test_shaders.sh
+++ b/update_test_shaders.sh
@@ -9,9 +9,12 @@ echo "Using spirv-opt in: $(which spirv-opt)."
 
 ./test_shaders.py shaders --update || exit 1
 ./test_shaders.py shaders --update --opt || exit 1
-./test_shaders.py shaders-msl --msl --update || exit 1
-./test_shaders.py shaders-msl --msl --update --opt || exit 1
-./test_shaders.py shaders-msl-no-opt --msl --update || exit 1
-./test_shaders.py shaders-hlsl --hlsl --update || exit 1
-./test_shaders.py shaders-hlsl --hlsl --update --opt || exit 1
+./test_shaders.py shaders-no-opt --update || exit 1
+./test_shaders.py shaders-msl --update --msl || exit 1
+./test_shaders.py shaders-msl --update --msl --opt || exit 1
+./test_shaders.py shaders-msl-no-opt --update --msl || exit 1
+./test_shaders.py shaders-hlsl --update --hlsl || exit 1
+./test_shaders.py shaders-hlsl --update --hlsl --opt || exit 1
+./test_shaders.py shaders-hlsl-no-opt --update --hlsl || exit 1
+