From f48c1800c0122e8e7957f572c359e4594932cd4c Mon Sep 17 00:00:00 2001
From: aliaspider <aliaspider@gmail.com>
Date: Tue, 6 Mar 2018 04:10:13 +0100
Subject: [PATCH] Squashed 'deps/SPIRV-Cross/' changes from 33c5cd9..c9516fa

c9516fa Fix OSX Travis.
94cd777 Move `rename_interface_variable` to util
467c956 Merge pull request #480 from KhronosGroup/fix-476
1e4db56 Run format_all.sh.
eecbeaa Take execution model into account for entry point methods.
337150f Merge pull request #479 from KhronosGroup/fix-477
3c1b147 Support Invariant for BuiltInPosition.
cae1722 Merge pull request #475 from KhronosGroup/fix-convert-cast
e69b1ae Fix implicit conversion bug.
b39c063 Merge pull request #474 from KhronosGroup/fix-pointer-overload
fda36f8 Fix function overload when SPIR-V overloads on pointer type.
a61e728 Merge pull request #472 from KhronosGroup/fix-446
6a12ff7 Fix multiple declaration of spvDet2x2 on MSL.
b380a21 Implement MatrixInverse on HLSL.
6066fe4 Merge pull request #471 from KhronosGroup/fix-462
f6d08e6 Add workaround for PointCoord builtin on HLSL.
fb196c2 Merge pull request #470 from KhronosGroup/fix-459
dd603ea Support spec constant array size in blocks.
32b1c7d Merge pull request #469 from KhronosGroup/fix-454
a04bdcc Handle overloaded functions which share the same OpName.
f887b20 Merge pull request #468 from KhronosGroup/fix-461
4543dac Run format_all.sh.
047ad7d Support special float constants (NaN/Inf).
3f64f56 Merge pull request #465 from KhronosGroup/fix-445
0673f27 Fix incorrect loop range.
114c2c5 Add clip/cull-distance support to HLSL.
fb3f92a Overhaul clip/cull distance support in GLSL.
11bbccb Merge pull request #464 from billhollings/master
50ef6cd CompilerMSL remove incorrect packing of non-interface type-aliased structs.
3925fe8 Merge pull request #460 from KhronosGroup/fix-424
47b3742 Run format_all.sh.
1a2e4de Add test for texelFetch without sampler.
4db7061 Begin implementing texelFetch(texture2D) workaround on GLSL.
d871a89 Remove "EXPERIMENTAL" tag from MSL/HLSL.
218b0a5 Merge pull request #455 from KhronosGroup/fix-452
8a3bef2 Add OpFRem tests.
54a065b Run format_all.sh.
3fa6cc8 Implement FRem.
a9a6bca Merge pull request #453 from KhronosGroup/fix-444
843e34b Add IsFrontFace support to HLSL.
3d0c61f Merge pull request #451 from KhronosGroup/fix-437
636cc30 Fix case where hoisted temporaries were used before being declared.
4d1c5ad Merge pull request #450 from jodavis42/OpCopyMemoryCrash
b4b629b Null crash fix in OpCopyMemory.
6a0f698 Set width when creating NumWorkgroups buffer.
2bda0d5 Merge pull request #449 from billhollings/master
2964e32 CompilerMSL support gl_SampleMask and convert it to scalar uint from array.
64d3083 Merge pull request #448 from billhollings/master
b453348 Merge branch 'master' of https://github.com/billhollings/SPIRV-Cross
607b0d6 CompilerMSL support smaller offsets for 3-row row-major matrices.
7be30aa Merge pull request #442 from KhronosGroup/fix-435
a3104e9 Also check that type we load is an image.
a3ae861 Fix depth image usage in MSL for separate image/samplers.
702e086 Support passing implicit frag_coord arguments down to functions.
0912427 Begin implementing subpassLoad in MSL.
18a594a Implement subpass input support in HLSL.
f4bce68 Merge pull request #433 from KhronosGroup/fix-428
c9db3e5 Overload on constant storage.
b2c9487 Attempt to deduce constant/thread storage.
1a9c960 MSL cannot declare inline arrays except in certain cases.
156dd90 Implicit return value takes thread storage.
d89b790 Fix wrong function declaration in MSL.
00ccd59 Return arrays in HLSL/MSL by writing to an output variable instead.
9fa91f7 Support returning arrays from functions in GLSL/MSL.
ed6b775 Merge pull request #417 from msiglreith/root_constants_overwrite
369f5f6 Add missing include
d096f5c hlsl: Support custom root constant layout
4b58f65 Merge pull request #438 from zeux/msvc-warning-fix
24cf308 Fix MSVC 2015 warning
8b53b70 Merge pull request #434 from KhronosGroup/fix-429
18e8833 Support gl_NumWorkgroups in HLSL.
181a5fa Fix formatting after merge.
e3f4041 Fixes MSVC 2013 compilation
ef33770 Merge pull request #431 from twinaphex/master
59a12c7 Uniquely name these static functions

git-subtree-dir: deps/SPIRV-Cross
git-subtree-split: c9516fa91709b68ac20fa614625e85e0ced3f23f
---
 .travis.yml                                   |   4 +-
 CMakeLists.txt                                |   7 +-
 README.md                                     |   4 +-
 jni/Android.mk                                |   2 +-
 main.cpp                                      | 163 ++++-
 msvc/SPIRV-Cross.vcxproj                      |   2 +
 msvc/SPIRV-Cross.vcxproj.filters              |   6 +
 .../opt/shaders-hlsl/asm/frag/frem.asm.frag   |  29 +
 .../asm/frag/function-overload-alias.asm.frag |  19 +
 reference/opt/shaders-hlsl/comp/inverse.comp  | 122 ++++
 .../comp/num-workgroups-alone.comp            |  16 +
 .../comp/num-workgroups-with-builtins.comp    |  23 +
 .../shaders-hlsl/frag/clip-cull-distance.frag |  30 +
 .../opt/shaders-hlsl/frag/front-facing.frag   |  39 ++
 .../shaders-hlsl/frag/inf-nan-constant.frag   |  19 +
 .../frag/input-attachment-ms.frag             |  32 +
 .../shaders-hlsl/frag/input-attachment.frag   |  29 +
 .../shaders-hlsl/frag/point-coord-compat.frag |  19 +
 .../frag/spec-constant-block-size.frag        |  33 +
 .../shaders-hlsl/vert/clip-cull-distance.vert |  28 +
 .../shaders-hlsl/vert/point-size-compat.vert  |   2 +-
 .../opt/shaders-hlsl/vert/return-array.vert   |  31 +
 .../opt/shaders-msl/asm/frag/frem.asm.frag    |  23 +
 .../asm/frag/function-overload-alias.asm.frag |  17 +
 reference/opt/shaders-msl/comp/inverse.comp   | 123 ++++
 .../opt/shaders-msl/comp/struct-nested.comp   |  29 +
 .../opt/shaders-msl/comp/struct-packing.comp  |   7 +-
 .../opt/shaders-msl/frag/constant-array.frag  |  22 +
 .../shaders-msl/frag/constant-composites.frag |  19 +
 .../opt/shaders-msl/frag/front-facing.frag    |  30 +
 .../shaders-msl/frag/inf-nan-constant.frag    |  17 +
 .../shaders-msl/frag/input-attachment-ms.frag |  17 +
 .../shaders-msl/frag/input-attachment.frag    |  17 +
 .../sample-depth-separate-image-sampler.frag  |  17 +
 .../opt/shaders-msl/frag/sample-mask.frag     |  19 +
 .../frag/spec-constant-block-size.frag        |  27 +
 reference/opt/shaders-msl/vert/functions.vert |  12 +-
 .../opt/shaders-msl/vert/packed_matrix.vert   |  56 ++
 .../opt/shaders-msl/vert/return-array.vert    |  24 +
 reference/opt/shaders/asm/frag/frem.asm.frag  |  13 +
 .../asm/frag/function-overload-alias.asm.frag |  11 +
 .../frag/image-fetch-no-sampler.asm.vk.frag   |  13 +
 .../image-fetch-no-sampler.asm.vk.frag.vk     |  14 +
 .../shaders/asm/vert/invariant-block.asm.vert |   9 +
 .../asm/vert/invariant-block.sso.asm.vert     |  17 +
 .../opt/shaders/asm/vert/invariant.asm.vert   |   9 +
 .../shaders/asm/vert/invariant.sso.asm.vert   |  14 +
 .../frag/clip-cull-distance.desktop.frag      |  12 +
 .../frag/inf-nan-constant-double.desktop.frag |  11 +
 .../vert/clip-cull-distance.desktop.sso.vert  |  20 +
 .../vert/clip-cull-distance.desktop.vert      |  14 +-
 reference/opt/shaders/frag/front-facing.frag  |  20 +
 ...temporary-use-continue-block-as-value.frag |  37 ++
 .../opt/shaders/frag/inf-nan-constant.frag    |  11 +
 reference/opt/shaders/vert/return-array.vert  |   9 +
 .../frag/spec-constant-block-size.vk.frag     |  17 +
 .../frag/spec-constant-block-size.vk.frag.vk  |  19 +
 reference/shaders-hlsl/asm/frag/frem.asm.frag |  29 +
 .../asm/frag/function-overload-alias.asm.frag |  47 ++
 reference/shaders-hlsl/comp/inverse.comp      | 122 ++++
 .../comp/num-workgroups-alone.comp            |  16 +
 .../comp/num-workgroups-with-builtins.comp    |  23 +
 .../shaders-hlsl/frag/clip-cull-distance.frag |  30 +
 reference/shaders-hlsl/frag/front-facing.frag |  39 ++
 .../shaders-hlsl/frag/inf-nan-constant.frag   |  19 +
 .../frag/input-attachment-ms.frag             |  37 ++
 .../shaders-hlsl/frag/input-attachment.frag   |  34 +
 .../shaders-hlsl/frag/point-coord-compat.frag |  19 +
 .../frag/spec-constant-block-size.frag        |  33 +
 .../shaders-hlsl/vert/clip-cull-distance.vert |  28 +
 .../shaders-hlsl/vert/point-size-compat.vert  |   2 +-
 reference/shaders-hlsl/vert/return-array.vert |  48 ++
 reference/shaders-msl/asm/frag/frem.asm.frag  |  23 +
 .../asm/frag/function-overload-alias.asm.frag |  47 ++
 .../asm/frag/op-constant-null.asm.frag        |  18 +
 reference/shaders-msl/comp/inverse.comp       | 123 ++++
 reference/shaders-msl/comp/struct-nested.comp |  27 +
 .../shaders-msl/comp/struct-packing.comp      |   7 +-
 .../shaders-msl/frag/constant-array.frag      |  20 +
 .../shaders-msl/frag/constant-composites.frag |  19 +
 reference/shaders-msl/frag/front-facing.frag  |  30 +
 .../shaders-msl/frag/inf-nan-constant.frag    |  17 +
 .../shaders-msl/frag/input-attachment-ms.frag |  24 +
 .../shaders-msl/frag/input-attachment.frag    |  24 +
 .../sample-depth-separate-image-sampler.frag  |  29 +
 reference/shaders-msl/frag/sample-mask.frag   |  19 +
 .../frag/spec-constant-block-size.frag        |  27 +
 reference/shaders-msl/vert/functions.vert     |  12 +-
 reference/shaders-msl/vert/packed_matrix.vert |  56 ++
 reference/shaders-msl/vert/return-array.vert  |  58 ++
 reference/shaders/asm/frag/frem.asm.frag      |  13 +
 .../asm/frag/function-overload-alias.asm.frag |  39 ++
 .../frag/image-fetch-no-sampler.asm.vk.frag   |  38 ++
 .../image-fetch-no-sampler.asm.vk.frag.vk     |  37 ++
 .../shaders/asm/vert/invariant-block.asm.vert |   9 +
 .../asm/vert/invariant-block.sso.asm.vert     |  17 +
 reference/shaders/asm/vert/invariant.asm.vert |  14 +
 .../shaders/asm/vert/invariant.sso.asm.vert   |  19 +
 .../frag/clip-cull-distance.desktop.frag      |  12 +
 .../frag/inf-nan-constant-double.desktop.frag |  11 +
 .../vert/clip-cull-distance.desktop.sso.vert  |  20 +
 .../vert/clip-cull-distance.desktop.vert      |  14 +-
 reference/shaders/frag/front-facing.frag      |  20 +
 ...temporary-use-continue-block-as-value.frag |  31 +
 reference/shaders/frag/inf-nan-constant.frag  |  11 +
 reference/shaders/vert/return-array.vert      |  23 +
 .../frag/spec-constant-block-size.vk.frag     |  17 +
 .../frag/spec-constant-block-size.vk.frag.vk  |  19 +
 shaders-hlsl/asm/frag/frem.asm.frag           |  41 ++
 .../asm/frag/function-overload-alias.asm.frag | 153 +++++
 shaders-hlsl/comp/inverse.comp                |  23 +
 shaders-hlsl/comp/num-workgroups-alone.comp   |  13 +
 .../comp/num-workgroups-with-builtins.comp    |  13 +
 shaders-hlsl/frag/clip-cull-distance.frag     |  12 +
 shaders-hlsl/frag/front-facing.frag           |  14 +
 shaders-hlsl/frag/inf-nan-constant.frag       |  14 +
 shaders-hlsl/frag/input-attachment-ms.frag    |  15 +
 shaders-hlsl/frag/input-attachment.frag       |  16 +
 shaders-hlsl/frag/point-coord-compat.frag     |  10 +
 .../frag/spec-constant-block-size.frag        |  17 +
 shaders-hlsl/vert/clip-cull-distance.vert     |  11 +
 shaders-hlsl/vert/point-size-compat.vert      |   3 +-
 shaders-hlsl/vert/return-array.vert           |  22 +
 shaders-msl/asm/frag/frem.asm.frag            |  41 ++
 .../asm/frag/function-overload-alias.asm.frag | 153 +++++
 shaders-msl/comp/inverse.comp                 |  23 +
 shaders-msl/comp/struct-nested.comp           |  20 +
 shaders-msl/comp/struct-packing.comp          |   1 +
 shaders-msl/frag/front-facing.frag            |  14 +
 shaders-msl/frag/inf-nan-constant.frag        |  14 +
 shaders-msl/frag/input-attachment-ms.frag     |  15 +
 shaders-msl/frag/input-attachment.frag        |  16 +
 .../sample-depth-separate-image-sampler.frag  |  22 +
 shaders-msl/frag/sample-mask.frag             |  10 +
 .../frag/spec-constant-block-size.frag        |  17 +
 shaders-msl/vert/packed_matrix.vert           |  41 ++
 shaders-msl/vert/return-array.vert            |  22 +
 shaders-other/README.md                       |   4 +
 shaders-other/aliased-entry-point-names.asm   |  60 ++
 shaders/asm/frag/frem.asm.frag                |  41 ++
 .../asm/frag/function-overload-alias.asm.frag | 153 +++++
 .../frag/image-fetch-no-sampler.asm.vk.frag   | 163 +++++
 shaders/asm/vert/invariant-block.asm.vert     |  44 ++
 shaders/asm/vert/invariant-block.sso.asm.vert |  44 ++
 shaders/asm/vert/invariant.asm.vert           |  34 +
 shaders/asm/vert/invariant.sso.asm.vert       |  34 +
 .../frag/clip-cull-distance.desktop.frag      |  12 +
 .../frag/inf-nan-constant-double.desktop.frag |  13 +
 .../vert/clip-cull-distance.desktop.sso.vert  |  13 +
 .../vert/clip-cull-distance.desktop.vert      |  13 +-
 shaders/frag/front-facing.frag                |  14 +
 ...temporary-use-continue-block-as-value.frag |  24 +
 shaders/frag/inf-nan-constant.frag            |  14 +
 shaders/vert/return-array.vert                |  22 +
 .../frag/spec-constant-block-size.vk.frag     |  17 +
 spirv_common.hpp                              |  37 +-
 spirv_cpp.cpp                                 |   3 +
 spirv_cross.cpp                               | 303 ++++++++-
 spirv_cross.hpp                               |  79 ++-
 spirv_cross_util.cpp                          |  51 ++
 spirv_cross_util.hpp                          |  28 +
 spirv_glsl.cpp                                | 480 ++++++++++++--
 spirv_glsl.hpp                                |  17 +-
 spirv_hlsl.cpp                                | 596 ++++++++++++++++--
 spirv_hlsl.hpp                                |  50 +-
 spirv_msl.cpp                                 | 543 +++++++++++-----
 spirv_msl.hpp                                 |  17 +-
 167 files changed, 6221 insertions(+), 363 deletions(-)
 create mode 100644 reference/opt/shaders-hlsl/asm/frag/frem.asm.frag
 create mode 100644 reference/opt/shaders-hlsl/asm/frag/function-overload-alias.asm.frag
 create mode 100644 reference/opt/shaders-hlsl/comp/inverse.comp
 create mode 100644 reference/opt/shaders-hlsl/comp/num-workgroups-alone.comp
 create mode 100644 reference/opt/shaders-hlsl/comp/num-workgroups-with-builtins.comp
 create mode 100644 reference/opt/shaders-hlsl/frag/clip-cull-distance.frag
 create mode 100644 reference/opt/shaders-hlsl/frag/front-facing.frag
 create mode 100644 reference/opt/shaders-hlsl/frag/inf-nan-constant.frag
 create mode 100644 reference/opt/shaders-hlsl/frag/input-attachment-ms.frag
 create mode 100644 reference/opt/shaders-hlsl/frag/input-attachment.frag
 create mode 100644 reference/opt/shaders-hlsl/frag/point-coord-compat.frag
 create mode 100644 reference/opt/shaders-hlsl/frag/spec-constant-block-size.frag
 create mode 100644 reference/opt/shaders-hlsl/vert/clip-cull-distance.vert
 create mode 100644 reference/opt/shaders-hlsl/vert/return-array.vert
 create mode 100644 reference/opt/shaders-msl/asm/frag/frem.asm.frag
 create mode 100644 reference/opt/shaders-msl/asm/frag/function-overload-alias.asm.frag
 create mode 100644 reference/opt/shaders-msl/comp/inverse.comp
 create mode 100644 reference/opt/shaders-msl/comp/struct-nested.comp
 create mode 100644 reference/opt/shaders-msl/frag/front-facing.frag
 create mode 100644 reference/opt/shaders-msl/frag/inf-nan-constant.frag
 create mode 100644 reference/opt/shaders-msl/frag/input-attachment-ms.frag
 create mode 100644 reference/opt/shaders-msl/frag/input-attachment.frag
 create mode 100644 reference/opt/shaders-msl/frag/sample-depth-separate-image-sampler.frag
 create mode 100644 reference/opt/shaders-msl/frag/sample-mask.frag
 create mode 100644 reference/opt/shaders-msl/frag/spec-constant-block-size.frag
 create mode 100644 reference/opt/shaders-msl/vert/packed_matrix.vert
 create mode 100644 reference/opt/shaders-msl/vert/return-array.vert
 create mode 100644 reference/opt/shaders/asm/frag/frem.asm.frag
 create mode 100644 reference/opt/shaders/asm/frag/function-overload-alias.asm.frag
 create mode 100644 reference/opt/shaders/asm/frag/image-fetch-no-sampler.asm.vk.frag
 create mode 100644 reference/opt/shaders/asm/frag/image-fetch-no-sampler.asm.vk.frag.vk
 create mode 100644 reference/opt/shaders/asm/vert/invariant-block.asm.vert
 create mode 100644 reference/opt/shaders/asm/vert/invariant-block.sso.asm.vert
 create mode 100644 reference/opt/shaders/asm/vert/invariant.asm.vert
 create mode 100644 reference/opt/shaders/asm/vert/invariant.sso.asm.vert
 create mode 100644 reference/opt/shaders/desktop-only/frag/clip-cull-distance.desktop.frag
 create mode 100644 reference/opt/shaders/desktop-only/frag/inf-nan-constant-double.desktop.frag
 create mode 100644 reference/opt/shaders/desktop-only/vert/clip-cull-distance.desktop.sso.vert
 create mode 100644 reference/opt/shaders/frag/front-facing.frag
 create mode 100644 reference/opt/shaders/frag/hoisted-temporary-use-continue-block-as-value.frag
 create mode 100644 reference/opt/shaders/frag/inf-nan-constant.frag
 create mode 100644 reference/opt/shaders/vert/return-array.vert
 create mode 100644 reference/opt/shaders/vulkan/frag/spec-constant-block-size.vk.frag
 create mode 100644 reference/opt/shaders/vulkan/frag/spec-constant-block-size.vk.frag.vk
 create mode 100644 reference/shaders-hlsl/asm/frag/frem.asm.frag
 create mode 100644 reference/shaders-hlsl/asm/frag/function-overload-alias.asm.frag
 create mode 100644 reference/shaders-hlsl/comp/inverse.comp
 create mode 100644 reference/shaders-hlsl/comp/num-workgroups-alone.comp
 create mode 100644 reference/shaders-hlsl/comp/num-workgroups-with-builtins.comp
 create mode 100644 reference/shaders-hlsl/frag/clip-cull-distance.frag
 create mode 100644 reference/shaders-hlsl/frag/front-facing.frag
 create mode 100644 reference/shaders-hlsl/frag/inf-nan-constant.frag
 create mode 100644 reference/shaders-hlsl/frag/input-attachment-ms.frag
 create mode 100644 reference/shaders-hlsl/frag/input-attachment.frag
 create mode 100644 reference/shaders-hlsl/frag/point-coord-compat.frag
 create mode 100644 reference/shaders-hlsl/frag/spec-constant-block-size.frag
 create mode 100644 reference/shaders-hlsl/vert/clip-cull-distance.vert
 create mode 100644 reference/shaders-hlsl/vert/return-array.vert
 create mode 100644 reference/shaders-msl/asm/frag/frem.asm.frag
 create mode 100644 reference/shaders-msl/asm/frag/function-overload-alias.asm.frag
 create mode 100644 reference/shaders-msl/comp/inverse.comp
 create mode 100644 reference/shaders-msl/comp/struct-nested.comp
 create mode 100644 reference/shaders-msl/frag/front-facing.frag
 create mode 100644 reference/shaders-msl/frag/inf-nan-constant.frag
 create mode 100644 reference/shaders-msl/frag/input-attachment-ms.frag
 create mode 100644 reference/shaders-msl/frag/input-attachment.frag
 create mode 100644 reference/shaders-msl/frag/sample-depth-separate-image-sampler.frag
 create mode 100644 reference/shaders-msl/frag/sample-mask.frag
 create mode 100644 reference/shaders-msl/frag/spec-constant-block-size.frag
 create mode 100644 reference/shaders-msl/vert/packed_matrix.vert
 create mode 100644 reference/shaders-msl/vert/return-array.vert
 create mode 100644 reference/shaders/asm/frag/frem.asm.frag
 create mode 100644 reference/shaders/asm/frag/function-overload-alias.asm.frag
 create mode 100644 reference/shaders/asm/frag/image-fetch-no-sampler.asm.vk.frag
 create mode 100644 reference/shaders/asm/frag/image-fetch-no-sampler.asm.vk.frag.vk
 create mode 100644 reference/shaders/asm/vert/invariant-block.asm.vert
 create mode 100644 reference/shaders/asm/vert/invariant-block.sso.asm.vert
 create mode 100644 reference/shaders/asm/vert/invariant.asm.vert
 create mode 100644 reference/shaders/asm/vert/invariant.sso.asm.vert
 create mode 100644 reference/shaders/desktop-only/frag/clip-cull-distance.desktop.frag
 create mode 100644 reference/shaders/desktop-only/frag/inf-nan-constant-double.desktop.frag
 create mode 100644 reference/shaders/desktop-only/vert/clip-cull-distance.desktop.sso.vert
 create mode 100644 reference/shaders/frag/front-facing.frag
 create mode 100644 reference/shaders/frag/hoisted-temporary-use-continue-block-as-value.frag
 create mode 100644 reference/shaders/frag/inf-nan-constant.frag
 create mode 100644 reference/shaders/vert/return-array.vert
 create mode 100644 reference/shaders/vulkan/frag/spec-constant-block-size.vk.frag
 create mode 100644 reference/shaders/vulkan/frag/spec-constant-block-size.vk.frag.vk
 create mode 100644 shaders-hlsl/asm/frag/frem.asm.frag
 create mode 100644 shaders-hlsl/asm/frag/function-overload-alias.asm.frag
 create mode 100644 shaders-hlsl/comp/inverse.comp
 create mode 100644 shaders-hlsl/comp/num-workgroups-alone.comp
 create mode 100644 shaders-hlsl/comp/num-workgroups-with-builtins.comp
 create mode 100644 shaders-hlsl/frag/clip-cull-distance.frag
 create mode 100644 shaders-hlsl/frag/front-facing.frag
 create mode 100644 shaders-hlsl/frag/inf-nan-constant.frag
 create mode 100644 shaders-hlsl/frag/input-attachment-ms.frag
 create mode 100644 shaders-hlsl/frag/input-attachment.frag
 create mode 100644 shaders-hlsl/frag/point-coord-compat.frag
 create mode 100644 shaders-hlsl/frag/spec-constant-block-size.frag
 create mode 100644 shaders-hlsl/vert/clip-cull-distance.vert
 create mode 100644 shaders-hlsl/vert/return-array.vert
 create mode 100644 shaders-msl/asm/frag/frem.asm.frag
 create mode 100644 shaders-msl/asm/frag/function-overload-alias.asm.frag
 create mode 100644 shaders-msl/comp/inverse.comp
 create mode 100644 shaders-msl/comp/struct-nested.comp
 create mode 100644 shaders-msl/frag/front-facing.frag
 create mode 100644 shaders-msl/frag/inf-nan-constant.frag
 create mode 100644 shaders-msl/frag/input-attachment-ms.frag
 create mode 100644 shaders-msl/frag/input-attachment.frag
 create mode 100644 shaders-msl/frag/sample-depth-separate-image-sampler.frag
 create mode 100644 shaders-msl/frag/sample-mask.frag
 create mode 100644 shaders-msl/frag/spec-constant-block-size.frag
 create mode 100644 shaders-msl/vert/packed_matrix.vert
 create mode 100644 shaders-msl/vert/return-array.vert
 create mode 100644 shaders-other/README.md
 create mode 100644 shaders-other/aliased-entry-point-names.asm
 create mode 100644 shaders/asm/frag/frem.asm.frag
 create mode 100644 shaders/asm/frag/function-overload-alias.asm.frag
 create mode 100644 shaders/asm/frag/image-fetch-no-sampler.asm.vk.frag
 create mode 100644 shaders/asm/vert/invariant-block.asm.vert
 create mode 100644 shaders/asm/vert/invariant-block.sso.asm.vert
 create mode 100644 shaders/asm/vert/invariant.asm.vert
 create mode 100644 shaders/asm/vert/invariant.sso.asm.vert
 create mode 100644 shaders/desktop-only/frag/clip-cull-distance.desktop.frag
 create mode 100644 shaders/desktop-only/frag/inf-nan-constant-double.desktop.frag
 create mode 100644 shaders/desktop-only/vert/clip-cull-distance.desktop.sso.vert
 create mode 100644 shaders/frag/front-facing.frag
 create mode 100644 shaders/frag/hoisted-temporary-use-continue-block-as-value.frag
 create mode 100644 shaders/frag/inf-nan-constant.frag
 create mode 100644 shaders/vert/return-array.vert
 create mode 100644 shaders/vulkan/frag/spec-constant-block-size.vk.frag
 create mode 100644 spirv_cross_util.cpp
 create mode 100644 spirv_cross_util.hpp

diff --git a/.travis.yml b/.travis.yml
index 21af2a0203..26941f43a3 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -2,7 +2,7 @@ language: cpp
 os:
   - linux
   - osx
-osx_image: xcode8.2
+osx_image: xcode8.3
 
 # Use Ubuntu 14.04 LTS (Trusty) as the Linux testing environment.
 sudo: required
@@ -13,7 +13,7 @@ env:
   - GLSLANG_REV=9c6f8cc29ba303b43ccf36deea6bb38a304f9b92 SPIRV_TOOLS_REV=e28edd458b729da7bbfd51e375feb33103709e6f
 
 before_script:
-  - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; brew install python3; fi
+  - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew upgrade python3; fi
   - git clone https://github.com/KhronosGroup/glslang.git glslang
   - git clone https://github.com/KhronosGroup/SPIRV-Tools SPIRV-Tools
   - git clone https://github.com/KhronosGroup/SPIRV-Headers.git SPIRV-Tools/external/spirv-headers
diff --git a/CMakeLists.txt b/CMakeLists.txt
index cd3f677475..659315c785 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -101,12 +101,17 @@ spirv_cross_add_library(spirv-cross-hlsl spirv_cross_hlsl STATIC
     ${CMAKE_CURRENT_SOURCE_DIR}/spirv_hlsl.hpp
     ${CMAKE_CURRENT_SOURCE_DIR}/spirv_hlsl.cpp)
 
+spirv_cross_add_library(spirv-cross-util spirv_cross_util STATIC
+    ${CMAKE_CURRENT_SOURCE_DIR}/spirv_cross_util.hpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/spirv_cross_util.cpp)
+
 add_executable(spirv-cross main.cpp)
 target_compile_options(spirv-cross PRIVATE ${spirv-compiler-options})
 target_compile_definitions(spirv-cross PRIVATE ${spirv-compiler-defines})
 
 install(TARGETS spirv-cross RUNTIME DESTINATION bin)
-target_link_libraries(spirv-cross spirv-cross-glsl spirv-cross-hlsl spirv-cross-cpp spirv-cross-msl spirv-cross-core)
+target_link_libraries(spirv-cross spirv-cross-glsl spirv-cross-hlsl spirv-cross-cpp spirv-cross-msl spirv-cross-util spirv-cross-core)
+target_link_libraries(spirv-cross-util spirv-cross-core)
 target_link_libraries(spirv-cross-glsl spirv-cross-core)
 target_link_libraries(spirv-cross-msl spirv-cross-glsl)
 target_link_libraries(spirv-cross-hlsl spirv-cross-glsl)
diff --git a/README.md b/README.md
index e1409a4bf7..a6f5231e7a 100644
--- a/README.md
+++ b/README.md
@@ -7,8 +7,8 @@ SPIRV-Cross is a tool designed for parsing and converting SPIR-V to other shader
 ## Features
 
   - Convert SPIR-V to readable, usable and efficient GLSL
-  - Convert SPIR-V to readable, usable and efficient Metal Shading Language (MSL) [EXPERIMENTAL]
-  - Convert SPIR-V to readable, usable and efficient HLSL [EXPERIMENTAL]
+  - Convert SPIR-V to readable, usable and efficient Metal Shading Language (MSL)
+  - Convert SPIR-V to readable, usable and efficient HLSL
   - Convert SPIR-V to debuggable C++ [EXPERIMENTAL]
   - Reflection API to simplify the creation of Vulkan pipeline layouts
   - Reflection API to modify and tweak OpDecorations
diff --git a/jni/Android.mk b/jni/Android.mk
index ca5014d63a..d5e94b5311 100644
--- a/jni/Android.mk
+++ b/jni/Android.mk
@@ -4,7 +4,7 @@ include $(CLEAR_VARS)
 
 LOCAL_CFLAGS += -std=c++11 -Wall -Wextra
 LOCAL_MODULE := spirv-cross
-LOCAL_SRC_FILES := ../spirv_cfg.cpp ../spirv_cross.cpp ../spirv_glsl.cpp ../spirv_msl.cpp ../spirv_cpp.cpp
+LOCAL_SRC_FILES := ../spirv_cfg.cpp ../spirv_cross.cpp ../spirv_cross_util.cpp ../spirv_glsl.cpp ../spirv_hlsl.cpp ../spirv_msl.cpp ../spirv_cpp.cpp
 LOCAL_CPP_FEATURES := exceptions
 LOCAL_ARM_MODE := arm
 LOCAL_CFLAGS := -D__STDC_LIMIT_MACROS
diff --git a/main.cpp b/main.cpp
index 49fc8653b3..4f0265d1af 100644
--- a/main.cpp
+++ b/main.cpp
@@ -15,6 +15,7 @@
  */
 
 #include "spirv_cpp.hpp"
+#include "spirv_cross_util.hpp"
 #include "spirv_glsl.hpp"
 #include "spirv_hlsl.hpp"
 #include "spirv_msl.hpp"
@@ -286,9 +287,9 @@ static void print_resources(const Compiler &compiler, const ShaderResources &res
 	uint64_t modes = compiler.get_execution_mode_mask();
 
 	fprintf(stderr, "Entry points:\n");
-	auto entry_points = compiler.get_entry_points();
+	auto entry_points = compiler.get_entry_points_and_stages();
 	for (auto &e : entry_points)
-		fprintf(stderr, "  %s (%s)\n", e.c_str(), execution_model_to_str(compiler.get_entry_point(e).model));
+		fprintf(stderr, "  %s (%s)\n", e.name.c_str(), execution_model_to_str(e.execution_model));
 	fprintf(stderr, "\n");
 
 	fprintf(stderr, "Execution modes:\n");
@@ -467,8 +468,15 @@ struct CLIArguments
 	vector<InterfaceVariableRename> interface_variable_renames;
 	vector<HLSLVertexAttributeRemap> hlsl_attr_remap;
 	string entry;
+	string entry_stage;
 
-	vector<pair<string, string>> entry_point_rename;
+	struct Rename
+	{
+		string old_name;
+		string new_name;
+		ExecutionModel execution_model;
+	};
+	vector<Rename> entry_point_rename;
 
 	uint32_t iterations = 1;
 	bool cpp = false;
@@ -491,12 +499,13 @@ static void print_help()
 	                "[--hlsl] [--shader-model] [--hlsl-enable-compat] "
 	                "[--separate-shader-objects]"
 	                "[--pls-in format input-name] [--pls-out format output-name] [--remap source_name target_name "
-	                "components] [--extension ext] [--entry name] [--remove-unused-variables] "
+	                "components] [--extension ext] [--entry name] [--stage <stage (vert, frag, geom, tesc, tese, "
+	                "comp)>] [--remove-unused-variables] "
 	                "[--flatten-multidimensional-arrays] [--no-420pack-extension] "
 	                "[--remap-variable-type <variable_name> <new_variable_type>] "
 	                "[--rename-interface-variable <in|out> <location> <new_variable_name>] "
 	                "[--set-hlsl-vertex-input-semantic <location> <semantic>] "
-	                "[--rename-entry-point <old> <new>] "
+	                "[--rename-entry-point <old> <new> <stage>] "
 	                "\n");
 }
 
@@ -584,31 +593,22 @@ static PlsFormat pls_format(const char *str)
 		return PlsNone;
 }
 
-void rename_interface_variable(Compiler &compiler, const vector<Resource> &resources,
-                               const InterfaceVariableRename &rename)
+static ExecutionModel stage_to_execution_model(const std::string &stage)
 {
-	for (auto &v : resources)
-	{
-		if (!compiler.has_decoration(v.id, spv::DecorationLocation))
-			continue;
-
-		auto loc = compiler.get_decoration(v.id, spv::DecorationLocation);
-		if (loc != rename.location)
-			continue;
-
-		auto &type = compiler.get_type(v.base_type_id);
-
-		// This is more of a friendly variant. If we need to rename interface variables, we might have to rename
-		// structs as well and make sure all the names match up.
-		if (type.basetype == SPIRType::Struct)
-		{
-			compiler.set_name(v.base_type_id, join("SPIRV_Cross_Interface_Location", rename.location));
-			for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
-				compiler.set_member_name(v.base_type_id, i, join("InterfaceMember", i));
-		}
-
-		compiler.set_name(v.id, rename.variable_name);
-	}
+	if (stage == "vert")
+		return ExecutionModelVertex;
+	else if (stage == "frag")
+		return ExecutionModelFragment;
+	else if (stage == "comp")
+		return ExecutionModelGLCompute;
+	else if (stage == "tesc")
+		return ExecutionModelTessellationControl;
+	else if (stage == "tese")
+		return ExecutionModelTessellationEvaluation;
+	else if (stage == "geom")
+		return ExecutionModelGeometry;
+	else
+		SPIRV_CROSS_THROW("Invalid stage.");
 }
 
 static int main_inner(int argc, char *argv[])
@@ -652,9 +652,11 @@ static int main_inner(int argc, char *argv[])
 	cbs.add("--rename-entry-point", [&args](CLIParser &parser) {
 		auto old_name = parser.next_string();
 		auto new_name = parser.next_string();
-		args.entry_point_rename.push_back({ old_name, new_name });
+		auto model = stage_to_execution_model(parser.next_string());
+		args.entry_point_rename.push_back({ old_name, new_name, move(model) });
 	});
 	cbs.add("--entry", [&args](CLIParser &parser) { args.entry = parser.next_string(); });
+	cbs.add("--stage", [&args](CLIParser &parser) { args.entry_stage = parser.next_string(); });
 	cbs.add("--separate-shader-objects", [&args](CLIParser &) { args.sso = true; });
 	cbs.add("--set-hlsl-vertex-input-semantic", [&args](CLIParser &parser) {
 		HLSLVertexAttributeRemap remap;
@@ -733,6 +735,7 @@ static int main_inner(int argc, char *argv[])
 	unique_ptr<CompilerGLSL> compiler;
 
 	bool combined_image_samplers = false;
+	bool build_dummy_sampler = false;
 
 	if (args.cpp)
 	{
@@ -755,6 +758,7 @@ static int main_inner(int argc, char *argv[])
 	else
 	{
 		combined_image_samplers = !args.vulkan_semantics;
+		build_dummy_sampler = true;
 		compiler = unique_ptr<CompilerGLSL>(new CompilerGLSL(read_spirv_file(args.input)));
 	}
 
@@ -770,10 +774,82 @@ static int main_inner(int argc, char *argv[])
 	}
 
 	for (auto &rename : args.entry_point_rename)
-		compiler->rename_entry_point(rename.first, rename.second);
+		compiler->rename_entry_point(rename.old_name, rename.new_name, rename.execution_model);
 
-	if (!args.entry.empty())
-		compiler->set_entry_point(args.entry);
+	auto entry_points = compiler->get_entry_points_and_stages();
+	auto entry_point = args.entry;
+	ExecutionModel model = ExecutionModelMax;
+
+	if (!args.entry_stage.empty())
+	{
+		model = stage_to_execution_model(args.entry_stage);
+		if (entry_point.empty())
+		{
+			// Just use the first entry point with this stage.
+			for (auto &e : entry_points)
+			{
+				if (e.execution_model == model)
+				{
+					entry_point = e.name;
+					break;
+				}
+			}
+
+			if (entry_point.empty())
+			{
+				fprintf(stderr, "Could not find an entry point with stage: %s\n", args.entry_stage.c_str());
+				return EXIT_FAILURE;
+			}
+		}
+		else
+		{
+			// Make sure both stage and name exists.
+			bool exists = false;
+			for (auto &e : entry_points)
+			{
+				if (e.execution_model == model && e.name == entry_point)
+				{
+					exists = true;
+					break;
+				}
+			}
+
+			if (!exists)
+			{
+				fprintf(stderr, "Could not find an entry point %s with stage: %s\n", entry_point.c_str(),
+				        args.entry_stage.c_str());
+				return EXIT_FAILURE;
+			}
+		}
+	}
+	else if (!entry_point.empty())
+	{
+		// Make sure there is just one entry point with this name, or the stage
+		// is ambiguous.
+		uint32_t stage_count = 0;
+		for (auto &e : entry_points)
+		{
+			if (e.name == entry_point)
+			{
+				stage_count++;
+				model = e.execution_model;
+			}
+		}
+
+		if (stage_count == 0)
+		{
+			fprintf(stderr, "There is no entry point with name: %s\n", entry_point.c_str());
+			return EXIT_FAILURE;
+		}
+		else if (stage_count > 1)
+		{
+			fprintf(stderr, "There is more than one entry point with name: %s. Use --stage.\n", entry_point.c_str());
+			return EXIT_FAILURE;
+		}
+	}
+
+	if (!entry_point.empty())
+		compiler->set_entry_point(entry_point, model);
 
 	if (!args.set_version && !compiler->get_options().version)
 	{
@@ -816,10 +892,14 @@ static int main_inner(int argc, char *argv[])
 		{
 			// Enable all compat options.
 			hlsl_opts.point_size_compat = true;
+			hlsl_opts.point_coord_compat = true;
 		}
 		hlsl->set_options(hlsl_opts);
 	}
 
+	if (build_dummy_sampler)
+		compiler->build_dummy_sampler_for_combined_images();
+
 	ShaderResources res;
 	if (args.remove_unused)
 	{
@@ -858,9 +938,11 @@ static int main_inner(int argc, char *argv[])
 	for (auto &rename : args.interface_variable_renames)
 	{
 		if (rename.storageClass == StorageClassInput)
-			rename_interface_variable(*compiler, res.stage_inputs, rename);
+			spirv_cross_util::rename_interface_variable(*compiler, res.stage_inputs, rename.location,
+			                                            rename.variable_name);
 		else if (rename.storageClass == StorageClassOutput)
-			rename_interface_variable(*compiler, res.stage_outputs, rename);
+			spirv_cross_util::rename_interface_variable(*compiler, res.stage_outputs, rename.location,
+			                                            rename.variable_name);
 		else
 		{
 			fprintf(stderr, "error at --rename-interface-variable <in|out> ...\n");
@@ -887,6 +969,17 @@ static int main_inner(int argc, char *argv[])
 		}
 	}
 
+	if (args.hlsl)
+	{
+		auto *hlsl_compiler = static_cast<CompilerHLSL *>(compiler.get());
+		uint32_t new_builtin = hlsl_compiler->remap_num_workgroups_builtin();
+		if (new_builtin)
+		{
+			hlsl_compiler->set_decoration(new_builtin, DecorationDescriptorSet, 0);
+			hlsl_compiler->set_decoration(new_builtin, DecorationBinding, 0);
+		}
+	}
+
 	string glsl;
 	for (uint32_t i = 0; i < args.iterations; i++)
 	{
diff --git a/msvc/SPIRV-Cross.vcxproj b/msvc/SPIRV-Cross.vcxproj
index 8c57633e94..6040e2e116 100644
--- a/msvc/SPIRV-Cross.vcxproj
+++ b/msvc/SPIRV-Cross.vcxproj
@@ -130,6 +130,7 @@
     <ClCompile Include="..\spirv_hlsl.cpp" />
     <ClCompile Include="..\spirv_msl.cpp" />
     <ClCompile Include="..\spirv_cfg.cpp" />
+    <ClCompile Include="..\spirv_cross_util.cpp" />
   </ItemGroup>
   <ItemGroup>
     <ClInclude Include="..\GLSL.std.450.h" />
@@ -141,6 +142,7 @@
     <ClInclude Include="..\spirv_hlsl.hpp" />
     <ClInclude Include="..\spirv_msl.hpp" />
     <ClInclude Include="..\spirv_cfg.hpp" />
+    <ClInclude Include="..\spirv_cross_util.hpp" />
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
diff --git a/msvc/SPIRV-Cross.vcxproj.filters b/msvc/SPIRV-Cross.vcxproj.filters
index c9edf4608b..f853c08b5b 100644
--- a/msvc/SPIRV-Cross.vcxproj.filters
+++ b/msvc/SPIRV-Cross.vcxproj.filters
@@ -36,6 +36,9 @@
     <ClCompile Include="..\spirv_hlsl.cpp">
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="..\spirv_cross_util.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
   </ItemGroup>
   <ItemGroup>
     <ClInclude Include="..\GLSL.std.450.h">
@@ -65,5 +68,8 @@
     <ClInclude Include="..\spirv_hlsl.hpp">
       <Filter>Header Files</Filter>
     </ClInclude>
+    <ClInclude Include="..\spirv_cross_util.hpp">
+      <Filter>Header Files</Filter>
+    </ClInclude>
   </ItemGroup>
 </Project>
diff --git a/reference/opt/shaders-hlsl/asm/frag/frem.asm.frag b/reference/opt/shaders-hlsl/asm/frag/frem.asm.frag
new file mode 100644
index 0000000000..67998c56a7
--- /dev/null
+++ b/reference/opt/shaders-hlsl/asm/frag/frem.asm.frag
@@ -0,0 +1,29 @@
+static float4 FragColor;
+static float4 vA;
+static float4 vB;
+
+struct SPIRV_Cross_Input
+{
+    float4 vA : TEXCOORD0;
+    float4 vB : TEXCOORD1;
+};
+
+struct SPIRV_Cross_Output
+{
+    float4 FragColor : SV_Target0;
+};
+
+void frag_main()
+{
+    FragColor = fmod(vA, vB);
+}
+
+SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
+{
+    vA = stage_input.vA;
+    vB = stage_input.vB;
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.FragColor = FragColor;
+    return stage_output;
+}
diff --git a/reference/opt/shaders-hlsl/asm/frag/function-overload-alias.asm.frag b/reference/opt/shaders-hlsl/asm/frag/function-overload-alias.asm.frag
new file mode 100644
index 0000000000..432915da36
--- /dev/null
+++ b/reference/opt/shaders-hlsl/asm/frag/function-overload-alias.asm.frag
@@ -0,0 +1,19 @@
+static float4 FragColor;
+
+struct SPIRV_Cross_Output
+{
+    float4 FragColor : SV_Target0;
+};
+
+void frag_main()
+{
+    FragColor = (((1.0f.xxxx + 1.0f.xxxx) + (1.0f.xxx.xyzz + 1.0f.xxxx)) + (1.0f.xxxx + 2.0f.xxxx)) + (1.0f.xx.xyxy + 2.0f.xxxx);
+}
+
+SPIRV_Cross_Output main()
+{
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.FragColor = FragColor;
+    return stage_output;
+}
diff --git a/reference/opt/shaders-hlsl/comp/inverse.comp b/reference/opt/shaders-hlsl/comp/inverse.comp
new file mode 100644
index 0000000000..3be954a6f6
--- /dev/null
+++ b/reference/opt/shaders-hlsl/comp/inverse.comp
@@ -0,0 +1,122 @@
+RWByteAddressBuffer _15 : register(u0);
+ByteAddressBuffer _20 : register(t1);
+
+// Returns the inverse of a matrix, by using the algorithm of calculating the classical
+// adjoint and dividing by the determinant. The contents of the matrix are changed.
+float2x2 SPIRV_Cross_Inverse(float2x2 m)
+{
+    float2x2 adj;	// The adjoint matrix (inverse after dividing by determinant)
+
+    // Create the transpose of the cofactors, as the classical adjoint of the matrix.
+    adj[0][0] =  m[1][1];
+    adj[0][1] = -m[0][1];
+
+    adj[1][0] = -m[1][0];
+    adj[1][1] =  m[0][0];
+
+    // Calculate the determinant as a combination of the cofactors of the first row.
+    float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]);
+
+    // Divide the classical adjoint matrix by the determinant.
+    // If determinant is zero, matrix is not invertable, so leave it unchanged.
+    return (det != 0.0f) ? (adj * (1.0f / det)) : m;
+}
+
+// Returns the determinant of a 2x2 matrix.
+float SPIRV_Cross_Det2x2(float a1, float a2, float b1, float b2)
+{
+    return a1 * b2 - b1 * a2;
+}
+
+// Returns the inverse of a matrix, by using the algorithm of calculating the classical
+// adjoint and dividing by the determinant. The contents of the matrix are changed.
+float3x3 SPIRV_Cross_Inverse(float3x3 m)
+{
+    float3x3 adj;	// The adjoint matrix (inverse after dividing by determinant)
+
+    // Create the transpose of the cofactors, as the classical adjoint of the matrix.
+    adj[0][0] =  SPIRV_Cross_Det2x2(m[1][1], m[1][2], m[2][1], m[2][2]);
+    adj[0][1] = -SPIRV_Cross_Det2x2(m[0][1], m[0][2], m[2][1], m[2][2]);
+    adj[0][2] =  SPIRV_Cross_Det2x2(m[0][1], m[0][2], m[1][1], m[1][2]);
+
+    adj[1][0] = -SPIRV_Cross_Det2x2(m[1][0], m[1][2], m[2][0], m[2][2]);
+    adj[1][1] =  SPIRV_Cross_Det2x2(m[0][0], m[0][2], m[2][0], m[2][2]);
+    adj[1][2] = -SPIRV_Cross_Det2x2(m[0][0], m[0][2], m[1][0], m[1][2]);
+
+    adj[2][0] =  SPIRV_Cross_Det2x2(m[1][0], m[1][1], m[2][0], m[2][1]);
+    adj[2][1] = -SPIRV_Cross_Det2x2(m[0][0], m[0][1], m[2][0], m[2][1]);
+    adj[2][2] =  SPIRV_Cross_Det2x2(m[0][0], m[0][1], m[1][0], m[1][1]);
+
+    // Calculate the determinant as a combination of the cofactors of the first row.
+    float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]) + (adj[0][2] * m[2][0]);
+
+    // Divide the classical adjoint matrix by the determinant.
+    // If determinant is zero, matrix is not invertable, so leave it unchanged.
+    return (det != 0.0f) ? (adj * (1.0f / det)) : m;
+}
+
+// Returns the determinant of a 3x3 matrix.
+float SPIRV_Cross_Det3x3(float a1, float a2, float a3, float b1, float b2, float b3, float c1, float c2, float c3)
+{
+    return a1 * SPIRV_Cross_Det2x2(b2, b3, c2, c3) - b1 * SPIRV_Cross_Det2x2(a2, a3, c2, c3) + c1 * SPIRV_Cross_Det2x2(a2, a3, b2, b3);
+}
+
+// Returns the inverse of a matrix, by using the algorithm of calculating the classical
+// adjoint and dividing by the determinant. The contents of the matrix are changed.
+float4x4 SPIRV_Cross_Inverse(float4x4 m)
+{
+    float4x4 adj;	// The adjoint matrix (inverse after dividing by determinant)
+
+    // Create the transpose of the cofactors, as the classical adjoint of the matrix.
+    adj[0][0] =  SPIRV_Cross_Det3x3(m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], m[3][3]);
+    adj[0][1] = -SPIRV_Cross_Det3x3(m[0][1], m[0][2], m[0][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], m[3][3]);
+    adj[0][2] =  SPIRV_Cross_Det3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[3][1], m[3][2], m[3][3]);
+    adj[0][3] = -SPIRV_Cross_Det3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], m[2][3]);
+
+    adj[1][0] = -SPIRV_Cross_Det3x3(m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], m[3][3]);
+    adj[1][1] =  SPIRV_Cross_Det3x3(m[0][0], m[0][2], m[0][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], m[3][3]);
+    adj[1][2] = -SPIRV_Cross_Det3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[3][0], m[3][2], m[3][3]);
+    adj[1][3] =  SPIRV_Cross_Det3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], m[2][3]);
+
+    adj[2][0] =  SPIRV_Cross_Det3x3(m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], m[3][3]);
+    adj[2][1] = -SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], m[3][3]);
+    adj[2][2] =  SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[3][0], m[3][1], m[3][3]);
+    adj[2][3] = -SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], m[2][3]);
+
+    adj[3][0] = -SPIRV_Cross_Det3x3(m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], m[3][2]);
+    adj[3][1] =  SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], m[3][2]);
+    adj[3][2] = -SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[3][0], m[3][1], m[3][2]);
+    adj[3][3] =  SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], m[2][2]);
+
+    // Calculate the determinant as a combination of the cofactors of the first row.
+    float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]) + (adj[0][2] * m[2][0]) + (adj[0][3] * m[3][0]);
+
+    // Divide the classical adjoint matrix by the determinant.
+    // If determinant is zero, matrix is not invertable, so leave it unchanged.
+    return (det != 0.0f) ? (adj * (1.0f / det)) : m;
+}
+
+void comp_main()
+{
+    float2x2 _23 = asfloat(uint2x2(_20.Load2(0), _20.Load2(8)));
+    float2x2 _24 = SPIRV_Cross_Inverse(_23);
+    _15.Store2(0, asuint(_24[0]));
+    _15.Store2(8, asuint(_24[1]));
+    float3x3 _29 = asfloat(uint3x3(_20.Load3(16), _20.Load3(32), _20.Load3(48)));
+    float3x3 _30 = SPIRV_Cross_Inverse(_29);
+    _15.Store3(16, asuint(_30[0]));
+    _15.Store3(32, asuint(_30[1]));
+    _15.Store3(48, asuint(_30[2]));
+    float4x4 _35 = asfloat(uint4x4(_20.Load4(64), _20.Load4(80), _20.Load4(96), _20.Load4(112)));
+    float4x4 _36 = SPIRV_Cross_Inverse(_35);
+    _15.Store4(64, asuint(_36[0]));
+    _15.Store4(80, asuint(_36[1]));
+    _15.Store4(96, asuint(_36[2]));
+    _15.Store4(112, asuint(_36[3]));
+}
+
+[numthreads(1, 1, 1)]
+void main()
+{
+    comp_main();
+}
diff --git a/reference/opt/shaders-hlsl/comp/num-workgroups-alone.comp b/reference/opt/shaders-hlsl/comp/num-workgroups-alone.comp
new file mode 100644
index 0000000000..1e7dd542c1
--- /dev/null
+++ b/reference/opt/shaders-hlsl/comp/num-workgroups-alone.comp
@@ -0,0 +1,16 @@
+RWByteAddressBuffer _10 : register(u0);
+cbuffer SPIRV_Cross_NumWorkgroups : register(b0)
+{
+    uint3 SPIRV_Cross_NumWorkgroups_count : packoffset(c0);
+};
+
+void comp_main()
+{
+    _10.Store3(0, SPIRV_Cross_NumWorkgroups_count);
+}
+
+[numthreads(1, 1, 1)]
+void main()
+{
+    comp_main();
+}
diff --git a/reference/opt/shaders-hlsl/comp/num-workgroups-with-builtins.comp b/reference/opt/shaders-hlsl/comp/num-workgroups-with-builtins.comp
new file mode 100644
index 0000000000..f44754e823
--- /dev/null
+++ b/reference/opt/shaders-hlsl/comp/num-workgroups-with-builtins.comp
@@ -0,0 +1,23 @@
+RWByteAddressBuffer _10 : register(u0);
+cbuffer SPIRV_Cross_NumWorkgroups : register(b0)
+{
+    uint3 SPIRV_Cross_NumWorkgroups_count : packoffset(c0);
+};
+
+static uint3 gl_WorkGroupID;
+struct SPIRV_Cross_Input
+{
+    uint3 gl_WorkGroupID : SV_GroupID;
+};
+
+void comp_main()
+{
+    _10.Store3(0, SPIRV_Cross_NumWorkgroups_count + gl_WorkGroupID);
+}
+
+[numthreads(1, 1, 1)]
+void main(SPIRV_Cross_Input stage_input)
+{
+    gl_WorkGroupID = stage_input.gl_WorkGroupID;
+    comp_main();
+}
diff --git a/reference/opt/shaders-hlsl/frag/clip-cull-distance.frag b/reference/opt/shaders-hlsl/frag/clip-cull-distance.frag
new file mode 100644
index 0000000000..52f1ac30b6
--- /dev/null
+++ b/reference/opt/shaders-hlsl/frag/clip-cull-distance.frag
@@ -0,0 +1,30 @@
+static float gl_ClipDistance[2];
+static float gl_CullDistance[1];
+static float FragColor;
+
+struct SPIRV_Cross_Input
+{
+    float2 gl_ClipDistance0 : SV_ClipDistance0;
+    float gl_CullDistance0 : SV_CullDistance0;
+};
+
+struct SPIRV_Cross_Output
+{
+    float FragColor : SV_Target0;
+};
+
+void frag_main()
+{
+    FragColor = (gl_ClipDistance[0] + gl_CullDistance[0]) + gl_ClipDistance[1];
+}
+
+SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
+{
+    gl_ClipDistance[0] = stage_input.gl_ClipDistance0.x;
+    gl_ClipDistance[1] = stage_input.gl_ClipDistance0.y;
+    gl_CullDistance[0] = stage_input.gl_CullDistance0.x;
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.FragColor = FragColor;
+    return stage_output;
+}
diff --git a/reference/opt/shaders-hlsl/frag/front-facing.frag b/reference/opt/shaders-hlsl/frag/front-facing.frag
new file mode 100644
index 0000000000..4ed09a2bd1
--- /dev/null
+++ b/reference/opt/shaders-hlsl/frag/front-facing.frag
@@ -0,0 +1,39 @@
+static bool gl_FrontFacing;
+static float4 FragColor;
+static float4 vA;
+static float4 vB;
+
+struct SPIRV_Cross_Input
+{
+    float4 vA : TEXCOORD0;
+    float4 vB : TEXCOORD1;
+    bool gl_FrontFacing : SV_IsFrontFace;
+};
+
+struct SPIRV_Cross_Output
+{
+    float4 FragColor : SV_Target0;
+};
+
+void frag_main()
+{
+    if (gl_FrontFacing)
+    {
+        FragColor = vA;
+    }
+    else
+    {
+        FragColor = vB;
+    }
+}
+
+SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
+{
+    gl_FrontFacing = stage_input.gl_FrontFacing;
+    vA = stage_input.vA;
+    vB = stage_input.vB;
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.FragColor = FragColor;
+    return stage_output;
+}
diff --git a/reference/opt/shaders-hlsl/frag/inf-nan-constant.frag b/reference/opt/shaders-hlsl/frag/inf-nan-constant.frag
new file mode 100644
index 0000000000..06dac0bf72
--- /dev/null
+++ b/reference/opt/shaders-hlsl/frag/inf-nan-constant.frag
@@ -0,0 +1,19 @@
+static float3 FragColor;
+
+struct SPIRV_Cross_Output
+{
+    float3 FragColor : SV_Target0;
+};
+
+void frag_main()
+{
+    FragColor = float3(asfloat(0x7f800000u), asfloat(0xff800000u), asfloat(0xffc00000u));
+}
+
+SPIRV_Cross_Output main()
+{
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.FragColor = FragColor;
+    return stage_output;
+}
diff --git a/reference/opt/shaders-hlsl/frag/input-attachment-ms.frag b/reference/opt/shaders-hlsl/frag/input-attachment-ms.frag
new file mode 100644
index 0000000000..e206b83798
--- /dev/null
+++ b/reference/opt/shaders-hlsl/frag/input-attachment-ms.frag
@@ -0,0 +1,32 @@
+Texture2DMS<float4> uSubpass0 : register(t0);
+Texture2DMS<float4> uSubpass1 : register(t1);
+
+static float4 gl_FragCoord;
+static int gl_SampleID;
+static float4 FragColor;
+
+struct SPIRV_Cross_Input
+{
+    float4 gl_FragCoord : SV_Position;
+    uint gl_SampleID : SV_SampleIndex;
+};
+
+struct SPIRV_Cross_Output
+{
+    float4 FragColor : SV_Target0;
+};
+
+void frag_main()
+{
+    FragColor = (uSubpass0.Load(int2(gl_FragCoord.xy), 1) + uSubpass1.Load(int2(gl_FragCoord.xy), 2)) + uSubpass0.Load(int2(gl_FragCoord.xy), gl_SampleID);
+}
+
+SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
+{
+    gl_FragCoord = stage_input.gl_FragCoord;
+    gl_SampleID = stage_input.gl_SampleID;
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.FragColor = FragColor;
+    return stage_output;
+}
diff --git a/reference/opt/shaders-hlsl/frag/input-attachment.frag b/reference/opt/shaders-hlsl/frag/input-attachment.frag
new file mode 100644
index 0000000000..d87661e5f9
--- /dev/null
+++ b/reference/opt/shaders-hlsl/frag/input-attachment.frag
@@ -0,0 +1,29 @@
+Texture2D<float4> uSubpass0 : register(t0);
+Texture2D<float4> uSubpass1 : register(t1);
+
+static float4 gl_FragCoord;
+static float4 FragColor;
+
+struct SPIRV_Cross_Input
+{
+    float4 gl_FragCoord : SV_Position;
+};
+
+struct SPIRV_Cross_Output
+{
+    float4 FragColor : SV_Target0;
+};
+
+void frag_main()
+{
+    FragColor = uSubpass0.Load(int3(int2(gl_FragCoord.xy), 0)) + uSubpass1.Load(int3(int2(gl_FragCoord.xy), 0));
+}
+
+SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
+{
+    gl_FragCoord = stage_input.gl_FragCoord;
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.FragColor = FragColor;
+    return stage_output;
+}
diff --git a/reference/opt/shaders-hlsl/frag/point-coord-compat.frag b/reference/opt/shaders-hlsl/frag/point-coord-compat.frag
new file mode 100644
index 0000000000..629153982d
--- /dev/null
+++ b/reference/opt/shaders-hlsl/frag/point-coord-compat.frag
@@ -0,0 +1,19 @@
+static float2 FragColor;
+
+struct SPIRV_Cross_Output
+{
+    float2 FragColor : SV_Target0;
+};
+
+void frag_main()
+{
+    FragColor = float2(0.5f, 0.5f);
+}
+
+SPIRV_Cross_Output main()
+{
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.FragColor = FragColor;
+    return stage_output;
+}
diff --git a/reference/opt/shaders-hlsl/frag/spec-constant-block-size.frag b/reference/opt/shaders-hlsl/frag/spec-constant-block-size.frag
new file mode 100644
index 0000000000..094d509010
--- /dev/null
+++ b/reference/opt/shaders-hlsl/frag/spec-constant-block-size.frag
@@ -0,0 +1,33 @@
+static const int Value = 2;
+
+cbuffer _15 : register(b0)
+{
+    float4 _15_samples[Value] : packoffset(c0);
+};
+
+static float4 FragColor;
+static int Index;
+
+struct SPIRV_Cross_Input
+{
+    nointerpolation int Index : TEXCOORD0;
+};
+
+struct SPIRV_Cross_Output
+{
+    float4 FragColor : SV_Target0;
+};
+
+void frag_main()
+{
+    FragColor = _15_samples[Index];
+}
+
+SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
+{
+    Index = stage_input.Index;
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.FragColor = FragColor;
+    return stage_output;
+}
diff --git a/reference/opt/shaders-hlsl/vert/clip-cull-distance.vert b/reference/opt/shaders-hlsl/vert/clip-cull-distance.vert
new file mode 100644
index 0000000000..7e0d104acd
--- /dev/null
+++ b/reference/opt/shaders-hlsl/vert/clip-cull-distance.vert
@@ -0,0 +1,28 @@
+static float4 gl_Position;
+static float gl_ClipDistance[2];
+static float gl_CullDistance[1];
+struct SPIRV_Cross_Output
+{
+    float4 gl_Position : SV_Position;
+    float2 gl_ClipDistance0 : SV_ClipDistance0;
+    float gl_CullDistance0 : SV_CullDistance0;
+};
+
+void vert_main()
+{
+    gl_Position = 1.0f.xxxx;
+    gl_ClipDistance[0] = 0.0f;
+    gl_ClipDistance[1] = 0.0f;
+    gl_CullDistance[0] = 4.0f;
+}
+
+SPIRV_Cross_Output main()
+{
+    vert_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.gl_Position = gl_Position;
+    stage_output.gl_ClipDistance0.x = gl_ClipDistance[0];
+    stage_output.gl_ClipDistance0.y = gl_ClipDistance[1];
+    stage_output.gl_CullDistance0.x = gl_CullDistance[0];
+    return stage_output;
+}
diff --git a/reference/opt/shaders-hlsl/vert/point-size-compat.vert b/reference/opt/shaders-hlsl/vert/point-size-compat.vert
index 83333d0be2..95f45d02f0 100644
--- a/reference/opt/shaders-hlsl/vert/point-size-compat.vert
+++ b/reference/opt/shaders-hlsl/vert/point-size-compat.vert
@@ -8,7 +8,7 @@ struct SPIRV_Cross_Output
 void vert_main()
 {
     gl_Position = 1.0f.xxxx;
-    gl_PointSize = 10.0f;
+    gl_PointSize = 1.0f;
 }
 
 SPIRV_Cross_Output main()
diff --git a/reference/opt/shaders-hlsl/vert/return-array.vert b/reference/opt/shaders-hlsl/vert/return-array.vert
new file mode 100644
index 0000000000..902033b017
--- /dev/null
+++ b/reference/opt/shaders-hlsl/vert/return-array.vert
@@ -0,0 +1,31 @@
+static const float4 _20[2] = { 10.0f.xxxx, 20.0f.xxxx };
+
+static float4 gl_Position;
+static float4 vInput0;
+static float4 vInput1;
+
+struct SPIRV_Cross_Input
+{
+    float4 vInput0 : TEXCOORD0;
+    float4 vInput1 : TEXCOORD1;
+};
+
+struct SPIRV_Cross_Output
+{
+    float4 gl_Position : SV_Position;
+};
+
+void vert_main()
+{
+    gl_Position = 10.0f.xxxx + vInput1;
+}
+
+SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
+{
+    vInput0 = stage_input.vInput0;
+    vInput1 = stage_input.vInput1;
+    vert_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.gl_Position = gl_Position;
+    return stage_output;
+}
diff --git a/reference/opt/shaders-msl/asm/frag/frem.asm.frag b/reference/opt/shaders-msl/asm/frag/frem.asm.frag
new file mode 100644
index 0000000000..f7c1f2ce88
--- /dev/null
+++ b/reference/opt/shaders-msl/asm/frag/frem.asm.frag
@@ -0,0 +1,23 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_in
+{
+    float4 vB [[user(locn1)]];
+    float4 vA [[user(locn0)]];
+};
+
+struct main0_out
+{
+    float4 FragColor [[color(0)]];
+};
+
+fragment main0_out main0(main0_in in [[stage_in]])
+{
+    main0_out out = {};
+    out.FragColor = fmod(in.vA, in.vB);
+    return out;
+}
+
diff --git a/reference/opt/shaders-msl/asm/frag/function-overload-alias.asm.frag b/reference/opt/shaders-msl/asm/frag/function-overload-alias.asm.frag
new file mode 100644
index 0000000000..624a3e4807
--- /dev/null
+++ b/reference/opt/shaders-msl/asm/frag/function-overload-alias.asm.frag
@@ -0,0 +1,17 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float4 FragColor [[color(0)]];
+};
+
+fragment main0_out main0()
+{
+    main0_out out = {};
+    out.FragColor = (((float4(1.0) + float4(1.0)) + (float3(1.0).xyzz + float4(1.0))) + (float4(1.0) + float4(2.0))) + (float2(1.0).xyxy + float4(2.0));
+    return out;
+}
+
diff --git a/reference/opt/shaders-msl/comp/inverse.comp b/reference/opt/shaders-msl/comp/inverse.comp
new file mode 100644
index 0000000000..567dba2c21
--- /dev/null
+++ b/reference/opt/shaders-msl/comp/inverse.comp
@@ -0,0 +1,123 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct MatrixOut
+{
+    float2x2 m2out;
+    float3x3 m3out;
+    float4x4 m4out;
+};
+
+struct MatrixIn
+{
+    float2x2 m2in;
+    float3x3 m3in;
+    float4x4 m4in;
+};
+
+// Returns the determinant of a 2x2 matrix.
+inline float spvDet2x2(float a1, float a2, float b1, float b2)
+{
+    return a1 * b2 - b1 * a2;
+}
+
+// Returns the determinant of a 3x3 matrix.
+inline float spvDet3x3(float a1, float a2, float a3, float b1, float b2, float b3, float c1, float c2, float c3)
+{
+    return a1 * spvDet2x2(b2, b3, c2, c3) - b1 * spvDet2x2(a2, a3, c2, c3) + c1 * spvDet2x2(a2, a3, b2, b3);
+}
+
+// Returns the inverse of a matrix, by using the algorithm of calculating the classical
+// adjoint and dividing by the determinant. The contents of the matrix are changed.
+float4x4 spvInverse4x4(float4x4 m)
+{
+    float4x4 adj;	// The adjoint matrix (inverse after dividing by determinant)
+
+    // Create the transpose of the cofactors, as the classical adjoint of the matrix.
+    adj[0][0] =  spvDet3x3(m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], m[3][3]);
+    adj[0][1] = -spvDet3x3(m[0][1], m[0][2], m[0][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], m[3][3]);
+    adj[0][2] =  spvDet3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[3][1], m[3][2], m[3][3]);
+    adj[0][3] = -spvDet3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], m[2][3]);
+
+    adj[1][0] = -spvDet3x3(m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], m[3][3]);
+    adj[1][1] =  spvDet3x3(m[0][0], m[0][2], m[0][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], m[3][3]);
+    adj[1][2] = -spvDet3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[3][0], m[3][2], m[3][3]);
+    adj[1][3] =  spvDet3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], m[2][3]);
+
+    adj[2][0] =  spvDet3x3(m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], m[3][3]);
+    adj[2][1] = -spvDet3x3(m[0][0], m[0][1], m[0][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], m[3][3]);
+    adj[2][2] =  spvDet3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[3][0], m[3][1], m[3][3]);
+    adj[2][3] = -spvDet3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], m[2][3]);
+
+    adj[3][0] = -spvDet3x3(m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], m[3][2]);
+    adj[3][1] =  spvDet3x3(m[0][0], m[0][1], m[0][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], m[3][2]);
+    adj[3][2] = -spvDet3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[3][0], m[3][1], m[3][2]);
+    adj[3][3] =  spvDet3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], m[2][2]);
+
+    // Calculate the determinant as a combination of the cofactors of the first row.
+    float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]) + (adj[0][2] * m[2][0]) + (adj[0][3] * m[3][0]);
+
+    // Divide the classical adjoint matrix by the determinant.
+    // If determinant is zero, matrix is not invertable, so leave it unchanged.
+    return (det != 0.0f) ? (adj * (1.0f / det)) : m;
+}
+
+// Returns the inverse of a matrix, by using the algorithm of calculating the classical
+// adjoint and dividing by the determinant. The contents of the matrix are changed.
+float3x3 spvInverse3x3(float3x3 m)
+{
+    float3x3 adj;	// The adjoint matrix (inverse after dividing by determinant)
+
+    // Create the transpose of the cofactors, as the classical adjoint of the matrix.
+    adj[0][0] =  spvDet2x2(m[1][1], m[1][2], m[2][1], m[2][2]);
+    adj[0][1] = -spvDet2x2(m[0][1], m[0][2], m[2][1], m[2][2]);
+    adj[0][2] =  spvDet2x2(m[0][1], m[0][2], m[1][1], m[1][2]);
+
+    adj[1][0] = -spvDet2x2(m[1][0], m[1][2], m[2][0], m[2][2]);
+    adj[1][1] =  spvDet2x2(m[0][0], m[0][2], m[2][0], m[2][2]);
+    adj[1][2] = -spvDet2x2(m[0][0], m[0][2], m[1][0], m[1][2]);
+
+    adj[2][0] =  spvDet2x2(m[1][0], m[1][1], m[2][0], m[2][1]);
+    adj[2][1] = -spvDet2x2(m[0][0], m[0][1], m[2][0], m[2][1]);
+    adj[2][2] =  spvDet2x2(m[0][0], m[0][1], m[1][0], m[1][1]);
+
+    // Calculate the determinant as a combination of the cofactors of the first row.
+    float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]) + (adj[0][2] * m[2][0]);
+
+    // Divide the classical adjoint matrix by the determinant.
+    // If determinant is zero, matrix is not invertable, so leave it unchanged.
+    return (det != 0.0f) ? (adj * (1.0f / det)) : m;
+}
+
+// Returns the inverse of a matrix, by using the algorithm of calculating the classical
+// adjoint and dividing by the determinant. The contents of the matrix are changed.
+float2x2 spvInverse2x2(float2x2 m)
+{
+    float2x2 adj;	// The adjoint matrix (inverse after dividing by determinant)
+
+    // Create the transpose of the cofactors, as the classical adjoint of the matrix.
+    adj[0][0] =  m[1][1];
+    adj[0][1] = -m[0][1];
+
+    adj[1][0] = -m[1][0];
+    adj[1][1] =  m[0][0];
+
+    // Calculate the determinant as a combination of the cofactors of the first row.
+    float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]);
+
+    // Divide the classical adjoint matrix by the determinant.
+    // If determinant is zero, matrix is not invertable, so leave it unchanged.
+    return (det != 0.0f) ? (adj * (1.0f / det)) : m;
+}
+
+kernel void main0(device MatrixOut& _15 [[buffer(0)]], device MatrixIn& _20 [[buffer(1)]])
+{
+    _15.m2out = spvInverse2x2(_20.m2in);
+    _15.m3out = spvInverse3x3(_20.m3in);
+    _15.m4out = spvInverse4x4(_20.m4in);
+}
+
diff --git a/reference/opt/shaders-msl/comp/struct-nested.comp b/reference/opt/shaders-msl/comp/struct-nested.comp
new file mode 100644
index 0000000000..0741b011c7
--- /dev/null
+++ b/reference/opt/shaders-msl/comp/struct-nested.comp
@@ -0,0 +1,29 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct s1
+{
+    int a;
+};
+
+struct s2
+{
+    s1 b;
+};
+
+struct dstbuffer
+{
+    s2 test[1];
+};
+
+constant s2 _31 = {};
+
+kernel void main0(device dstbuffer& _19 [[buffer(0)]])
+{
+    s2 _30 = _31;
+    _30.b.a = 0;
+    _19.test[0].b.a = _30.b.a;
+}
+
diff --git a/reference/opt/shaders-msl/comp/struct-packing.comp b/reference/opt/shaders-msl/comp/struct-packing.comp
index cf626ce63f..f59cba5b7d 100644
--- a/reference/opt/shaders-msl/comp/struct-packing.comp
+++ b/reference/opt/shaders-msl/comp/struct-packing.comp
@@ -3,6 +3,8 @@
 
 using namespace metal;
 
+typedef float3x2 packed_float2x3;
+
 struct S0
 {
     float2 a[1];
@@ -58,8 +60,10 @@ struct SSBO1
     float3x2 m3;
     float2x2 m4;
     float2x2 m5[9];
-    float2x3 m6[4][2];
+    packed_float2x3 m6[4][2];
+    char pad10[8];
     float3x2 m7;
+    char pad11[8];
     float array[1];
 };
 
@@ -96,5 +100,6 @@ kernel void main0(device SSBO1& ssbo_430 [[buffer(0)]], device SSBO0& ssbo_140 [
     ssbo_430.content.m3s[5].c = ssbo_140.content.m3s[5].c;
     ssbo_430.content.m3s[6].c = ssbo_140.content.m3s[6].c;
     ssbo_430.content.m3s[7].c = ssbo_140.content.m3s[7].c;
+    ssbo_430.content.m1.a = ssbo_430.content.m3.a * ssbo_430.m6[1][1];
 }
 
diff --git a/reference/opt/shaders-msl/frag/constant-array.frag b/reference/opt/shaders-msl/frag/constant-array.frag
index 7a9a0dea1c..9cdd52276b 100644
--- a/reference/opt/shaders-msl/frag/constant-array.frag
+++ b/reference/opt/shaders-msl/frag/constant-array.frag
@@ -1,3 +1,5 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
 #include <metal_stdlib>
 #include <simd/simd.h>
 
@@ -9,6 +11,12 @@ struct Foobar
     float b;
 };
 
+constant float4 _37[3] = {float4(1.0), float4(2.0), float4(3.0)};
+constant float4 _49[2] = {float4(1.0), float4(2.0)};
+constant float4 _54[2] = {float4(8.0), float4(10.0)};
+constant float4 _55[2][2] = {{float4(1.0), float4(2.0)}, {float4(8.0), float4(10.0)}};
+constant Foobar _75[2] = {{10.0, 40.0}, {90.0, 70.0}};
+
 struct main0_in
 {
     int index [[user(locn0)]];
@@ -19,6 +27,20 @@ struct main0_out
     float4 FragColor [[color(0)]];
 };
 
+// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment.
+template<typename T, uint N>
+void spvArrayCopy(thread T (&dst)[N], thread const T (&src)[N])
+{
+    for (uint i = 0; i < N; dst[i] = src[i], i++);
+}
+
+// An overload for constant arrays.
+template<typename T, uint N>
+void spvArrayCopyConstant(thread T (&dst)[N], constant T (&src)[N])
+{
+    for (uint i = 0; i < N; dst[i] = src[i], i++);
+}
+
 fragment main0_out main0(main0_in in [[stage_in]])
 {
     main0_out out = {};
diff --git a/reference/opt/shaders-msl/frag/constant-composites.frag b/reference/opt/shaders-msl/frag/constant-composites.frag
index 1962db1752..d216da6d13 100644
--- a/reference/opt/shaders-msl/frag/constant-composites.frag
+++ b/reference/opt/shaders-msl/frag/constant-composites.frag
@@ -1,3 +1,5 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
 #include <metal_stdlib>
 #include <simd/simd.h>
 
@@ -9,6 +11,9 @@ struct Foo
     float b;
 };
 
+constant float _16[4] = {1.0, 4.0, 3.0, 2.0};
+constant Foo _28[2] = {{10.0, 20.0}, {30.0, 40.0}};
+
 struct main0_in
 {
     int line [[user(locn0)]];
@@ -19,6 +24,20 @@ struct main0_out
     float4 FragColor [[color(0)]];
 };
 
+// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment.
+template<typename T, uint N>
+void spvArrayCopy(thread T (&dst)[N], thread const T (&src)[N])
+{
+    for (uint i = 0; i < N; dst[i] = src[i], i++);
+}
+
+// An overload for constant arrays.
+template<typename T, uint N>
+void spvArrayCopyConstant(thread T (&dst)[N], constant T (&src)[N])
+{
+    for (uint i = 0; i < N; dst[i] = src[i], i++);
+}
+
 fragment main0_out main0(main0_in in [[stage_in]])
 {
     main0_out out = {};
diff --git a/reference/opt/shaders-msl/frag/front-facing.frag b/reference/opt/shaders-msl/frag/front-facing.frag
new file mode 100644
index 0000000000..3856498943
--- /dev/null
+++ b/reference/opt/shaders-msl/frag/front-facing.frag
@@ -0,0 +1,30 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_in
+{
+    float4 vB [[user(locn1)]];
+    float4 vA [[user(locn0)]];
+};
+
+struct main0_out
+{
+    float4 FragColor [[color(0)]];
+};
+
+fragment main0_out main0(main0_in in [[stage_in]], bool gl_FrontFacing [[front_facing]])
+{
+    main0_out out = {};
+    if (gl_FrontFacing)
+    {
+        out.FragColor = in.vA;
+    }
+    else
+    {
+        out.FragColor = in.vB;
+    }
+    return out;
+}
+
diff --git a/reference/opt/shaders-msl/frag/inf-nan-constant.frag b/reference/opt/shaders-msl/frag/inf-nan-constant.frag
new file mode 100644
index 0000000000..08df0f4c47
--- /dev/null
+++ b/reference/opt/shaders-msl/frag/inf-nan-constant.frag
@@ -0,0 +1,17 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float3 FragColor [[color(0)]];
+};
+
+fragment main0_out main0()
+{
+    main0_out out = {};
+    out.FragColor = float3(as_type<float>(0x7f800000u), as_type<float>(0xff800000u), as_type<float>(0xffc00000u));
+    return out;
+}
+
diff --git a/reference/opt/shaders-msl/frag/input-attachment-ms.frag b/reference/opt/shaders-msl/frag/input-attachment-ms.frag
new file mode 100644
index 0000000000..906cabbf47
--- /dev/null
+++ b/reference/opt/shaders-msl/frag/input-attachment-ms.frag
@@ -0,0 +1,17 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float4 FragColor [[color(0)]];
+};
+
+fragment main0_out main0(texture2d_ms<float> uSubpass0 [[texture(0)]], texture2d_ms<float> uSubpass1 [[texture(1)]], uint gl_SampleID [[sample_id]], float4 gl_FragCoord [[position]])
+{
+    main0_out out = {};
+    out.FragColor = (uSubpass0.read(uint2(gl_FragCoord.xy), 1) + uSubpass1.read(uint2(gl_FragCoord.xy), 2)) + uSubpass0.read(uint2(gl_FragCoord.xy), gl_SampleID);
+    return out;
+}
+
diff --git a/reference/opt/shaders-msl/frag/input-attachment.frag b/reference/opt/shaders-msl/frag/input-attachment.frag
new file mode 100644
index 0000000000..122190648a
--- /dev/null
+++ b/reference/opt/shaders-msl/frag/input-attachment.frag
@@ -0,0 +1,17 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float4 FragColor [[color(0)]];
+};
+
+fragment main0_out main0(texture2d<float> uSubpass0 [[texture(0)]], texture2d<float> uSubpass1 [[texture(1)]], float4 gl_FragCoord [[position]])
+{
+    main0_out out = {};
+    out.FragColor = uSubpass0.read(uint2(gl_FragCoord.xy), 0) + uSubpass1.read(uint2(gl_FragCoord.xy), 0);
+    return out;
+}
+
diff --git a/reference/opt/shaders-msl/frag/sample-depth-separate-image-sampler.frag b/reference/opt/shaders-msl/frag/sample-depth-separate-image-sampler.frag
new file mode 100644
index 0000000000..6626946c45
--- /dev/null
+++ b/reference/opt/shaders-msl/frag/sample-depth-separate-image-sampler.frag
@@ -0,0 +1,17 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float FragColor [[color(0)]];
+};
+
+fragment main0_out main0(depth2d<float> uDepth [[texture(0)]], texture2d<float> uColor [[texture(1)]], sampler uSamplerShadow [[sampler(0)]], sampler uSampler [[sampler(1)]])
+{
+    main0_out out = {};
+    out.FragColor = uDepth.sample_compare(uSamplerShadow, float3(0.5).xy, 0.5) + uColor.sample(uSampler, float2(0.5)).x;
+    return out;
+}
+
diff --git a/reference/opt/shaders-msl/frag/sample-mask.frag b/reference/opt/shaders-msl/frag/sample-mask.frag
new file mode 100644
index 0000000000..6a282395d6
--- /dev/null
+++ b/reference/opt/shaders-msl/frag/sample-mask.frag
@@ -0,0 +1,19 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float4 FragColor [[color(0)]];
+    uint gl_SampleMask [[sample_mask]];
+};
+
+fragment main0_out main0()
+{
+    main0_out out = {};
+    out.FragColor = float4(1.0);
+    out.gl_SampleMask = 0;
+    return out;
+}
+
diff --git a/reference/opt/shaders-msl/frag/spec-constant-block-size.frag b/reference/opt/shaders-msl/frag/spec-constant-block-size.frag
new file mode 100644
index 0000000000..4237d941fe
--- /dev/null
+++ b/reference/opt/shaders-msl/frag/spec-constant-block-size.frag
@@ -0,0 +1,27 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct SpecConstArray
+{
+    float4 samples[2];
+};
+
+struct main0_in
+{
+    int Index [[user(locn0)]];
+};
+
+struct main0_out
+{
+    float4 FragColor [[color(0)]];
+};
+
+fragment main0_out main0(main0_in in [[stage_in]], constant SpecConstArray& _15 [[buffer(0)]])
+{
+    main0_out out = {};
+    out.FragColor = _15.samples[in.Index];
+    return out;
+}
+
diff --git a/reference/opt/shaders-msl/vert/functions.vert b/reference/opt/shaders-msl/vert/functions.vert
index 8ec2484c3e..6e07667b69 100644
--- a/reference/opt/shaders-msl/vert/functions.vert
+++ b/reference/opt/shaders-msl/vert/functions.vert
@@ -75,31 +75,31 @@ inline float spvDet3x3(float a1, float a2, float a3, float b1, float b2, float b
 float4x4 spvInverse4x4(float4x4 m)
 {
     float4x4 adj;	// The adjoint matrix (inverse after dividing by determinant)
-    
+
     // Create the transpose of the cofactors, as the classical adjoint of the matrix.
     adj[0][0] =  spvDet3x3(m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], m[3][3]);
     adj[0][1] = -spvDet3x3(m[0][1], m[0][2], m[0][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], m[3][3]);
     adj[0][2] =  spvDet3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[3][1], m[3][2], m[3][3]);
     adj[0][3] = -spvDet3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], m[2][3]);
-    
+
     adj[1][0] = -spvDet3x3(m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], m[3][3]);
     adj[1][1] =  spvDet3x3(m[0][0], m[0][2], m[0][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], m[3][3]);
     adj[1][2] = -spvDet3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[3][0], m[3][2], m[3][3]);
     adj[1][3] =  spvDet3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], m[2][3]);
-    
+
     adj[2][0] =  spvDet3x3(m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], m[3][3]);
     adj[2][1] = -spvDet3x3(m[0][0], m[0][1], m[0][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], m[3][3]);
     adj[2][2] =  spvDet3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[3][0], m[3][1], m[3][3]);
     adj[2][3] = -spvDet3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], m[2][3]);
-    
+
     adj[3][0] = -spvDet3x3(m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], m[3][2]);
     adj[3][1] =  spvDet3x3(m[0][0], m[0][1], m[0][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], m[3][2]);
     adj[3][2] = -spvDet3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[3][0], m[3][1], m[3][2]);
     adj[3][3] =  spvDet3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], m[2][2]);
-    
+
     // Calculate the determinant as a combination of the cofactors of the first row.
     float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]) + (adj[0][2] * m[2][0]) + (adj[0][3] * m[3][0]);
-    
+
     // Divide the classical adjoint matrix by the determinant.
     // If determinant is zero, matrix is not invertable, so leave it unchanged.
     return (det != 0.0f) ? (adj * (1.0f / det)) : m;
diff --git a/reference/opt/shaders-msl/vert/packed_matrix.vert b/reference/opt/shaders-msl/vert/packed_matrix.vert
new file mode 100644
index 0000000000..53d7d164fa
--- /dev/null
+++ b/reference/opt/shaders-msl/vert/packed_matrix.vert
@@ -0,0 +1,56 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+typedef float3x4 packed_float4x3;
+
+struct _15
+{
+    packed_float4x3 _m0;
+    packed_float4x3 _m1;
+};
+
+struct _42
+{
+    float4x4 _m0;
+    float4x4 _m1;
+    float _m2;
+    char pad3[12];
+    packed_float3 _m3;
+    float _m4;
+    packed_float3 _m5;
+    float _m6;
+    float _m7;
+    float _m8;
+    float2 _m9;
+};
+
+struct main0_in
+{
+    float4 m_25 [[attribute(0)]];
+};
+
+struct main0_out
+{
+    float3 m_72 [[user(locn0)]];
+    float4 gl_Position [[position]];
+};
+
+vertex main0_out main0(main0_in in [[stage_in]], constant _15& _17 [[buffer(0)]], constant _42& _44 [[buffer(1)]])
+{
+    main0_out out = {};
+    float3 _34;
+    do
+    {
+        _34 = normalize(float4(in.m_25.xyz, 0.0) * _17._m1);
+        break;
+    } while (false);
+    float4 _70 = _44._m0 * float4(_44._m3 + (in.m_25.xyz * (_44._m6 + _44._m7)), 1.0);
+    out.m_72 = _34;
+    float4 _95 = _70;
+    _95.y = -_70.y;
+    out.gl_Position = _95;
+    return out;
+}
+
diff --git a/reference/opt/shaders-msl/vert/return-array.vert b/reference/opt/shaders-msl/vert/return-array.vert
new file mode 100644
index 0000000000..7804d2d94f
--- /dev/null
+++ b/reference/opt/shaders-msl/vert/return-array.vert
@@ -0,0 +1,24 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+constant float4 _20[2] = {float4(10.0), float4(20.0)};
+
+struct main0_in
+{
+    float4 vInput1 [[attribute(1)]];
+};
+
+struct main0_out
+{
+    float4 gl_Position [[position]];
+};
+
+vertex main0_out main0(main0_in in [[stage_in]])
+{
+    main0_out out = {};
+    out.gl_Position = float4(10.0) + in.vInput1;
+    return out;
+}
+
diff --git a/reference/opt/shaders/asm/frag/frem.asm.frag b/reference/opt/shaders/asm/frag/frem.asm.frag
new file mode 100644
index 0000000000..1095ab04f2
--- /dev/null
+++ b/reference/opt/shaders/asm/frag/frem.asm.frag
@@ -0,0 +1,13 @@
+#version 310 es
+precision mediump float;
+precision highp int;
+
+layout(location = 0) out vec4 FragColor;
+layout(location = 0) in vec4 vA;
+layout(location = 1) in vec4 vB;
+
+void main()
+{
+    FragColor = vA - vB * trunc(vA / vB);
+}
+
diff --git a/reference/opt/shaders/asm/frag/function-overload-alias.asm.frag b/reference/opt/shaders/asm/frag/function-overload-alias.asm.frag
new file mode 100644
index 0000000000..f726fabb18
--- /dev/null
+++ b/reference/opt/shaders/asm/frag/function-overload-alias.asm.frag
@@ -0,0 +1,11 @@
+#version 310 es
+precision mediump float;
+precision highp int;
+
+layout(location = 0) out vec4 FragColor;
+
+void main()
+{
+    FragColor = (((vec4(1.0) + vec4(1.0)) + (vec3(1.0).xyzz + vec4(1.0))) + (vec4(1.0) + vec4(2.0))) + (vec2(1.0).xyxy + vec4(2.0));
+}
+
diff --git a/reference/opt/shaders/asm/frag/image-fetch-no-sampler.asm.vk.frag b/reference/opt/shaders/asm/frag/image-fetch-no-sampler.asm.vk.frag
new file mode 100644
index 0000000000..ebf8c1201c
--- /dev/null
+++ b/reference/opt/shaders/asm/frag/image-fetch-no-sampler.asm.vk.frag
@@ -0,0 +1,13 @@
+#version 450
+
+uniform sampler2D SPIRV_Cross_CombinedSampledImageSPIRV_Cross_DummySampler;
+uniform sampler2D SPIRV_Cross_CombinedSampledImageSampler;
+
+layout(location = 0) out vec4 _entryPointOutput;
+
+void main()
+{
+    ivec3 _122 = ivec3(int(gl_FragCoord.x * 1280.0), int(gl_FragCoord.y * 720.0), 0);
+    _entryPointOutput = ((texelFetch(SPIRV_Cross_CombinedSampledImageSPIRV_Cross_DummySampler, _122.xy, 0) + texelFetch(SPIRV_Cross_CombinedSampledImageSPIRV_Cross_DummySampler, _122.xy, 0)) + texture(SPIRV_Cross_CombinedSampledImageSampler, gl_FragCoord.xy)) + texture(SPIRV_Cross_CombinedSampledImageSampler, gl_FragCoord.xy);
+}
+
diff --git a/reference/opt/shaders/asm/frag/image-fetch-no-sampler.asm.vk.frag.vk b/reference/opt/shaders/asm/frag/image-fetch-no-sampler.asm.vk.frag.vk
new file mode 100644
index 0000000000..433f5bc273
--- /dev/null
+++ b/reference/opt/shaders/asm/frag/image-fetch-no-sampler.asm.vk.frag.vk
@@ -0,0 +1,14 @@
+#version 450
+
+layout(set = 0, binding = 0) uniform sampler Sampler;
+layout(set = 0, binding = 0) uniform texture2D SampledImage;
+uniform sampler SPIRV_Cross_DummySampler;
+
+layout(location = 0) out vec4 _entryPointOutput;
+
+void main()
+{
+    ivec3 _122 = ivec3(int(gl_FragCoord.x * 1280.0), int(gl_FragCoord.y * 720.0), 0);
+    _entryPointOutput = ((texelFetch(sampler2D(SampledImage, SPIRV_Cross_DummySampler), _122.xy, 0) + texelFetch(sampler2D(SampledImage, SPIRV_Cross_DummySampler), _122.xy, 0)) + texture(sampler2D(SampledImage, Sampler), gl_FragCoord.xy)) + texture(sampler2D(SampledImage, Sampler), gl_FragCoord.xy);
+}
+
diff --git a/reference/opt/shaders/asm/vert/invariant-block.asm.vert b/reference/opt/shaders/asm/vert/invariant-block.asm.vert
new file mode 100644
index 0000000000..9b2f05a8bd
--- /dev/null
+++ b/reference/opt/shaders/asm/vert/invariant-block.asm.vert
@@ -0,0 +1,9 @@
+#version 450
+
+invariant gl_Position;
+
+void main()
+{
+    gl_Position = vec4(1.0);
+}
+
diff --git a/reference/opt/shaders/asm/vert/invariant-block.sso.asm.vert b/reference/opt/shaders/asm/vert/invariant-block.sso.asm.vert
new file mode 100644
index 0000000000..eb88694196
--- /dev/null
+++ b/reference/opt/shaders/asm/vert/invariant-block.sso.asm.vert
@@ -0,0 +1,17 @@
+#version 450
+
+out gl_PerVertex
+{
+    vec4 gl_Position;
+    float gl_PointSize;
+    float gl_ClipDistance[1];
+    float gl_CullDistance[1];
+};
+
+invariant gl_Position;
+
+void main()
+{
+    gl_Position = vec4(1.0);
+}
+
diff --git a/reference/opt/shaders/asm/vert/invariant.asm.vert b/reference/opt/shaders/asm/vert/invariant.asm.vert
new file mode 100644
index 0000000000..9b2f05a8bd
--- /dev/null
+++ b/reference/opt/shaders/asm/vert/invariant.asm.vert
@@ -0,0 +1,9 @@
+#version 450
+
+invariant gl_Position;
+
+void main()
+{
+    gl_Position = vec4(1.0);
+}
+
diff --git a/reference/opt/shaders/asm/vert/invariant.sso.asm.vert b/reference/opt/shaders/asm/vert/invariant.sso.asm.vert
new file mode 100644
index 0000000000..4f7e2f5f67
--- /dev/null
+++ b/reference/opt/shaders/asm/vert/invariant.sso.asm.vert
@@ -0,0 +1,14 @@
+#version 450
+
+out gl_PerVertex
+{
+    vec4 gl_Position;
+};
+
+invariant gl_Position;
+
+void main()
+{
+    gl_Position = vec4(1.0);
+}
+
diff --git a/reference/opt/shaders/desktop-only/frag/clip-cull-distance.desktop.frag b/reference/opt/shaders/desktop-only/frag/clip-cull-distance.desktop.frag
new file mode 100644
index 0000000000..3cc3205509
--- /dev/null
+++ b/reference/opt/shaders/desktop-only/frag/clip-cull-distance.desktop.frag
@@ -0,0 +1,12 @@
+#version 450
+
+in float gl_ClipDistance[4];
+in float gl_CullDistance[3];
+
+layout(location = 0) out float FragColor;
+
+void main()
+{
+    FragColor = gl_ClipDistance[0] + gl_CullDistance[0];
+}
+
diff --git a/reference/opt/shaders/desktop-only/frag/inf-nan-constant-double.desktop.frag b/reference/opt/shaders/desktop-only/frag/inf-nan-constant-double.desktop.frag
new file mode 100644
index 0000000000..b3df0c2313
--- /dev/null
+++ b/reference/opt/shaders/desktop-only/frag/inf-nan-constant-double.desktop.frag
@@ -0,0 +1,11 @@
+#version 450
+#extension GL_ARB_gpu_shader_int64 : require
+
+layout(location = 0) out vec3 FragColor;
+layout(location = 0) flat in double vTmp;
+
+void main()
+{
+    FragColor = vec3(dvec3(uint64BitsToDouble(0x7ff0000000000000ul), uint64BitsToDouble(0xfff0000000000000ul), uint64BitsToDouble(0xfff8000000000000ul)) + dvec3(vTmp));
+}
+
diff --git a/reference/opt/shaders/desktop-only/vert/clip-cull-distance.desktop.sso.vert b/reference/opt/shaders/desktop-only/vert/clip-cull-distance.desktop.sso.vert
new file mode 100644
index 0000000000..a7c5d761c9
--- /dev/null
+++ b/reference/opt/shaders/desktop-only/vert/clip-cull-distance.desktop.sso.vert
@@ -0,0 +1,20 @@
+#version 450
+
+out gl_PerVertex
+{
+    vec4 gl_Position;
+    float gl_PointSize;
+    float gl_ClipDistance[4];
+    float gl_CullDistance[3];
+};
+
+void main()
+{
+    gl_Position = vec4(1.0);
+    gl_ClipDistance[0] = 0.0;
+    gl_ClipDistance[1] = 0.0;
+    gl_ClipDistance[2] = 0.0;
+    gl_ClipDistance[3] = 0.0;
+    gl_CullDistance[1] = 4.0;
+}
+
diff --git a/reference/opt/shaders/desktop-only/vert/clip-cull-distance.desktop.vert b/reference/opt/shaders/desktop-only/vert/clip-cull-distance.desktop.vert
index 566809db23..2f3d49f55d 100644
--- a/reference/opt/shaders/desktop-only/vert/clip-cull-distance.desktop.vert
+++ b/reference/opt/shaders/desktop-only/vert/clip-cull-distance.desktop.vert
@@ -1,11 +1,15 @@
 #version 450
 
+out float gl_ClipDistance[4];
+out float gl_CullDistance[3];
+
 void main()
 {
-    gl_Position = vec4(10.0);
-    gl_ClipDistance[0] = 1.0;
-    gl_ClipDistance[1] = 4.0;
-    gl_CullDistance[0] = 4.0;
-    gl_CullDistance[1] = 9.0;
+    gl_Position = vec4(1.0);
+    gl_ClipDistance[0] = 0.0;
+    gl_ClipDistance[1] = 0.0;
+    gl_ClipDistance[2] = 0.0;
+    gl_ClipDistance[3] = 0.0;
+    gl_CullDistance[1] = 4.0;
 }
 
diff --git a/reference/opt/shaders/frag/front-facing.frag b/reference/opt/shaders/frag/front-facing.frag
new file mode 100644
index 0000000000..cc9aecc8ba
--- /dev/null
+++ b/reference/opt/shaders/frag/front-facing.frag
@@ -0,0 +1,20 @@
+#version 310 es
+precision mediump float;
+precision highp int;
+
+layout(location = 0) out vec4 FragColor;
+layout(location = 0) in vec4 vA;
+layout(location = 1) in vec4 vB;
+
+void main()
+{
+    if (gl_FrontFacing)
+    {
+        FragColor = vA;
+    }
+    else
+    {
+        FragColor = vB;
+    }
+}
+
diff --git a/reference/opt/shaders/frag/hoisted-temporary-use-continue-block-as-value.frag b/reference/opt/shaders/frag/hoisted-temporary-use-continue-block-as-value.frag
new file mode 100644
index 0000000000..1d8e023582
--- /dev/null
+++ b/reference/opt/shaders/frag/hoisted-temporary-use-continue-block-as-value.frag
@@ -0,0 +1,37 @@
+#version 310 es
+precision mediump float;
+precision highp int;
+
+layout(location = 0) out vec4 FragColor;
+layout(location = 0) flat in mediump int vA;
+layout(location = 1) flat in mediump int vB;
+
+void main()
+{
+    FragColor = vec4(0.0);
+    mediump int _49;
+    int _60;
+    for (int _57 = 0, _58 = 0; _58 < vA; _57 = _60, _58 += _49)
+    {
+        if ((vA + _58) == 20)
+        {
+            _60 = 50;
+        }
+        else
+        {
+            int _59;
+            if ((vB + _58) == 40)
+            {
+                _59 = 60;
+            }
+            else
+            {
+                _59 = _57;
+            }
+            _60 = _59;
+        }
+        _49 = _60 + 10;
+        FragColor += vec4(1.0);
+    }
+}
+
diff --git a/reference/opt/shaders/frag/inf-nan-constant.frag b/reference/opt/shaders/frag/inf-nan-constant.frag
new file mode 100644
index 0000000000..d683fe02e8
--- /dev/null
+++ b/reference/opt/shaders/frag/inf-nan-constant.frag
@@ -0,0 +1,11 @@
+#version 310 es
+precision mediump float;
+precision highp int;
+
+layout(location = 0) out highp vec3 FragColor;
+
+void main()
+{
+    FragColor = vec3(uintBitsToFloat(0x7f800000u), uintBitsToFloat(0xff800000u), uintBitsToFloat(0xffc00000u));
+}
+
diff --git a/reference/opt/shaders/vert/return-array.vert b/reference/opt/shaders/vert/return-array.vert
new file mode 100644
index 0000000000..b78ca5f1b8
--- /dev/null
+++ b/reference/opt/shaders/vert/return-array.vert
@@ -0,0 +1,9 @@
+#version 310 es
+
+layout(location = 1) in vec4 vInput1;
+
+void main()
+{
+    gl_Position = vec4(10.0) + vInput1;
+}
+
diff --git a/reference/opt/shaders/vulkan/frag/spec-constant-block-size.vk.frag b/reference/opt/shaders/vulkan/frag/spec-constant-block-size.vk.frag
new file mode 100644
index 0000000000..e8c3c4a36b
--- /dev/null
+++ b/reference/opt/shaders/vulkan/frag/spec-constant-block-size.vk.frag
@@ -0,0 +1,17 @@
+#version 310 es
+precision mediump float;
+precision highp int;
+
+layout(binding = 0, std140) uniform SpecConstArray
+{
+    vec4 samples[2];
+} _15;
+
+layout(location = 0) out vec4 FragColor;
+layout(location = 0) flat in mediump int Index;
+
+void main()
+{
+    FragColor = _15.samples[Index];
+}
+
diff --git a/reference/opt/shaders/vulkan/frag/spec-constant-block-size.vk.frag.vk b/reference/opt/shaders/vulkan/frag/spec-constant-block-size.vk.frag.vk
new file mode 100644
index 0000000000..133761a83d
--- /dev/null
+++ b/reference/opt/shaders/vulkan/frag/spec-constant-block-size.vk.frag.vk
@@ -0,0 +1,19 @@
+#version 310 es
+precision mediump float;
+precision highp int;
+
+layout(constant_id = 10) const int Value = 2;
+
+layout(set = 0, binding = 0, std140) uniform SpecConstArray
+{
+    vec4 samples[Value];
+} _15;
+
+layout(location = 0) out vec4 FragColor;
+layout(location = 0) flat in mediump int Index;
+
+void main()
+{
+    FragColor = _15.samples[Index];
+}
+
diff --git a/reference/shaders-hlsl/asm/frag/frem.asm.frag b/reference/shaders-hlsl/asm/frag/frem.asm.frag
new file mode 100644
index 0000000000..67998c56a7
--- /dev/null
+++ b/reference/shaders-hlsl/asm/frag/frem.asm.frag
@@ -0,0 +1,29 @@
+static float4 FragColor;
+static float4 vA;
+static float4 vB;
+
+struct SPIRV_Cross_Input
+{
+    float4 vA : TEXCOORD0;
+    float4 vB : TEXCOORD1;
+};
+
+struct SPIRV_Cross_Output
+{
+    float4 FragColor : SV_Target0;
+};
+
+void frag_main()
+{
+    FragColor = fmod(vA, vB);
+}
+
+SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
+{
+    vA = stage_input.vA;
+    vB = stage_input.vB;
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.FragColor = FragColor;
+    return stage_output;
+}
diff --git a/reference/shaders-hlsl/asm/frag/function-overload-alias.asm.frag b/reference/shaders-hlsl/asm/frag/function-overload-alias.asm.frag
new file mode 100644
index 0000000000..e8978b6cd8
--- /dev/null
+++ b/reference/shaders-hlsl/asm/frag/function-overload-alias.asm.frag
@@ -0,0 +1,47 @@
+static float4 FragColor;
+
+struct SPIRV_Cross_Output
+{
+    float4 FragColor : SV_Target0;
+};
+
+float4 foo(float4 foo_1)
+{
+    return foo_1 + 1.0f.xxxx;
+}
+
+float4 foo(float3 foo_1)
+{
+    return foo_1.xyzz + 1.0f.xxxx;
+}
+
+float4 foo_1(float4 foo_2)
+{
+    return foo_2 + 2.0f.xxxx;
+}
+
+float4 foo(float2 foo_2)
+{
+    return foo_2.xyxy + 2.0f.xxxx;
+}
+
+void frag_main()
+{
+    float4 foo_3 = 1.0f.xxxx;
+    float4 foo_2 = foo(foo_3);
+    float3 foo_5 = 1.0f.xxx;
+    float4 foo_4 = foo(foo_5);
+    float4 foo_7 = 1.0f.xxxx;
+    float4 foo_6 = foo_1(foo_7);
+    float2 foo_9 = 1.0f.xx;
+    float4 foo_8 = foo(foo_9);
+    FragColor = ((foo_2 + foo_4) + foo_6) + foo_8;
+}
+
+SPIRV_Cross_Output main()
+{
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.FragColor = FragColor;
+    return stage_output;
+}
diff --git a/reference/shaders-hlsl/comp/inverse.comp b/reference/shaders-hlsl/comp/inverse.comp
new file mode 100644
index 0000000000..3be954a6f6
--- /dev/null
+++ b/reference/shaders-hlsl/comp/inverse.comp
@@ -0,0 +1,122 @@
+RWByteAddressBuffer _15 : register(u0);
+ByteAddressBuffer _20 : register(t1);
+
+// Returns the inverse of a matrix, by using the algorithm of calculating the classical
+// adjoint and dividing by the determinant. The contents of the matrix are changed.
+float2x2 SPIRV_Cross_Inverse(float2x2 m)
+{
+    float2x2 adj;	// The adjoint matrix (inverse after dividing by determinant)
+
+    // Create the transpose of the cofactors, as the classical adjoint of the matrix.
+    adj[0][0] =  m[1][1];
+    adj[0][1] = -m[0][1];
+
+    adj[1][0] = -m[1][0];
+    adj[1][1] =  m[0][0];
+
+    // Calculate the determinant as a combination of the cofactors of the first row.
+    float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]);
+
+    // Divide the classical adjoint matrix by the determinant.
+    // If determinant is zero, matrix is not invertable, so leave it unchanged.
+    return (det != 0.0f) ? (adj * (1.0f / det)) : m;
+}
+
+// Returns the determinant of a 2x2 matrix.
+float SPIRV_Cross_Det2x2(float a1, float a2, float b1, float b2)
+{
+    return a1 * b2 - b1 * a2;
+}
+
+// Returns the inverse of a matrix, by using the algorithm of calculating the classical
+// adjoint and dividing by the determinant. The contents of the matrix are changed.
+float3x3 SPIRV_Cross_Inverse(float3x3 m)
+{
+    float3x3 adj;	// The adjoint matrix (inverse after dividing by determinant)
+
+    // Create the transpose of the cofactors, as the classical adjoint of the matrix.
+    adj[0][0] =  SPIRV_Cross_Det2x2(m[1][1], m[1][2], m[2][1], m[2][2]);
+    adj[0][1] = -SPIRV_Cross_Det2x2(m[0][1], m[0][2], m[2][1], m[2][2]);
+    adj[0][2] =  SPIRV_Cross_Det2x2(m[0][1], m[0][2], m[1][1], m[1][2]);
+
+    adj[1][0] = -SPIRV_Cross_Det2x2(m[1][0], m[1][2], m[2][0], m[2][2]);
+    adj[1][1] =  SPIRV_Cross_Det2x2(m[0][0], m[0][2], m[2][0], m[2][2]);
+    adj[1][2] = -SPIRV_Cross_Det2x2(m[0][0], m[0][2], m[1][0], m[1][2]);
+
+    adj[2][0] =  SPIRV_Cross_Det2x2(m[1][0], m[1][1], m[2][0], m[2][1]);
+    adj[2][1] = -SPIRV_Cross_Det2x2(m[0][0], m[0][1], m[2][0], m[2][1]);
+    adj[2][2] =  SPIRV_Cross_Det2x2(m[0][0], m[0][1], m[1][0], m[1][1]);
+
+    // Calculate the determinant as a combination of the cofactors of the first row.
+    float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]) + (adj[0][2] * m[2][0]);
+
+    // Divide the classical adjoint matrix by the determinant.
+    // If determinant is zero, matrix is not invertable, so leave it unchanged.
+    return (det != 0.0f) ? (adj * (1.0f / det)) : m;
+}
+
+// Returns the determinant of a 3x3 matrix.
+float SPIRV_Cross_Det3x3(float a1, float a2, float a3, float b1, float b2, float b3, float c1, float c2, float c3)
+{
+    return a1 * SPIRV_Cross_Det2x2(b2, b3, c2, c3) - b1 * SPIRV_Cross_Det2x2(a2, a3, c2, c3) + c1 * SPIRV_Cross_Det2x2(a2, a3, b2, b3);
+}
+
+// Returns the inverse of a matrix, by using the algorithm of calculating the classical
+// adjoint and dividing by the determinant. The contents of the matrix are changed.
+float4x4 SPIRV_Cross_Inverse(float4x4 m)
+{
+    float4x4 adj;	// The adjoint matrix (inverse after dividing by determinant)
+
+    // Create the transpose of the cofactors, as the classical adjoint of the matrix.
+    adj[0][0] =  SPIRV_Cross_Det3x3(m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], m[3][3]);
+    adj[0][1] = -SPIRV_Cross_Det3x3(m[0][1], m[0][2], m[0][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], m[3][3]);
+    adj[0][2] =  SPIRV_Cross_Det3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[3][1], m[3][2], m[3][3]);
+    adj[0][3] = -SPIRV_Cross_Det3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], m[2][3]);
+
+    adj[1][0] = -SPIRV_Cross_Det3x3(m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], m[3][3]);
+    adj[1][1] =  SPIRV_Cross_Det3x3(m[0][0], m[0][2], m[0][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], m[3][3]);
+    adj[1][2] = -SPIRV_Cross_Det3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[3][0], m[3][2], m[3][3]);
+    adj[1][3] =  SPIRV_Cross_Det3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], m[2][3]);
+
+    adj[2][0] =  SPIRV_Cross_Det3x3(m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], m[3][3]);
+    adj[2][1] = -SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], m[3][3]);
+    adj[2][2] =  SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[3][0], m[3][1], m[3][3]);
+    adj[2][3] = -SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], m[2][3]);
+
+    adj[3][0] = -SPIRV_Cross_Det3x3(m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], m[3][2]);
+    adj[3][1] =  SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], m[3][2]);
+    adj[3][2] = -SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[3][0], m[3][1], m[3][2]);
+    adj[3][3] =  SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], m[2][2]);
+
+    // Calculate the determinant as a combination of the cofactors of the first row.
+    float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]) + (adj[0][2] * m[2][0]) + (adj[0][3] * m[3][0]);
+
+    // Divide the classical adjoint matrix by the determinant.
+    // If determinant is zero, matrix is not invertable, so leave it unchanged.
+    return (det != 0.0f) ? (adj * (1.0f / det)) : m;
+}
+
+void comp_main()
+{
+    float2x2 _23 = asfloat(uint2x2(_20.Load2(0), _20.Load2(8)));
+    float2x2 _24 = SPIRV_Cross_Inverse(_23);
+    _15.Store2(0, asuint(_24[0]));
+    _15.Store2(8, asuint(_24[1]));
+    float3x3 _29 = asfloat(uint3x3(_20.Load3(16), _20.Load3(32), _20.Load3(48)));
+    float3x3 _30 = SPIRV_Cross_Inverse(_29);
+    _15.Store3(16, asuint(_30[0]));
+    _15.Store3(32, asuint(_30[1]));
+    _15.Store3(48, asuint(_30[2]));
+    float4x4 _35 = asfloat(uint4x4(_20.Load4(64), _20.Load4(80), _20.Load4(96), _20.Load4(112)));
+    float4x4 _36 = SPIRV_Cross_Inverse(_35);
+    _15.Store4(64, asuint(_36[0]));
+    _15.Store4(80, asuint(_36[1]));
+    _15.Store4(96, asuint(_36[2]));
+    _15.Store4(112, asuint(_36[3]));
+}
+
+[numthreads(1, 1, 1)]
+void main()
+{
+    comp_main();
+}
diff --git a/reference/shaders-hlsl/comp/num-workgroups-alone.comp b/reference/shaders-hlsl/comp/num-workgroups-alone.comp
new file mode 100644
index 0000000000..1e7dd542c1
--- /dev/null
+++ b/reference/shaders-hlsl/comp/num-workgroups-alone.comp
@@ -0,0 +1,16 @@
+RWByteAddressBuffer _10 : register(u0);
+cbuffer SPIRV_Cross_NumWorkgroups : register(b0)
+{
+    uint3 SPIRV_Cross_NumWorkgroups_count : packoffset(c0);
+};
+
+void comp_main()
+{
+    _10.Store3(0, SPIRV_Cross_NumWorkgroups_count);
+}
+
+[numthreads(1, 1, 1)]
+void main()
+{
+    comp_main();
+}
diff --git a/reference/shaders-hlsl/comp/num-workgroups-with-builtins.comp b/reference/shaders-hlsl/comp/num-workgroups-with-builtins.comp
new file mode 100644
index 0000000000..f44754e823
--- /dev/null
+++ b/reference/shaders-hlsl/comp/num-workgroups-with-builtins.comp
@@ -0,0 +1,23 @@
+RWByteAddressBuffer _10 : register(u0);
+cbuffer SPIRV_Cross_NumWorkgroups : register(b0)
+{
+    uint3 SPIRV_Cross_NumWorkgroups_count : packoffset(c0);
+};
+
+static uint3 gl_WorkGroupID;
+struct SPIRV_Cross_Input
+{
+    uint3 gl_WorkGroupID : SV_GroupID;
+};
+
+void comp_main()
+{
+    _10.Store3(0, SPIRV_Cross_NumWorkgroups_count + gl_WorkGroupID);
+}
+
+[numthreads(1, 1, 1)]
+void main(SPIRV_Cross_Input stage_input)
+{
+    gl_WorkGroupID = stage_input.gl_WorkGroupID;
+    comp_main();
+}
diff --git a/reference/shaders-hlsl/frag/clip-cull-distance.frag b/reference/shaders-hlsl/frag/clip-cull-distance.frag
new file mode 100644
index 0000000000..52f1ac30b6
--- /dev/null
+++ b/reference/shaders-hlsl/frag/clip-cull-distance.frag
@@ -0,0 +1,30 @@
+static float gl_ClipDistance[2];
+static float gl_CullDistance[1];
+static float FragColor;
+
+struct SPIRV_Cross_Input
+{
+    float2 gl_ClipDistance0 : SV_ClipDistance0;
+    float gl_CullDistance0 : SV_CullDistance0;
+};
+
+struct SPIRV_Cross_Output
+{
+    float FragColor : SV_Target0;
+};
+
+void frag_main()
+{
+    FragColor = (gl_ClipDistance[0] + gl_CullDistance[0]) + gl_ClipDistance[1];
+}
+
+SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
+{
+    gl_ClipDistance[0] = stage_input.gl_ClipDistance0.x;
+    gl_ClipDistance[1] = stage_input.gl_ClipDistance0.y;
+    gl_CullDistance[0] = stage_input.gl_CullDistance0.x;
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.FragColor = FragColor;
+    return stage_output;
+}
diff --git a/reference/shaders-hlsl/frag/front-facing.frag b/reference/shaders-hlsl/frag/front-facing.frag
new file mode 100644
index 0000000000..4ed09a2bd1
--- /dev/null
+++ b/reference/shaders-hlsl/frag/front-facing.frag
@@ -0,0 +1,39 @@
+static bool gl_FrontFacing;
+static float4 FragColor;
+static float4 vA;
+static float4 vB;
+
+struct SPIRV_Cross_Input
+{
+    float4 vA : TEXCOORD0;
+    float4 vB : TEXCOORD1;
+    bool gl_FrontFacing : SV_IsFrontFace;
+};
+
+struct SPIRV_Cross_Output
+{
+    float4 FragColor : SV_Target0;
+};
+
+void frag_main()
+{
+    if (gl_FrontFacing)
+    {
+        FragColor = vA;
+    }
+    else
+    {
+        FragColor = vB;
+    }
+}
+
+SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
+{
+    gl_FrontFacing = stage_input.gl_FrontFacing;
+    vA = stage_input.vA;
+    vB = stage_input.vB;
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.FragColor = FragColor;
+    return stage_output;
+}
diff --git a/reference/shaders-hlsl/frag/inf-nan-constant.frag b/reference/shaders-hlsl/frag/inf-nan-constant.frag
new file mode 100644
index 0000000000..06dac0bf72
--- /dev/null
+++ b/reference/shaders-hlsl/frag/inf-nan-constant.frag
@@ -0,0 +1,19 @@
+static float3 FragColor;
+
+struct SPIRV_Cross_Output
+{
+    float3 FragColor : SV_Target0;
+};
+
+void frag_main()
+{
+    FragColor = float3(asfloat(0x7f800000u), asfloat(0xff800000u), asfloat(0xffc00000u));
+}
+
+SPIRV_Cross_Output main()
+{
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.FragColor = FragColor;
+    return stage_output;
+}
diff --git a/reference/shaders-hlsl/frag/input-attachment-ms.frag b/reference/shaders-hlsl/frag/input-attachment-ms.frag
new file mode 100644
index 0000000000..130b799651
--- /dev/null
+++ b/reference/shaders-hlsl/frag/input-attachment-ms.frag
@@ -0,0 +1,37 @@
+Texture2DMS<float4> uSubpass0 : register(t0);
+Texture2DMS<float4> uSubpass1 : register(t1);
+
+static float4 gl_FragCoord;
+static int gl_SampleID;
+static float4 FragColor;
+
+struct SPIRV_Cross_Input
+{
+    float4 gl_FragCoord : SV_Position;
+    uint gl_SampleID : SV_SampleIndex;
+};
+
+struct SPIRV_Cross_Output
+{
+    float4 FragColor : SV_Target0;
+};
+
+float4 load_subpasses(Texture2DMS<float4> uInput)
+{
+    return uInput.Load(int2(gl_FragCoord.xy), gl_SampleID);
+}
+
+void frag_main()
+{
+    FragColor = (uSubpass0.Load(int2(gl_FragCoord.xy), 1) + uSubpass1.Load(int2(gl_FragCoord.xy), 2)) + load_subpasses(uSubpass0);
+}
+
+SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
+{
+    gl_FragCoord = stage_input.gl_FragCoord;
+    gl_SampleID = stage_input.gl_SampleID;
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.FragColor = FragColor;
+    return stage_output;
+}
diff --git a/reference/shaders-hlsl/frag/input-attachment.frag b/reference/shaders-hlsl/frag/input-attachment.frag
new file mode 100644
index 0000000000..0b815ae08a
--- /dev/null
+++ b/reference/shaders-hlsl/frag/input-attachment.frag
@@ -0,0 +1,34 @@
+Texture2D<float4> uSubpass0 : register(t0);
+Texture2D<float4> uSubpass1 : register(t1);
+
+static float4 gl_FragCoord;
+static float4 FragColor;
+
+struct SPIRV_Cross_Input
+{
+    float4 gl_FragCoord : SV_Position;
+};
+
+struct SPIRV_Cross_Output
+{
+    float4 FragColor : SV_Target0;
+};
+
+float4 load_subpasses(Texture2D<float4> uInput)
+{
+    return uInput.Load(int3(int2(gl_FragCoord.xy), 0));
+}
+
+void frag_main()
+{
+    FragColor = uSubpass0.Load(int3(int2(gl_FragCoord.xy), 0)) + load_subpasses(uSubpass1);
+}
+
+SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
+{
+    gl_FragCoord = stage_input.gl_FragCoord;
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.FragColor = FragColor;
+    return stage_output;
+}
diff --git a/reference/shaders-hlsl/frag/point-coord-compat.frag b/reference/shaders-hlsl/frag/point-coord-compat.frag
new file mode 100644
index 0000000000..629153982d
--- /dev/null
+++ b/reference/shaders-hlsl/frag/point-coord-compat.frag
@@ -0,0 +1,19 @@
+static float2 FragColor;
+
+struct SPIRV_Cross_Output
+{
+    float2 FragColor : SV_Target0;
+};
+
+void frag_main()
+{
+    FragColor = float2(0.5f, 0.5f);
+}
+
+SPIRV_Cross_Output main()
+{
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.FragColor = FragColor;
+    return stage_output;
+}
diff --git a/reference/shaders-hlsl/frag/spec-constant-block-size.frag b/reference/shaders-hlsl/frag/spec-constant-block-size.frag
new file mode 100644
index 0000000000..094d509010
--- /dev/null
+++ b/reference/shaders-hlsl/frag/spec-constant-block-size.frag
@@ -0,0 +1,33 @@
+static const int Value = 2;
+
+cbuffer _15 : register(b0)
+{
+    float4 _15_samples[Value] : packoffset(c0);
+};
+
+static float4 FragColor;
+static int Index;
+
+struct SPIRV_Cross_Input
+{
+    nointerpolation int Index : TEXCOORD0;
+};
+
+struct SPIRV_Cross_Output
+{
+    float4 FragColor : SV_Target0;
+};
+
+void frag_main()
+{
+    FragColor = _15_samples[Index];
+}
+
+SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
+{
+    Index = stage_input.Index;
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.FragColor = FragColor;
+    return stage_output;
+}
diff --git a/reference/shaders-hlsl/vert/clip-cull-distance.vert b/reference/shaders-hlsl/vert/clip-cull-distance.vert
new file mode 100644
index 0000000000..7e0d104acd
--- /dev/null
+++ b/reference/shaders-hlsl/vert/clip-cull-distance.vert
@@ -0,0 +1,28 @@
+static float4 gl_Position;
+static float gl_ClipDistance[2];
+static float gl_CullDistance[1];
+struct SPIRV_Cross_Output
+{
+    float4 gl_Position : SV_Position;
+    float2 gl_ClipDistance0 : SV_ClipDistance0;
+    float gl_CullDistance0 : SV_CullDistance0;
+};
+
+void vert_main()
+{
+    gl_Position = 1.0f.xxxx;
+    gl_ClipDistance[0] = 0.0f;
+    gl_ClipDistance[1] = 0.0f;
+    gl_CullDistance[0] = 4.0f;
+}
+
+SPIRV_Cross_Output main()
+{
+    vert_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.gl_Position = gl_Position;
+    stage_output.gl_ClipDistance0.x = gl_ClipDistance[0];
+    stage_output.gl_ClipDistance0.y = gl_ClipDistance[1];
+    stage_output.gl_CullDistance0.x = gl_CullDistance[0];
+    return stage_output;
+}
diff --git a/reference/shaders-hlsl/vert/point-size-compat.vert b/reference/shaders-hlsl/vert/point-size-compat.vert
index 83333d0be2..95f45d02f0 100644
--- a/reference/shaders-hlsl/vert/point-size-compat.vert
+++ b/reference/shaders-hlsl/vert/point-size-compat.vert
@@ -8,7 +8,7 @@ struct SPIRV_Cross_Output
 void vert_main()
 {
     gl_Position = 1.0f.xxxx;
-    gl_PointSize = 10.0f;
+    gl_PointSize = 1.0f;
 }
 
 SPIRV_Cross_Output main()
diff --git a/reference/shaders-hlsl/vert/return-array.vert b/reference/shaders-hlsl/vert/return-array.vert
new file mode 100644
index 0000000000..83e3a28123
--- /dev/null
+++ b/reference/shaders-hlsl/vert/return-array.vert
@@ -0,0 +1,48 @@
+static const float4 _20[2] = { 10.0f.xxxx, 20.0f.xxxx };
+
+static float4 gl_Position;
+static float4 vInput0;
+static float4 vInput1;
+
+struct SPIRV_Cross_Input
+{
+    float4 vInput0 : TEXCOORD0;
+    float4 vInput1 : TEXCOORD1;
+};
+
+struct SPIRV_Cross_Output
+{
+    float4 gl_Position : SV_Position;
+};
+
+void test(out float4 SPIRV_Cross_return_value[2])
+{
+    SPIRV_Cross_return_value = _20;
+}
+
+void test2(out float4 SPIRV_Cross_return_value[2])
+{
+    float4 foobar[2];
+    foobar[0] = vInput0;
+    foobar[1] = vInput1;
+    SPIRV_Cross_return_value = foobar;
+}
+
+void vert_main()
+{
+    float4 _42[2];
+    test(_42);
+    float4 _44[2];
+    test2(_44);
+    gl_Position = _42[0] + _44[1];
+}
+
+SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
+{
+    vInput0 = stage_input.vInput0;
+    vInput1 = stage_input.vInput1;
+    vert_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.gl_Position = gl_Position;
+    return stage_output;
+}
diff --git a/reference/shaders-msl/asm/frag/frem.asm.frag b/reference/shaders-msl/asm/frag/frem.asm.frag
new file mode 100644
index 0000000000..f7c1f2ce88
--- /dev/null
+++ b/reference/shaders-msl/asm/frag/frem.asm.frag
@@ -0,0 +1,23 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_in
+{
+    float4 vB [[user(locn1)]];
+    float4 vA [[user(locn0)]];
+};
+
+struct main0_out
+{
+    float4 FragColor [[color(0)]];
+};
+
+fragment main0_out main0(main0_in in [[stage_in]])
+{
+    main0_out out = {};
+    out.FragColor = fmod(in.vA, in.vB);
+    return out;
+}
+
diff --git a/reference/shaders-msl/asm/frag/function-overload-alias.asm.frag b/reference/shaders-msl/asm/frag/function-overload-alias.asm.frag
new file mode 100644
index 0000000000..1a6314c819
--- /dev/null
+++ b/reference/shaders-msl/asm/frag/function-overload-alias.asm.frag
@@ -0,0 +1,47 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float4 FragColor [[color(0)]];
+};
+
+float4 foo(thread const float4& foo_1)
+{
+    return foo_1 + float4(1.0);
+}
+
+float4 foo(thread const float3& foo_1)
+{
+    return foo_1.xyzz + float4(1.0);
+}
+
+float4 foo_1(thread const float4& foo_2)
+{
+    return foo_2 + float4(2.0);
+}
+
+float4 foo(thread const float2& foo_2)
+{
+    return foo_2.xyxy + float4(2.0);
+}
+
+fragment main0_out main0()
+{
+    main0_out out = {};
+    float4 foo_3 = float4(1.0);
+    float4 foo_2 = foo(foo_3);
+    float3 foo_5 = float3(1.0);
+    float4 foo_4 = foo(foo_5);
+    float4 foo_7 = float4(1.0);
+    float4 foo_6 = foo_1(foo_7);
+    float2 foo_9 = float2(1.0);
+    float4 foo_8 = foo(foo_9);
+    out.FragColor = ((foo_2 + foo_4) + foo_6) + foo_8;
+    return out;
+}
+
diff --git a/reference/shaders-msl/asm/frag/op-constant-null.asm.frag b/reference/shaders-msl/asm/frag/op-constant-null.asm.frag
index 588e7d1572..1d9d11c978 100644
--- a/reference/shaders-msl/asm/frag/op-constant-null.asm.frag
+++ b/reference/shaders-msl/asm/frag/op-constant-null.asm.frag
@@ -1,3 +1,5 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
 #include <metal_stdlib>
 #include <simd/simd.h>
 
@@ -9,11 +11,27 @@ struct D
     float b;
 };
 
+constant float4 _14[4] = {float4(0.0), float4(0.0), float4(0.0), float4(0.0)};
+
 struct main0_out
 {
     float FragColor [[color(0)]];
 };
 
+// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment.
+template<typename T, uint N>
+void spvArrayCopy(thread T (&dst)[N], thread const T (&src)[N])
+{
+    for (uint i = 0; i < N; dst[i] = src[i], i++);
+}
+
+// An overload for constant arrays.
+template<typename T, uint N>
+void spvArrayCopyConstant(thread T (&dst)[N], constant T (&src)[N])
+{
+    for (uint i = 0; i < N; dst[i] = src[i], i++);
+}
+
 fragment main0_out main0()
 {
     main0_out out = {};
diff --git a/reference/shaders-msl/comp/inverse.comp b/reference/shaders-msl/comp/inverse.comp
new file mode 100644
index 0000000000..567dba2c21
--- /dev/null
+++ b/reference/shaders-msl/comp/inverse.comp
@@ -0,0 +1,123 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct MatrixOut
+{
+    float2x2 m2out;
+    float3x3 m3out;
+    float4x4 m4out;
+};
+
+struct MatrixIn
+{
+    float2x2 m2in;
+    float3x3 m3in;
+    float4x4 m4in;
+};
+
+// Returns the determinant of a 2x2 matrix.
+inline float spvDet2x2(float a1, float a2, float b1, float b2)
+{
+    return a1 * b2 - b1 * a2;
+}
+
+// Returns the determinant of a 3x3 matrix.
+inline float spvDet3x3(float a1, float a2, float a3, float b1, float b2, float b3, float c1, float c2, float c3)
+{
+    return a1 * spvDet2x2(b2, b3, c2, c3) - b1 * spvDet2x2(a2, a3, c2, c3) + c1 * spvDet2x2(a2, a3, b2, b3);
+}
+
+// Returns the inverse of a matrix, by using the algorithm of calculating the classical
+// adjoint and dividing by the determinant. The matrix is passed by value, so the caller's copy is not changed.
+float4x4 spvInverse4x4(float4x4 m)
+{
+    float4x4 adj;	// The adjoint matrix (inverse after dividing by determinant)
+
+    // Create the transpose of the cofactors, as the classical adjoint of the matrix.
+    adj[0][0] =  spvDet3x3(m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], m[3][3]);
+    adj[0][1] = -spvDet3x3(m[0][1], m[0][2], m[0][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], m[3][3]);
+    adj[0][2] =  spvDet3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[3][1], m[3][2], m[3][3]);
+    adj[0][3] = -spvDet3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], m[2][3]);
+
+    adj[1][0] = -spvDet3x3(m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], m[3][3]);
+    adj[1][1] =  spvDet3x3(m[0][0], m[0][2], m[0][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], m[3][3]);
+    adj[1][2] = -spvDet3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[3][0], m[3][2], m[3][3]);
+    adj[1][3] =  spvDet3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], m[2][3]);
+
+    adj[2][0] =  spvDet3x3(m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], m[3][3]);
+    adj[2][1] = -spvDet3x3(m[0][0], m[0][1], m[0][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], m[3][3]);
+    adj[2][2] =  spvDet3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[3][0], m[3][1], m[3][3]);
+    adj[2][3] = -spvDet3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], m[2][3]);
+
+    adj[3][0] = -spvDet3x3(m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], m[3][2]);
+    adj[3][1] =  spvDet3x3(m[0][0], m[0][1], m[0][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], m[3][2]);
+    adj[3][2] = -spvDet3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[3][0], m[3][1], m[3][2]);
+    adj[3][3] =  spvDet3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], m[2][2]);
+
+    // Calculate the determinant as a combination of the cofactors of the first row.
+    float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]) + (adj[0][2] * m[2][0]) + (adj[0][3] * m[3][0]);
+
+    // Divide the classical adjoint matrix by the determinant.
+    // If determinant is zero, matrix is not invertible, so leave it unchanged.
+    return (det != 0.0f) ? (adj * (1.0f / det)) : m;
+}
+
+// Returns the inverse of a matrix, by using the algorithm of calculating the classical
+// adjoint and dividing by the determinant. The matrix is passed by value, so the caller's copy is not changed.
+float3x3 spvInverse3x3(float3x3 m)
+{
+    float3x3 adj;	// The adjoint matrix (inverse after dividing by determinant)
+
+    // Create the transpose of the cofactors, as the classical adjoint of the matrix.
+    adj[0][0] =  spvDet2x2(m[1][1], m[1][2], m[2][1], m[2][2]);
+    adj[0][1] = -spvDet2x2(m[0][1], m[0][2], m[2][1], m[2][2]);
+    adj[0][2] =  spvDet2x2(m[0][1], m[0][2], m[1][1], m[1][2]);
+
+    adj[1][0] = -spvDet2x2(m[1][0], m[1][2], m[2][0], m[2][2]);
+    adj[1][1] =  spvDet2x2(m[0][0], m[0][2], m[2][0], m[2][2]);
+    adj[1][2] = -spvDet2x2(m[0][0], m[0][2], m[1][0], m[1][2]);
+
+    adj[2][0] =  spvDet2x2(m[1][0], m[1][1], m[2][0], m[2][1]);
+    adj[2][1] = -spvDet2x2(m[0][0], m[0][1], m[2][0], m[2][1]);
+    adj[2][2] =  spvDet2x2(m[0][0], m[0][1], m[1][0], m[1][1]);
+
+    // Calculate the determinant as a combination of the cofactors of the first row.
+    float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]) + (adj[0][2] * m[2][0]);
+
+    // Divide the classical adjoint matrix by the determinant.
+    // If determinant is zero, matrix is not invertible, so leave it unchanged.
+    return (det != 0.0f) ? (adj * (1.0f / det)) : m;
+}
+
+// Returns the inverse of a matrix, by using the algorithm of calculating the classical
+// adjoint and dividing by the determinant. The matrix is passed by value, so the caller's copy is not changed.
+float2x2 spvInverse2x2(float2x2 m)
+{
+    float2x2 adj;	// The adjoint matrix (inverse after dividing by determinant)
+
+    // Create the transpose of the cofactors, as the classical adjoint of the matrix.
+    adj[0][0] =  m[1][1];
+    adj[0][1] = -m[0][1];
+
+    adj[1][0] = -m[1][0];
+    adj[1][1] =  m[0][0];
+
+    // Calculate the determinant as a combination of the cofactors of the first row.
+    float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]);
+
+    // Divide the classical adjoint matrix by the determinant.
+    // If determinant is zero, matrix is not invertible, so leave it unchanged.
+    return (det != 0.0f) ? (adj * (1.0f / det)) : m;
+}
+
+kernel void main0(device MatrixOut& _15 [[buffer(0)]], device MatrixIn& _20 [[buffer(1)]])
+{
+    _15.m2out = spvInverse2x2(_20.m2in);
+    _15.m3out = spvInverse3x3(_20.m3in);
+    _15.m4out = spvInverse4x4(_20.m4in);
+}
+
diff --git a/reference/shaders-msl/comp/struct-nested.comp b/reference/shaders-msl/comp/struct-nested.comp
new file mode 100644
index 0000000000..93694e31c2
--- /dev/null
+++ b/reference/shaders-msl/comp/struct-nested.comp
@@ -0,0 +1,27 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct s1
+{
+    int a;
+};
+
+struct s2
+{
+    s1 b;
+};
+
+struct dstbuffer
+{
+    s2 test[1];
+};
+
+kernel void main0(device dstbuffer& _19 [[buffer(0)]])
+{
+    s2 testVal;
+    testVal.b.a = 0;
+    _19.test[0].b.a = testVal.b.a;
+}
+
diff --git a/reference/shaders-msl/comp/struct-packing.comp b/reference/shaders-msl/comp/struct-packing.comp
index cf626ce63f..f59cba5b7d 100644
--- a/reference/shaders-msl/comp/struct-packing.comp
+++ b/reference/shaders-msl/comp/struct-packing.comp
@@ -3,6 +3,8 @@
 
 using namespace metal;
 
+typedef float3x2 packed_float2x3;
+
 struct S0
 {
     float2 a[1];
@@ -58,8 +60,10 @@ struct SSBO1
     float3x2 m3;
     float2x2 m4;
     float2x2 m5[9];
-    float2x3 m6[4][2];
+    packed_float2x3 m6[4][2];
+    char pad10[8];
     float3x2 m7;
+    char pad11[8];
     float array[1];
 };
 
@@ -96,5 +100,6 @@ kernel void main0(device SSBO1& ssbo_430 [[buffer(0)]], device SSBO0& ssbo_140 [
     ssbo_430.content.m3s[5].c = ssbo_140.content.m3s[5].c;
     ssbo_430.content.m3s[6].c = ssbo_140.content.m3s[6].c;
     ssbo_430.content.m3s[7].c = ssbo_140.content.m3s[7].c;
+    ssbo_430.content.m1.a = ssbo_430.content.m3.a * ssbo_430.m6[1][1];
 }
 
diff --git a/reference/shaders-msl/frag/constant-array.frag b/reference/shaders-msl/frag/constant-array.frag
index 15aa8fdce2..773d59407c 100644
--- a/reference/shaders-msl/frag/constant-array.frag
+++ b/reference/shaders-msl/frag/constant-array.frag
@@ -11,6 +11,12 @@ struct Foobar
     float b;
 };
 
+constant float4 _37[3] = {float4(1.0), float4(2.0), float4(3.0)};
+constant float4 _49[2] = {float4(1.0), float4(2.0)};
+constant float4 _54[2] = {float4(8.0), float4(10.0)};
+constant float4 _55[2][2] = {{float4(1.0), float4(2.0)}, {float4(8.0), float4(10.0)}};
+constant Foobar _75[2] = {{10.0, 40.0}, {90.0, 70.0}};
+
 struct main0_in
 {
     int index [[user(locn0)]];
@@ -21,6 +27,20 @@ struct main0_out
     float4 FragColor [[color(0)]];
 };
 
+// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment.
+template<typename T, uint N>
+void spvArrayCopy(thread T (&dst)[N], thread const T (&src)[N])
+{
+    for (uint i = 0; i < N; dst[i] = src[i], i++);
+}
+
+// An overload for constant arrays.
+template<typename T, uint N>
+void spvArrayCopyConstant(thread T (&dst)[N], constant T (&src)[N])
+{
+    for (uint i = 0; i < N; dst[i] = src[i], i++);
+}
+
 float4 resolve(thread const Foobar& f)
 {
     return float4(f.a + f.b);
diff --git a/reference/shaders-msl/frag/constant-composites.frag b/reference/shaders-msl/frag/constant-composites.frag
index 1962db1752..d216da6d13 100644
--- a/reference/shaders-msl/frag/constant-composites.frag
+++ b/reference/shaders-msl/frag/constant-composites.frag
@@ -1,3 +1,5 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
 #include <metal_stdlib>
 #include <simd/simd.h>
 
@@ -9,6 +11,9 @@ struct Foo
     float b;
 };
 
+constant float _16[4] = {1.0, 4.0, 3.0, 2.0};
+constant Foo _28[2] = {{10.0, 20.0}, {30.0, 40.0}};
+
 struct main0_in
 {
     int line [[user(locn0)]];
@@ -19,6 +24,20 @@ struct main0_out
     float4 FragColor [[color(0)]];
 };
 
+// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment.
+template<typename T, uint N>
+void spvArrayCopy(thread T (&dst)[N], thread const T (&src)[N])
+{
+    for (uint i = 0; i < N; dst[i] = src[i], i++);
+}
+
+// An overload for constant arrays.
+template<typename T, uint N>
+void spvArrayCopyConstant(thread T (&dst)[N], constant T (&src)[N])
+{
+    for (uint i = 0; i < N; dst[i] = src[i], i++);
+}
+
 fragment main0_out main0(main0_in in [[stage_in]])
 {
     main0_out out = {};
diff --git a/reference/shaders-msl/frag/front-facing.frag b/reference/shaders-msl/frag/front-facing.frag
new file mode 100644
index 0000000000..3856498943
--- /dev/null
+++ b/reference/shaders-msl/frag/front-facing.frag
@@ -0,0 +1,30 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_in
+{
+    float4 vB [[user(locn1)]];
+    float4 vA [[user(locn0)]];
+};
+
+struct main0_out
+{
+    float4 FragColor [[color(0)]];
+};
+
+fragment main0_out main0(main0_in in [[stage_in]], bool gl_FrontFacing [[front_facing]])
+{
+    main0_out out = {};
+    if (gl_FrontFacing)
+    {
+        out.FragColor = in.vA;
+    }
+    else
+    {
+        out.FragColor = in.vB;
+    }
+    return out;
+}
+
diff --git a/reference/shaders-msl/frag/inf-nan-constant.frag b/reference/shaders-msl/frag/inf-nan-constant.frag
new file mode 100644
index 0000000000..08df0f4c47
--- /dev/null
+++ b/reference/shaders-msl/frag/inf-nan-constant.frag
@@ -0,0 +1,17 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float3 FragColor [[color(0)]];
+};
+
+fragment main0_out main0()
+{
+    main0_out out = {};
+    out.FragColor = float3(as_type<float>(0x7f800000u), as_type<float>(0xff800000u), as_type<float>(0xffc00000u));
+    return out;
+}
+
diff --git a/reference/shaders-msl/frag/input-attachment-ms.frag b/reference/shaders-msl/frag/input-attachment-ms.frag
new file mode 100644
index 0000000000..d38712e91c
--- /dev/null
+++ b/reference/shaders-msl/frag/input-attachment-ms.frag
@@ -0,0 +1,24 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float4 FragColor [[color(0)]];
+};
+
+float4 load_subpasses(thread const texture2d_ms<float> uInput, thread uint& gl_SampleID, thread float4& gl_FragCoord)
+{
+    return uInput.read(uint2(gl_FragCoord.xy), gl_SampleID);
+}
+
+fragment main0_out main0(texture2d_ms<float> uSubpass0 [[texture(0)]], texture2d_ms<float> uSubpass1 [[texture(1)]], uint gl_SampleID [[sample_id]], float4 gl_FragCoord [[position]])
+{
+    main0_out out = {};
+    out.FragColor = (uSubpass0.read(uint2(gl_FragCoord.xy), 1) + uSubpass1.read(uint2(gl_FragCoord.xy), 2)) + load_subpasses(uSubpass0, gl_SampleID, gl_FragCoord);
+    return out;
+}
+
diff --git a/reference/shaders-msl/frag/input-attachment.frag b/reference/shaders-msl/frag/input-attachment.frag
new file mode 100644
index 0000000000..3cc929182b
--- /dev/null
+++ b/reference/shaders-msl/frag/input-attachment.frag
@@ -0,0 +1,24 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float4 FragColor [[color(0)]];
+};
+
+float4 load_subpasses(thread const texture2d<float> uInput, thread float4& gl_FragCoord)
+{
+    return uInput.read(uint2(gl_FragCoord.xy), 0);
+}
+
+fragment main0_out main0(texture2d<float> uSubpass0 [[texture(0)]], texture2d<float> uSubpass1 [[texture(1)]], float4 gl_FragCoord [[position]])
+{
+    main0_out out = {};
+    out.FragColor = uSubpass0.read(uint2(gl_FragCoord.xy), 0) + load_subpasses(uSubpass1, gl_FragCoord);
+    return out;
+}
+
diff --git a/reference/shaders-msl/frag/sample-depth-separate-image-sampler.frag b/reference/shaders-msl/frag/sample-depth-separate-image-sampler.frag
new file mode 100644
index 0000000000..a9c0f8b41b
--- /dev/null
+++ b/reference/shaders-msl/frag/sample-depth-separate-image-sampler.frag
@@ -0,0 +1,29 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float FragColor [[color(0)]];
+};
+
+float sample_depth_from_function(thread const depth2d<float> uT, thread const sampler uS)
+{
+    return uT.sample_compare(uS, float3(0.5).xy, float3(0.5).z);
+}
+
+float sample_color_from_function(thread const texture2d<float> uT, thread const sampler uS)
+{
+    return uT.sample(uS, float2(0.5)).x;
+}
+
+fragment main0_out main0(depth2d<float> uDepth [[texture(0)]], texture2d<float> uColor [[texture(1)]], sampler uSamplerShadow [[sampler(0)]], sampler uSampler [[sampler(1)]])
+{
+    main0_out out = {};
+    out.FragColor = sample_depth_from_function(uDepth, uSamplerShadow) + sample_color_from_function(uColor, uSampler);
+    return out;
+}
+
diff --git a/reference/shaders-msl/frag/sample-mask.frag b/reference/shaders-msl/frag/sample-mask.frag
new file mode 100644
index 0000000000..6a282395d6
--- /dev/null
+++ b/reference/shaders-msl/frag/sample-mask.frag
@@ -0,0 +1,19 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float4 FragColor [[color(0)]];
+    uint gl_SampleMask [[sample_mask]];
+};
+
+fragment main0_out main0()
+{
+    main0_out out = {};
+    out.FragColor = float4(1.0);
+    out.gl_SampleMask = 0;
+    return out;
+}
+
diff --git a/reference/shaders-msl/frag/spec-constant-block-size.frag b/reference/shaders-msl/frag/spec-constant-block-size.frag
new file mode 100644
index 0000000000..4237d941fe
--- /dev/null
+++ b/reference/shaders-msl/frag/spec-constant-block-size.frag
@@ -0,0 +1,27 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct SpecConstArray
+{
+    float4 samples[2];
+};
+
+struct main0_in
+{
+    int Index [[user(locn0)]];
+};
+
+struct main0_out
+{
+    float4 FragColor [[color(0)]];
+};
+
+fragment main0_out main0(main0_in in [[stage_in]], constant SpecConstArray& _15 [[buffer(0)]])
+{
+    main0_out out = {};
+    out.FragColor = _15.samples[in.Index];
+    return out;
+}
+
diff --git a/reference/shaders-msl/vert/functions.vert b/reference/shaders-msl/vert/functions.vert
index 8ec2484c3e..6e07667b69 100644
--- a/reference/shaders-msl/vert/functions.vert
+++ b/reference/shaders-msl/vert/functions.vert
@@ -75,31 +75,31 @@ inline float spvDet3x3(float a1, float a2, float a3, float b1, float b2, float b
 float4x4 spvInverse4x4(float4x4 m)
 {
     float4x4 adj;	// The adjoint matrix (inverse after dividing by determinant)
-    
+
     // Create the transpose of the cofactors, as the classical adjoint of the matrix.
     adj[0][0] =  spvDet3x3(m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], m[3][3]);
     adj[0][1] = -spvDet3x3(m[0][1], m[0][2], m[0][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], m[3][3]);
     adj[0][2] =  spvDet3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[3][1], m[3][2], m[3][3]);
     adj[0][3] = -spvDet3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], m[2][3]);
-    
+
     adj[1][0] = -spvDet3x3(m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], m[3][3]);
     adj[1][1] =  spvDet3x3(m[0][0], m[0][2], m[0][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], m[3][3]);
     adj[1][2] = -spvDet3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[3][0], m[3][2], m[3][3]);
     adj[1][3] =  spvDet3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], m[2][3]);
-    
+
     adj[2][0] =  spvDet3x3(m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], m[3][3]);
     adj[2][1] = -spvDet3x3(m[0][0], m[0][1], m[0][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], m[3][3]);
     adj[2][2] =  spvDet3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[3][0], m[3][1], m[3][3]);
     adj[2][3] = -spvDet3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], m[2][3]);
-    
+
     adj[3][0] = -spvDet3x3(m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], m[3][2]);
     adj[3][1] =  spvDet3x3(m[0][0], m[0][1], m[0][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], m[3][2]);
     adj[3][2] = -spvDet3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[3][0], m[3][1], m[3][2]);
     adj[3][3] =  spvDet3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], m[2][2]);
-    
+
     // Calculate the determinant as a combination of the cofactors of the first row.
     float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]) + (adj[0][2] * m[2][0]) + (adj[0][3] * m[3][0]);
-    
+
     // Divide the classical adjoint matrix by the determinant.
     // If determinant is zero, matrix is not invertable, so leave it unchanged.
     return (det != 0.0f) ? (adj * (1.0f / det)) : m;
diff --git a/reference/shaders-msl/vert/packed_matrix.vert b/reference/shaders-msl/vert/packed_matrix.vert
new file mode 100644
index 0000000000..5d025c4cde
--- /dev/null
+++ b/reference/shaders-msl/vert/packed_matrix.vert
@@ -0,0 +1,56 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+typedef float3x4 packed_float4x3;
+
+struct _15
+{
+    packed_float4x3 _m0;
+    packed_float4x3 _m1;
+};
+
+struct _42
+{
+    float4x4 _m0;
+    float4x4 _m1;
+    float _m2;
+    char pad3[12];
+    packed_float3 _m3;
+    float _m4;
+    packed_float3 _m5;
+    float _m6;
+    float _m7;
+    float _m8;
+    float2 _m9;
+};
+
+struct main0_in
+{
+    float4 m_25 [[attribute(0)]];
+};
+
+struct main0_out
+{
+    float3 m_72 [[user(locn0)]];
+    float4 gl_Position [[position]];
+};
+
+vertex main0_out main0(main0_in in [[stage_in]], constant _15& _17 [[buffer(0)]], constant _42& _44 [[buffer(1)]])
+{
+    main0_out out = {};
+    float3 _13;
+    do
+    {
+        _13 = normalize(float4(in.m_25.xyz, 0.0) * _17._m1);
+        break;
+    } while (false);
+    float4 _39 = _44._m0 * float4(_44._m3 + (in.m_25.xyz * (_44._m6 + _44._m7)), 1.0);
+    out.m_72 = _13;
+    float4 _74 = _39;
+    _74.y = -_39.y;
+    out.gl_Position = _74;
+    return out;
+}
+
diff --git a/reference/shaders-msl/vert/return-array.vert b/reference/shaders-msl/vert/return-array.vert
new file mode 100644
index 0000000000..c3857b909a
--- /dev/null
+++ b/reference/shaders-msl/vert/return-array.vert
@@ -0,0 +1,58 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+constant float4 _20[2] = {float4(10.0), float4(20.0)};
+
+struct main0_in
+{
+    float4 vInput1 [[attribute(1)]];
+    float4 vInput0 [[attribute(0)]];
+};
+
+struct main0_out
+{
+    float4 gl_Position [[position]];
+};
+
+// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment.
+template<typename T, uint N>
+void spvArrayCopy(thread T (&dst)[N], thread const T (&src)[N])
+{
+    for (uint i = 0; i < N; dst[i] = src[i], i++);
+}
+
+// An overload for constant arrays.
+template<typename T, uint N>
+void spvArrayCopyConstant(thread T (&dst)[N], constant T (&src)[N])
+{
+    for (uint i = 0; i < N; dst[i] = src[i], i++);
+}
+
+void test(thread float4 (&SPIRV_Cross_return_value)[2])
+{
+    spvArrayCopyConstant(SPIRV_Cross_return_value, _20);
+}
+
+void test2(thread float4 (&SPIRV_Cross_return_value)[2], thread float4& vInput0, thread float4& vInput1)
+{
+    float4 foobar[2];
+    foobar[0] = vInput0;
+    foobar[1] = vInput1;
+    spvArrayCopy(SPIRV_Cross_return_value, foobar);
+}
+
+vertex main0_out main0(main0_in in [[stage_in]])
+{
+    main0_out out = {};
+    float4 _42[2];
+    test(_42);
+    float4 _44[2];
+    test2(_44, in.vInput0, in.vInput1);
+    out.gl_Position = _42[0] + _44[1];
+    return out;
+}
+
diff --git a/reference/shaders/asm/frag/frem.asm.frag b/reference/shaders/asm/frag/frem.asm.frag
new file mode 100644
index 0000000000..1095ab04f2
--- /dev/null
+++ b/reference/shaders/asm/frag/frem.asm.frag
@@ -0,0 +1,13 @@
+#version 310 es
+precision mediump float;
+precision highp int;
+
+layout(location = 0) out vec4 FragColor;
+layout(location = 0) in vec4 vA;
+layout(location = 1) in vec4 vB;
+
+void main()
+{
+    FragColor = vA - vB * trunc(vA / vB);
+}
+
diff --git a/reference/shaders/asm/frag/function-overload-alias.asm.frag b/reference/shaders/asm/frag/function-overload-alias.asm.frag
new file mode 100644
index 0000000000..676f986801
--- /dev/null
+++ b/reference/shaders/asm/frag/function-overload-alias.asm.frag
@@ -0,0 +1,39 @@
+#version 310 es
+precision mediump float;
+precision highp int;
+
+layout(location = 0) out vec4 FragColor;
+
+vec4 foo(vec4 foo_1)
+{
+    return foo_1 + vec4(1.0);
+}
+
+vec4 foo(vec3 foo_1)
+{
+    return foo_1.xyzz + vec4(1.0);
+}
+
+vec4 foo_1(vec4 foo_2)
+{
+    return foo_2 + vec4(2.0);
+}
+
+vec4 foo(vec2 foo_2)
+{
+    return foo_2.xyxy + vec4(2.0);
+}
+
+void main()
+{
+    highp vec4 foo_3 = vec4(1.0);
+    vec4 foo_2 = foo(foo_3);
+    highp vec3 foo_5 = vec3(1.0);
+    vec4 foo_4 = foo(foo_5);
+    highp vec4 foo_7 = vec4(1.0);
+    vec4 foo_6 = foo_1(foo_7);
+    highp vec2 foo_9 = vec2(1.0);
+    vec4 foo_8 = foo(foo_9);
+    FragColor = ((foo_2 + foo_4) + foo_6) + foo_8;
+}
+
diff --git a/reference/shaders/asm/frag/image-fetch-no-sampler.asm.vk.frag b/reference/shaders/asm/frag/image-fetch-no-sampler.asm.vk.frag
new file mode 100644
index 0000000000..60bb78aa5c
--- /dev/null
+++ b/reference/shaders/asm/frag/image-fetch-no-sampler.asm.vk.frag
@@ -0,0 +1,38 @@
+#version 450
+
+uniform sampler2D SPIRV_Cross_CombinedparamSPIRV_Cross_DummySampler;
+uniform sampler2D SPIRV_Cross_CombinedSampledImageSPIRV_Cross_DummySampler;
+uniform sampler2D SPIRV_Cross_CombinedparamSampler;
+uniform sampler2D SPIRV_Cross_CombinedSampledImageSampler;
+
+layout(location = 0) out vec4 _entryPointOutput;
+
+vec4 sample_fetch(ivec3 UV, sampler2D SPIRV_Cross_CombinedtexSPIRV_Cross_DummySampler)
+{
+    return texelFetch(SPIRV_Cross_CombinedtexSPIRV_Cross_DummySampler, UV.xy, UV.z);
+}
+
+vec4 sample_sampler(vec2 UV, sampler2D SPIRV_Cross_CombinedtexSampler)
+{
+    return texture(SPIRV_Cross_CombinedtexSampler, UV);
+}
+
+vec4 _main(vec4 xIn)
+{
+    ivec3 coord = ivec3(int(xIn.x * 1280.0), int(xIn.y * 720.0), 0);
+    ivec3 param = coord;
+    vec4 value = sample_fetch(param, SPIRV_Cross_CombinedparamSPIRV_Cross_DummySampler);
+    value += texelFetch(SPIRV_Cross_CombinedSampledImageSPIRV_Cross_DummySampler, coord.xy, coord.z);
+    vec2 param_1 = xIn.xy;
+    value += sample_sampler(param_1, SPIRV_Cross_CombinedparamSampler);
+    value += texture(SPIRV_Cross_CombinedSampledImageSampler, xIn.xy);
+    return value;
+}
+
+void main()
+{
+    vec4 xIn = gl_FragCoord;
+    vec4 param = xIn;
+    _entryPointOutput = _main(param);
+}
+
diff --git a/reference/shaders/asm/frag/image-fetch-no-sampler.asm.vk.frag.vk b/reference/shaders/asm/frag/image-fetch-no-sampler.asm.vk.frag.vk
new file mode 100644
index 0000000000..3682eaee62
--- /dev/null
+++ b/reference/shaders/asm/frag/image-fetch-no-sampler.asm.vk.frag.vk
@@ -0,0 +1,37 @@
+#version 450
+
+layout(set = 0, binding = 0) uniform sampler Sampler;
+layout(set = 0, binding = 0) uniform texture2D SampledImage;
+uniform sampler SPIRV_Cross_DummySampler;
+
+layout(location = 0) out vec4 _entryPointOutput;
+
+vec4 sample_fetch(texture2D tex, ivec3 UV)
+{
+    return texelFetch(sampler2D(tex, SPIRV_Cross_DummySampler), UV.xy, UV.z);
+}
+
+vec4 sample_sampler(texture2D tex, vec2 UV)
+{
+    return texture(sampler2D(tex, Sampler), UV);
+}
+
+vec4 _main(vec4 xIn)
+{
+    ivec3 coord = ivec3(int(xIn.x * 1280.0), int(xIn.y * 720.0), 0);
+    ivec3 param = coord;
+    vec4 value = sample_fetch(SampledImage, param);
+    value += texelFetch(sampler2D(SampledImage, SPIRV_Cross_DummySampler), coord.xy, coord.z);
+    vec2 param_1 = xIn.xy;
+    value += sample_sampler(SampledImage, param_1);
+    value += texture(sampler2D(SampledImage, Sampler), xIn.xy);
+    return value;
+}
+
+void main()
+{
+    vec4 xIn = gl_FragCoord;
+    vec4 param = xIn;
+    _entryPointOutput = _main(param);
+}
+
diff --git a/reference/shaders/asm/vert/invariant-block.asm.vert b/reference/shaders/asm/vert/invariant-block.asm.vert
new file mode 100644
index 0000000000..9b2f05a8bd
--- /dev/null
+++ b/reference/shaders/asm/vert/invariant-block.asm.vert
@@ -0,0 +1,9 @@
+#version 450
+
+invariant gl_Position;
+
+void main()
+{
+    gl_Position = vec4(1.0);
+}
+
diff --git a/reference/shaders/asm/vert/invariant-block.sso.asm.vert b/reference/shaders/asm/vert/invariant-block.sso.asm.vert
new file mode 100644
index 0000000000..eb88694196
--- /dev/null
+++ b/reference/shaders/asm/vert/invariant-block.sso.asm.vert
@@ -0,0 +1,17 @@
+#version 450
+
+out gl_PerVertex
+{
+    vec4 gl_Position;
+    float gl_PointSize;
+    float gl_ClipDistance[1];
+    float gl_CullDistance[1];
+};
+
+invariant gl_Position;
+
+void main()
+{
+    gl_Position = vec4(1.0);
+}
+
diff --git a/reference/shaders/asm/vert/invariant.asm.vert b/reference/shaders/asm/vert/invariant.asm.vert
new file mode 100644
index 0000000000..a151c1d042
--- /dev/null
+++ b/reference/shaders/asm/vert/invariant.asm.vert
@@ -0,0 +1,14 @@
+#version 450
+
+invariant gl_Position;
+
+vec4 _main()
+{
+    return vec4(1.0);
+}
+
+void main()
+{
+    gl_Position = _main();
+}
+
diff --git a/reference/shaders/asm/vert/invariant.sso.asm.vert b/reference/shaders/asm/vert/invariant.sso.asm.vert
new file mode 100644
index 0000000000..a52c7e5a08
--- /dev/null
+++ b/reference/shaders/asm/vert/invariant.sso.asm.vert
@@ -0,0 +1,19 @@
+#version 450
+
+out gl_PerVertex
+{
+    vec4 gl_Position;
+};
+
+invariant gl_Position;
+
+vec4 _main()
+{
+    return vec4(1.0);
+}
+
+void main()
+{
+    gl_Position = _main();
+}
+
diff --git a/reference/shaders/desktop-only/frag/clip-cull-distance.desktop.frag b/reference/shaders/desktop-only/frag/clip-cull-distance.desktop.frag
new file mode 100644
index 0000000000..3cc3205509
--- /dev/null
+++ b/reference/shaders/desktop-only/frag/clip-cull-distance.desktop.frag
@@ -0,0 +1,12 @@
+#version 450
+
+in float gl_ClipDistance[4];
+in float gl_CullDistance[3];
+
+layout(location = 0) out float FragColor;
+
+void main()
+{
+    FragColor = gl_ClipDistance[0] + gl_CullDistance[0];
+}
+
diff --git a/reference/shaders/desktop-only/frag/inf-nan-constant-double.desktop.frag b/reference/shaders/desktop-only/frag/inf-nan-constant-double.desktop.frag
new file mode 100644
index 0000000000..b3df0c2313
--- /dev/null
+++ b/reference/shaders/desktop-only/frag/inf-nan-constant-double.desktop.frag
@@ -0,0 +1,11 @@
+#version 450
+#extension GL_ARB_gpu_shader_int64 : require
+
+layout(location = 0) out vec3 FragColor;
+layout(location = 0) flat in double vTmp;
+
+void main()
+{
+    FragColor = vec3(dvec3(uint64BitsToDouble(0x7ff0000000000000ul), uint64BitsToDouble(0xfff0000000000000ul), uint64BitsToDouble(0xfff8000000000000ul)) + dvec3(vTmp));
+}
+
diff --git a/reference/shaders/desktop-only/vert/clip-cull-distance.desktop.sso.vert b/reference/shaders/desktop-only/vert/clip-cull-distance.desktop.sso.vert
new file mode 100644
index 0000000000..a7c5d761c9
--- /dev/null
+++ b/reference/shaders/desktop-only/vert/clip-cull-distance.desktop.sso.vert
@@ -0,0 +1,20 @@
+#version 450
+
+out gl_PerVertex
+{
+    vec4 gl_Position;
+    float gl_PointSize;
+    float gl_ClipDistance[4];
+    float gl_CullDistance[3];
+};
+
+void main()
+{
+    gl_Position = vec4(1.0);
+    gl_ClipDistance[0] = 0.0;
+    gl_ClipDistance[1] = 0.0;
+    gl_ClipDistance[2] = 0.0;
+    gl_ClipDistance[3] = 0.0;
+    gl_CullDistance[1] = 4.0;
+}
+
diff --git a/reference/shaders/desktop-only/vert/clip-cull-distance.desktop.vert b/reference/shaders/desktop-only/vert/clip-cull-distance.desktop.vert
index 566809db23..2f3d49f55d 100644
--- a/reference/shaders/desktop-only/vert/clip-cull-distance.desktop.vert
+++ b/reference/shaders/desktop-only/vert/clip-cull-distance.desktop.vert
@@ -1,11 +1,15 @@
 #version 450
 
+out float gl_ClipDistance[4];
+out float gl_CullDistance[3];
+
 void main()
 {
-    gl_Position = vec4(10.0);
-    gl_ClipDistance[0] = 1.0;
-    gl_ClipDistance[1] = 4.0;
-    gl_CullDistance[0] = 4.0;
-    gl_CullDistance[1] = 9.0;
+    gl_Position = vec4(1.0);
+    gl_ClipDistance[0] = 0.0;
+    gl_ClipDistance[1] = 0.0;
+    gl_ClipDistance[2] = 0.0;
+    gl_ClipDistance[3] = 0.0;
+    gl_CullDistance[1] = 4.0;
 }
 
diff --git a/reference/shaders/frag/front-facing.frag b/reference/shaders/frag/front-facing.frag
new file mode 100644
index 0000000000..cc9aecc8ba
--- /dev/null
+++ b/reference/shaders/frag/front-facing.frag
@@ -0,0 +1,20 @@
+#version 310 es
+precision mediump float;
+precision highp int;
+
+layout(location = 0) out vec4 FragColor;
+layout(location = 0) in vec4 vA;
+layout(location = 1) in vec4 vB;
+
+void main()
+{
+    if (gl_FrontFacing)
+    {
+        FragColor = vA;
+    }
+    else
+    {
+        FragColor = vB;
+    }
+}
+
diff --git a/reference/shaders/frag/hoisted-temporary-use-continue-block-as-value.frag b/reference/shaders/frag/hoisted-temporary-use-continue-block-as-value.frag
new file mode 100644
index 0000000000..cd4f7d4d28
--- /dev/null
+++ b/reference/shaders/frag/hoisted-temporary-use-continue-block-as-value.frag
@@ -0,0 +1,31 @@
+#version 310 es
+precision mediump float;
+precision highp int;
+
+layout(location = 0) out vec4 FragColor;
+layout(location = 0) flat in mediump int vA;
+layout(location = 1) flat in mediump int vB;
+
+void main()
+{
+    FragColor = vec4(0.0);
+    mediump int k = 0;
+    mediump int j;
+    for (mediump int i = 0; i < vA; i += j)
+    {
+        if ((vA + i) == 20)
+        {
+            k = 50;
+        }
+        else
+        {
+            if ((vB + i) == 40)
+            {
+                k = 60;
+            }
+        }
+        j = k + 10;
+        FragColor += vec4(1.0);
+    }
+}
+
diff --git a/reference/shaders/frag/inf-nan-constant.frag b/reference/shaders/frag/inf-nan-constant.frag
new file mode 100644
index 0000000000..d683fe02e8
--- /dev/null
+++ b/reference/shaders/frag/inf-nan-constant.frag
@@ -0,0 +1,11 @@
+#version 310 es
+precision mediump float;
+precision highp int;
+
+layout(location = 0) out highp vec3 FragColor;
+
+void main()
+{
+    FragColor = vec3(uintBitsToFloat(0x7f800000u), uintBitsToFloat(0xff800000u), uintBitsToFloat(0xffc00000u));
+}
+
diff --git a/reference/shaders/vert/return-array.vert b/reference/shaders/vert/return-array.vert
new file mode 100644
index 0000000000..20bb440ece
--- /dev/null
+++ b/reference/shaders/vert/return-array.vert
@@ -0,0 +1,23 @@
+#version 310 es
+
+layout(location = 0) in vec4 vInput0;
+layout(location = 1) in vec4 vInput1;
+
+vec4[2] test()
+{
+    return vec4[](vec4(10.0), vec4(20.0));
+}
+
+vec4[2] test2()
+{
+    vec4 foobar[2];
+    foobar[0] = vInput0;
+    foobar[1] = vInput1;
+    return foobar;
+}
+
+void main()
+{
+    gl_Position = test()[0] + test2()[1];
+}
+
diff --git a/reference/shaders/vulkan/frag/spec-constant-block-size.vk.frag b/reference/shaders/vulkan/frag/spec-constant-block-size.vk.frag
new file mode 100644
index 0000000000..e8c3c4a36b
--- /dev/null
+++ b/reference/shaders/vulkan/frag/spec-constant-block-size.vk.frag
@@ -0,0 +1,17 @@
+#version 310 es
+precision mediump float;
+precision highp int;
+
+layout(binding = 0, std140) uniform SpecConstArray
+{
+    vec4 samples[2];
+} _15;
+
+layout(location = 0) out vec4 FragColor;
+layout(location = 0) flat in mediump int Index;
+
+void main()
+{
+    FragColor = _15.samples[Index];
+}
+
diff --git a/reference/shaders/vulkan/frag/spec-constant-block-size.vk.frag.vk b/reference/shaders/vulkan/frag/spec-constant-block-size.vk.frag.vk
new file mode 100644
index 0000000000..133761a83d
--- /dev/null
+++ b/reference/shaders/vulkan/frag/spec-constant-block-size.vk.frag.vk
@@ -0,0 +1,19 @@
+#version 310 es
+precision mediump float;
+precision highp int;
+
+layout(constant_id = 10) const int Value = 2;
+
+layout(set = 0, binding = 0, std140) uniform SpecConstArray
+{
+    vec4 samples[Value];
+} _15;
+
+layout(location = 0) out vec4 FragColor;
+layout(location = 0) flat in mediump int Index;
+
+void main()
+{
+    FragColor = _15.samples[Index];
+}
+
diff --git a/shaders-hlsl/asm/frag/frem.asm.frag b/shaders-hlsl/asm/frag/frem.asm.frag
new file mode 100644
index 0000000000..8350c75c04
--- /dev/null
+++ b/shaders-hlsl/asm/frag/frem.asm.frag
@@ -0,0 +1,41 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 3
+; Bound: 16
+; Schema: 0
+               OpCapability Shader
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main" %FragColor %vA %vB
+               OpExecutionMode %main OriginUpperLeft
+               OpSource ESSL 310
+               OpName %main "main"
+               OpName %FragColor "FragColor"
+               OpName %vA "vA"
+               OpName %vB "vB"
+               OpDecorate %FragColor RelaxedPrecision
+               OpDecorate %FragColor Location 0
+               OpDecorate %vA RelaxedPrecision
+               OpDecorate %vA Location 0
+               OpDecorate %12 RelaxedPrecision
+               OpDecorate %vB RelaxedPrecision
+               OpDecorate %vB Location 1
+               OpDecorate %14 RelaxedPrecision
+               OpDecorate %15 RelaxedPrecision
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+  %FragColor = OpVariable %_ptr_Output_v4float Output
+%_ptr_Input_v4float = OpTypePointer Input %v4float
+         %vA = OpVariable %_ptr_Input_v4float Input
+         %vB = OpVariable %_ptr_Input_v4float Input
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+         %12 = OpLoad %v4float %vA
+         %14 = OpLoad %v4float %vB
+         %15 = OpFRem %v4float %12 %14
+               OpStore %FragColor %15
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders-hlsl/asm/frag/function-overload-alias.asm.frag b/shaders-hlsl/asm/frag/function-overload-alias.asm.frag
new file mode 100644
index 0000000000..397aa98ce2
--- /dev/null
+++ b/shaders-hlsl/asm/frag/function-overload-alias.asm.frag
@@ -0,0 +1,153 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 3
+; Bound: 76
+; Schema: 0
+               OpCapability Shader
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main" %FragColor
+               OpExecutionMode %main OriginUpperLeft
+               OpSource ESSL 310
+               OpName %main "main"
+               OpName %foobar_vf4_ "foo"
+               OpName %a "foo"
+               OpName %foobar_vf3_ "foo"
+               OpName %a_0 "foo"
+               OpName %foobaz_vf4_ "foo"
+               OpName %a_1 "foo"
+               OpName %foobaz_vf2_ "foo"
+               OpName %a_2 "foo"
+               OpName %a_3 "foo"
+               OpName %param "foo"
+               OpName %b "foo"
+               OpName %param_0 "foo"
+               OpName %c "foo"
+               OpName %param_1 "foo"
+               OpName %d "foo"
+               OpName %param_2 "foo"
+               OpName %FragColor "FragColor"
+               OpDecorate %foobar_vf4_ RelaxedPrecision
+               OpDecorate %a RelaxedPrecision
+               OpDecorate %foobar_vf3_ RelaxedPrecision
+               OpDecorate %a_0 RelaxedPrecision
+               OpDecorate %foobaz_vf4_ RelaxedPrecision
+               OpDecorate %a_1 RelaxedPrecision
+               OpDecorate %foobaz_vf2_ RelaxedPrecision
+               OpDecorate %a_2 RelaxedPrecision
+               OpDecorate %28 RelaxedPrecision
+               OpDecorate %30 RelaxedPrecision
+               OpDecorate %31 RelaxedPrecision
+               OpDecorate %34 RelaxedPrecision
+               OpDecorate %35 RelaxedPrecision
+               OpDecorate %36 RelaxedPrecision
+               OpDecorate %37 RelaxedPrecision
+               OpDecorate %40 RelaxedPrecision
+               OpDecorate %42 RelaxedPrecision
+               OpDecorate %43 RelaxedPrecision
+               OpDecorate %46 RelaxedPrecision
+               OpDecorate %47 RelaxedPrecision
+               OpDecorate %48 RelaxedPrecision
+               OpDecorate %49 RelaxedPrecision
+               OpDecorate %a_3 RelaxedPrecision
+               OpDecorate %55 RelaxedPrecision
+               OpDecorate %b RelaxedPrecision
+               OpDecorate %59 RelaxedPrecision
+               OpDecorate %c RelaxedPrecision
+               OpDecorate %62 RelaxedPrecision
+               OpDecorate %d RelaxedPrecision
+               OpDecorate %66 RelaxedPrecision
+               OpDecorate %FragColor RelaxedPrecision
+               OpDecorate %FragColor Location 0
+               OpDecorate %69 RelaxedPrecision
+               OpDecorate %70 RelaxedPrecision
+               OpDecorate %71 RelaxedPrecision
+               OpDecorate %72 RelaxedPrecision
+               OpDecorate %73 RelaxedPrecision
+               OpDecorate %74 RelaxedPrecision
+               OpDecorate %75 RelaxedPrecision
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%_ptr_Function_v4float = OpTypePointer Function %v4float
+          %9 = OpTypeFunction %v4float %_ptr_Function_v4float
+    %v3float = OpTypeVector %float 3
+%_ptr_Function_v3float = OpTypePointer Function %v3float
+         %15 = OpTypeFunction %v4float %_ptr_Function_v3float
+    %v2float = OpTypeVector %float 2
+%_ptr_Function_v2float = OpTypePointer Function %v2float
+         %24 = OpTypeFunction %v4float %_ptr_Function_v2float
+    %float_1 = OpConstant %float 1
+    %float_2 = OpConstant %float 2
+         %53 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1
+         %57 = OpConstantComposite %v3float %float_1 %float_1 %float_1
+         %64 = OpConstantComposite %v2float %float_1 %float_1
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+  %FragColor = OpVariable %_ptr_Output_v4float Output
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+        %a_3 = OpVariable %_ptr_Function_v4float Function
+      %param = OpVariable %_ptr_Function_v4float Function
+          %b = OpVariable %_ptr_Function_v4float Function
+    %param_0 = OpVariable %_ptr_Function_v3float Function
+          %c = OpVariable %_ptr_Function_v4float Function
+    %param_1 = OpVariable %_ptr_Function_v4float Function
+          %d = OpVariable %_ptr_Function_v4float Function
+    %param_2 = OpVariable %_ptr_Function_v2float Function
+               OpStore %param %53
+         %55 = OpFunctionCall %v4float %foobar_vf4_ %param
+               OpStore %a_3 %55
+               OpStore %param_0 %57
+         %59 = OpFunctionCall %v4float %foobar_vf3_ %param_0
+               OpStore %b %59
+               OpStore %param_1 %53
+         %62 = OpFunctionCall %v4float %foobaz_vf4_ %param_1
+               OpStore %c %62
+               OpStore %param_2 %64
+         %66 = OpFunctionCall %v4float %foobaz_vf2_ %param_2
+               OpStore %d %66
+         %69 = OpLoad %v4float %a_3
+         %70 = OpLoad %v4float %b
+         %71 = OpFAdd %v4float %69 %70
+         %72 = OpLoad %v4float %c
+         %73 = OpFAdd %v4float %71 %72
+         %74 = OpLoad %v4float %d
+         %75 = OpFAdd %v4float %73 %74
+               OpStore %FragColor %75
+               OpReturn
+               OpFunctionEnd
+%foobar_vf4_ = OpFunction %v4float None %9
+          %a = OpFunctionParameter %_ptr_Function_v4float
+         %12 = OpLabel
+         %28 = OpLoad %v4float %a
+         %30 = OpCompositeConstruct %v4float %float_1 %float_1 %float_1 %float_1
+         %31 = OpFAdd %v4float %28 %30
+               OpReturnValue %31
+               OpFunctionEnd
+%foobar_vf3_ = OpFunction %v4float None %15
+        %a_0 = OpFunctionParameter %_ptr_Function_v3float
+         %18 = OpLabel
+         %34 = OpLoad %v3float %a_0
+         %35 = OpVectorShuffle %v4float %34 %34 0 1 2 2
+         %36 = OpCompositeConstruct %v4float %float_1 %float_1 %float_1 %float_1
+         %37 = OpFAdd %v4float %35 %36
+               OpReturnValue %37
+               OpFunctionEnd
+%foobaz_vf4_ = OpFunction %v4float None %9
+        %a_1 = OpFunctionParameter %_ptr_Function_v4float
+         %21 = OpLabel
+         %40 = OpLoad %v4float %a_1
+         %42 = OpCompositeConstruct %v4float %float_2 %float_2 %float_2 %float_2
+         %43 = OpFAdd %v4float %40 %42
+               OpReturnValue %43
+               OpFunctionEnd
+%foobaz_vf2_ = OpFunction %v4float None %24
+        %a_2 = OpFunctionParameter %_ptr_Function_v2float
+         %27 = OpLabel
+         %46 = OpLoad %v2float %a_2
+         %47 = OpVectorShuffle %v4float %46 %46 0 1 0 1
+         %48 = OpCompositeConstruct %v4float %float_2 %float_2 %float_2 %float_2
+         %49 = OpFAdd %v4float %47 %48
+               OpReturnValue %49
+               OpFunctionEnd
diff --git a/shaders-hlsl/comp/inverse.comp b/shaders-hlsl/comp/inverse.comp
new file mode 100644
index 0000000000..03b06d6464
--- /dev/null
+++ b/shaders-hlsl/comp/inverse.comp
@@ -0,0 +1,23 @@
+#version 450
+layout(local_size_x = 1) in;
+
+layout(std430, binding = 0) writeonly buffer MatrixOut
+{
+	mat2 m2out;
+	mat3 m3out;
+	mat4 m4out;
+};
+
+layout(std430, binding = 1) readonly buffer MatrixIn
+{
+	mat2 m2in;
+	mat3 m3in;
+	mat4 m4in;
+};
+
+void main()
+{
+	m2out = inverse(m2in);
+	m3out = inverse(m3in);
+	m4out = inverse(m4in);
+}
diff --git a/shaders-hlsl/comp/num-workgroups-alone.comp b/shaders-hlsl/comp/num-workgroups-alone.comp
new file mode 100644
index 0000000000..10b5817ce2
--- /dev/null
+++ b/shaders-hlsl/comp/num-workgroups-alone.comp
@@ -0,0 +1,13 @@
+#version 310 es
+layout(local_size_x = 1) in;
+
+layout(std430, binding = 0) buffer SSBO
+{
+	uvec3 outdata;
+};
+
+void main()
+{
+	outdata = gl_NumWorkGroups;
+}
+
diff --git a/shaders-hlsl/comp/num-workgroups-with-builtins.comp b/shaders-hlsl/comp/num-workgroups-with-builtins.comp
new file mode 100644
index 0000000000..d19a06c104
--- /dev/null
+++ b/shaders-hlsl/comp/num-workgroups-with-builtins.comp
@@ -0,0 +1,13 @@
+#version 310 es
+layout(local_size_x = 1) in;
+
+layout(std430, binding = 0) buffer SSBO
+{
+	uvec3 outdata;
+};
+
+void main()
+{
+	outdata = gl_NumWorkGroups + gl_WorkGroupID;
+}
+
diff --git a/shaders-hlsl/frag/clip-cull-distance.frag b/shaders-hlsl/frag/clip-cull-distance.frag
new file mode 100644
index 0000000000..625a7dab31
--- /dev/null
+++ b/shaders-hlsl/frag/clip-cull-distance.frag
@@ -0,0 +1,12 @@
+#version 450
+
+in float gl_ClipDistance[2];
+in float gl_CullDistance[1];
+
+layout(location = 0) out float FragColor;
+
+void main()
+{
+	FragColor = gl_ClipDistance[0] + gl_CullDistance[0] + gl_ClipDistance[1];
+}
+
diff --git a/shaders-hlsl/frag/front-facing.frag b/shaders-hlsl/frag/front-facing.frag
new file mode 100644
index 0000000000..90ca1abf4e
--- /dev/null
+++ b/shaders-hlsl/frag/front-facing.frag
@@ -0,0 +1,14 @@
+#version 310 es
+precision mediump float;
+
+layout(location = 0) out vec4 FragColor;
+layout(location = 0) in vec4 vA;
+layout(location = 1) in vec4 vB;
+
+void main()
+{
+	if (gl_FrontFacing)
+		FragColor = vA;
+	else
+		FragColor = vB;
+}
diff --git a/shaders-hlsl/frag/inf-nan-constant.frag b/shaders-hlsl/frag/inf-nan-constant.frag
new file mode 100644
index 0000000000..78b93c31e0
--- /dev/null
+++ b/shaders-hlsl/frag/inf-nan-constant.frag
@@ -0,0 +1,14 @@
+#version 310 es
+precision highp float;
+
+const float posinf = 1.0 / 0.0;
+const float neginf = -1.0 / 0.0;
+const float nan = 0.0 / 0.0;
+
+layout(location = 0) out vec3 FragColor;
+
+void main()
+{
+	FragColor = vec3(posinf, neginf, nan);
+}
+
diff --git a/shaders-hlsl/frag/input-attachment-ms.frag b/shaders-hlsl/frag/input-attachment-ms.frag
new file mode 100644
index 0000000000..b3d44c9436
--- /dev/null
+++ b/shaders-hlsl/frag/input-attachment-ms.frag
@@ -0,0 +1,15 @@
+#version 450
+
+layout(input_attachment_index = 0, set = 0, binding = 0) uniform subpassInputMS uSubpass0;
+layout(input_attachment_index = 1, set = 0, binding = 1) uniform subpassInputMS uSubpass1;
+layout(location = 0) out vec4 FragColor;
+
+vec4 load_subpasses(mediump subpassInputMS uInput)
+{
+	return subpassLoad(uInput, gl_SampleID);
+}
+
+void main()
+{
+    FragColor = subpassLoad(uSubpass0, 1) + subpassLoad(uSubpass1, 2) + load_subpasses(uSubpass0);
+}
diff --git a/shaders-hlsl/frag/input-attachment.frag b/shaders-hlsl/frag/input-attachment.frag
new file mode 100644
index 0000000000..877d0525a4
--- /dev/null
+++ b/shaders-hlsl/frag/input-attachment.frag
@@ -0,0 +1,16 @@
+#version 310 es
+precision mediump float;
+
+layout(input_attachment_index = 0, set = 0, binding = 0) uniform mediump subpassInput uSubpass0;
+layout(input_attachment_index = 1, set = 0, binding = 1) uniform mediump subpassInput uSubpass1;
+layout(location = 0) out vec4 FragColor;
+
+vec4 load_subpasses(mediump subpassInput uInput)
+{
+	return subpassLoad(uInput);
+}
+
+void main()
+{
+    FragColor = subpassLoad(uSubpass0) + load_subpasses(uSubpass1);
+}
diff --git a/shaders-hlsl/frag/point-coord-compat.frag b/shaders-hlsl/frag/point-coord-compat.frag
new file mode 100644
index 0000000000..dc7d6b55fb
--- /dev/null
+++ b/shaders-hlsl/frag/point-coord-compat.frag
@@ -0,0 +1,10 @@
+#version 310 es
+precision mediump float;
+
+layout(location = 0) out vec2 FragColor;
+
+void main()
+{
+	FragColor = gl_PointCoord;
+}
+
diff --git a/shaders-hlsl/frag/spec-constant-block-size.frag b/shaders-hlsl/frag/spec-constant-block-size.frag
new file mode 100644
index 0000000000..8d2b1f326f
--- /dev/null
+++ b/shaders-hlsl/frag/spec-constant-block-size.frag
@@ -0,0 +1,17 @@
+#version 310 es
+precision mediump float;
+
+layout(constant_id = 10) const int Value = 2;
+layout(binding = 0) uniform SpecConstArray
+{
+	vec4 samples[Value];
+};
+
+layout(location = 0) flat in int Index;
+layout(location = 0) out vec4 FragColor;
+
+void main()
+{
+	FragColor = samples[Index];
+}
+
diff --git a/shaders-hlsl/vert/clip-cull-distance.vert b/shaders-hlsl/vert/clip-cull-distance.vert
new file mode 100644
index 0000000000..34f65fc18e
--- /dev/null
+++ b/shaders-hlsl/vert/clip-cull-distance.vert
@@ -0,0 +1,11 @@
+#version 450
+out float gl_ClipDistance[2];
+out float gl_CullDistance[1];
+
+void main()
+{
+	gl_Position = vec4(1.0);
+	gl_ClipDistance[0] = 0.0;
+	gl_ClipDistance[1] = 0.0;
+	gl_CullDistance[0] = 4.0;
+}
diff --git a/shaders-hlsl/vert/point-size-compat.vert b/shaders-hlsl/vert/point-size-compat.vert
index 64eff36315..ed86c764a6 100644
--- a/shaders-hlsl/vert/point-size-compat.vert
+++ b/shaders-hlsl/vert/point-size-compat.vert
@@ -3,6 +3,5 @@
 void main()
 {
 	gl_Position = vec4(1.0);
-	gl_PointSize = 10.0;
+	gl_PointSize = 1.0;
 }
-
diff --git a/shaders-hlsl/vert/return-array.vert b/shaders-hlsl/vert/return-array.vert
new file mode 100644
index 0000000000..708460114e
--- /dev/null
+++ b/shaders-hlsl/vert/return-array.vert
@@ -0,0 +1,22 @@
+#version 310 es
+
+layout(location = 0) in vec4 vInput0;
+layout(location = 1) in vec4 vInput1;
+
+vec4[2] test()
+{
+	return vec4[](vec4(10.0), vec4(20.0));
+}
+
+vec4[2] test2()
+{
+	vec4 foobar[2];
+	foobar[0] = vInput0;
+	foobar[1] = vInput1;
+	return foobar;
+}
+
+void main()
+{
+	gl_Position = test()[0] + test2()[1];
+}
diff --git a/shaders-msl/asm/frag/frem.asm.frag b/shaders-msl/asm/frag/frem.asm.frag
new file mode 100644
index 0000000000..8350c75c04
--- /dev/null
+++ b/shaders-msl/asm/frag/frem.asm.frag
@@ -0,0 +1,41 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 3
+; Bound: 16
+; Schema: 0
+               OpCapability Shader
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main" %FragColor %vA %vB
+               OpExecutionMode %main OriginUpperLeft
+               OpSource ESSL 310
+               OpName %main "main"
+               OpName %FragColor "FragColor"
+               OpName %vA "vA"
+               OpName %vB "vB"
+               OpDecorate %FragColor RelaxedPrecision
+               OpDecorate %FragColor Location 0
+               OpDecorate %vA RelaxedPrecision
+               OpDecorate %vA Location 0
+               OpDecorate %12 RelaxedPrecision
+               OpDecorate %vB RelaxedPrecision
+               OpDecorate %vB Location 1
+               OpDecorate %14 RelaxedPrecision
+               OpDecorate %15 RelaxedPrecision
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+  %FragColor = OpVariable %_ptr_Output_v4float Output
+%_ptr_Input_v4float = OpTypePointer Input %v4float
+         %vA = OpVariable %_ptr_Input_v4float Input
+         %vB = OpVariable %_ptr_Input_v4float Input
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+         %12 = OpLoad %v4float %vA
+         %14 = OpLoad %v4float %vB
+         %15 = OpFRem %v4float %12 %14
+               OpStore %FragColor %15
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders-msl/asm/frag/function-overload-alias.asm.frag b/shaders-msl/asm/frag/function-overload-alias.asm.frag
new file mode 100644
index 0000000000..397aa98ce2
--- /dev/null
+++ b/shaders-msl/asm/frag/function-overload-alias.asm.frag
@@ -0,0 +1,153 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 3
+; Bound: 76
+; Schema: 0
+               OpCapability Shader
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main" %FragColor
+               OpExecutionMode %main OriginUpperLeft
+               OpSource ESSL 310
+               OpName %main "main"
+               OpName %foobar_vf4_ "foo" ; from here on every function, parameter and local gets the same debug name "foo"
+               OpName %a "foo"
+               OpName %foobar_vf3_ "foo"
+               OpName %a_0 "foo"
+               OpName %foobaz_vf4_ "foo"
+               OpName %a_1 "foo"
+               OpName %foobaz_vf2_ "foo"
+               OpName %a_2 "foo"
+               OpName %a_3 "foo"
+               OpName %param "foo"
+               OpName %b "foo"
+               OpName %param_0 "foo"
+               OpName %c "foo"
+               OpName %param_1 "foo"
+               OpName %d "foo"
+               OpName %param_2 "foo"
+               OpName %FragColor "FragColor" ; the output keeps a distinct name
+               OpDecorate %foobar_vf4_ RelaxedPrecision
+               OpDecorate %a RelaxedPrecision
+               OpDecorate %foobar_vf3_ RelaxedPrecision
+               OpDecorate %a_0 RelaxedPrecision
+               OpDecorate %foobaz_vf4_ RelaxedPrecision
+               OpDecorate %a_1 RelaxedPrecision
+               OpDecorate %foobaz_vf2_ RelaxedPrecision
+               OpDecorate %a_2 RelaxedPrecision
+               OpDecorate %28 RelaxedPrecision
+               OpDecorate %30 RelaxedPrecision
+               OpDecorate %31 RelaxedPrecision
+               OpDecorate %34 RelaxedPrecision
+               OpDecorate %35 RelaxedPrecision
+               OpDecorate %36 RelaxedPrecision
+               OpDecorate %37 RelaxedPrecision
+               OpDecorate %40 RelaxedPrecision
+               OpDecorate %42 RelaxedPrecision
+               OpDecorate %43 RelaxedPrecision
+               OpDecorate %46 RelaxedPrecision
+               OpDecorate %47 RelaxedPrecision
+               OpDecorate %48 RelaxedPrecision
+               OpDecorate %49 RelaxedPrecision
+               OpDecorate %a_3 RelaxedPrecision
+               OpDecorate %55 RelaxedPrecision
+               OpDecorate %b RelaxedPrecision
+               OpDecorate %59 RelaxedPrecision
+               OpDecorate %c RelaxedPrecision
+               OpDecorate %62 RelaxedPrecision
+               OpDecorate %d RelaxedPrecision
+               OpDecorate %66 RelaxedPrecision
+               OpDecorate %FragColor RelaxedPrecision
+               OpDecorate %FragColor Location 0
+               OpDecorate %69 RelaxedPrecision
+               OpDecorate %70 RelaxedPrecision
+               OpDecorate %71 RelaxedPrecision
+               OpDecorate %72 RelaxedPrecision
+               OpDecorate %73 RelaxedPrecision
+               OpDecorate %74 RelaxedPrecision
+               OpDecorate %75 RelaxedPrecision
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%_ptr_Function_v4float = OpTypePointer Function %v4float
+          %9 = OpTypeFunction %v4float %_ptr_Function_v4float
+    %v3float = OpTypeVector %float 3
+%_ptr_Function_v3float = OpTypePointer Function %v3float
+         %15 = OpTypeFunction %v4float %_ptr_Function_v3float
+    %v2float = OpTypeVector %float 2
+%_ptr_Function_v2float = OpTypePointer Function %v2float
+         %24 = OpTypeFunction %v4float %_ptr_Function_v2float
+    %float_1 = OpConstant %float 1
+    %float_2 = OpConstant %float 2
+         %53 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1
+         %57 = OpConstantComposite %v3float %float_1 %float_1 %float_1
+         %64 = OpConstantComposite %v2float %float_1 %float_1
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+  %FragColor = OpVariable %_ptr_Output_v4float Output
+       %main = OpFunction %void None %3 ; entry point: calls each of the four functions once and stores the sum of the results to FragColor
+          %5 = OpLabel
+        %a_3 = OpVariable %_ptr_Function_v4float Function
+      %param = OpVariable %_ptr_Function_v4float Function
+          %b = OpVariable %_ptr_Function_v4float Function
+    %param_0 = OpVariable %_ptr_Function_v3float Function
+          %c = OpVariable %_ptr_Function_v4float Function
+    %param_1 = OpVariable %_ptr_Function_v4float Function
+          %d = OpVariable %_ptr_Function_v4float Function
+    %param_2 = OpVariable %_ptr_Function_v2float Function
+               OpStore %param %53
+         %55 = OpFunctionCall %v4float %foobar_vf4_ %param ; argument: pointer to vec4(1.0)
+               OpStore %a_3 %55
+               OpStore %param_0 %57
+         %59 = OpFunctionCall %v4float %foobar_vf3_ %param_0 ; argument: pointer to vec3(1.0)
+               OpStore %b %59
+               OpStore %param_1 %53
+         %62 = OpFunctionCall %v4float %foobaz_vf4_ %param_1 ; argument: pointer to vec4(1.0)
+               OpStore %c %62
+               OpStore %param_2 %64
+         %66 = OpFunctionCall %v4float %foobaz_vf2_ %param_2 ; argument: pointer to vec2(1.0)
+               OpStore %d %66
+         %69 = OpLoad %v4float %a_3
+         %70 = OpLoad %v4float %b
+         %71 = OpFAdd %v4float %69 %70
+         %72 = OpLoad %v4float %c
+         %73 = OpFAdd %v4float %71 %72
+         %74 = OpLoad %v4float %d
+         %75 = OpFAdd %v4float %73 %74 ; a_3 + b + c + d
+               OpStore %FragColor %75
+               OpReturn
+               OpFunctionEnd
+%foobar_vf4_ = OpFunction %v4float None %9 ; takes a pointer to vec4, returns the pointee + vec4(1.0)
+          %a = OpFunctionParameter %_ptr_Function_v4float
+         %12 = OpLabel
+         %28 = OpLoad %v4float %a
+         %30 = OpCompositeConstruct %v4float %float_1 %float_1 %float_1 %float_1 ; vec4(1.0)
+         %31 = OpFAdd %v4float %28 %30
+               OpReturnValue %31
+               OpFunctionEnd
+%foobar_vf3_ = OpFunction %v4float None %15 ; takes a pointer to vec3, returns the widened pointee + vec4(1.0)
+        %a_0 = OpFunctionParameter %_ptr_Function_v3float
+         %18 = OpLabel
+         %34 = OpLoad %v3float %a_0
+         %35 = OpVectorShuffle %v4float %34 %34 0 1 2 2 ; widen vec3 to vec4 as (x, y, z, z)
+         %36 = OpCompositeConstruct %v4float %float_1 %float_1 %float_1 %float_1 ; vec4(1.0)
+         %37 = OpFAdd %v4float %35 %36
+               OpReturnValue %37
+               OpFunctionEnd
+%foobaz_vf4_ = OpFunction %v4float None %9 ; same function type (%9) as %foobar_vf4_; returns the pointee + vec4(2.0)
+        %a_1 = OpFunctionParameter %_ptr_Function_v4float
+         %21 = OpLabel
+         %40 = OpLoad %v4float %a_1
+         %42 = OpCompositeConstruct %v4float %float_2 %float_2 %float_2 %float_2 ; vec4(2.0)
+         %43 = OpFAdd %v4float %40 %42
+               OpReturnValue %43
+               OpFunctionEnd
+%foobaz_vf2_ = OpFunction %v4float None %24 ; takes a pointer to vec2, returns the widened pointee + vec4(2.0)
+        %a_2 = OpFunctionParameter %_ptr_Function_v2float
+         %27 = OpLabel
+         %46 = OpLoad %v2float %a_2
+         %47 = OpVectorShuffle %v4float %46 %46 0 1 0 1 ; widen vec2 to vec4 as (x, y, x, y)
+         %48 = OpCompositeConstruct %v4float %float_2 %float_2 %float_2 %float_2 ; vec4(2.0)
+         %49 = OpFAdd %v4float %47 %48
+               OpReturnValue %49
+               OpFunctionEnd
diff --git a/shaders-msl/comp/inverse.comp b/shaders-msl/comp/inverse.comp
new file mode 100644
index 0000000000..03b06d6464
--- /dev/null
+++ b/shaders-msl/comp/inverse.comp
@@ -0,0 +1,23 @@
+#version 450
+layout(local_size_x = 1) in;
+
+layout(std430, binding = 0) writeonly buffer MatrixOut
+{
+	mat2 m2out;
+	mat3 m3out;
+	mat4 m4out;
+};
+
+layout(std430, binding = 1) readonly buffer MatrixIn
+{
+	mat2 m2in;
+	mat3 m3in;
+	mat4 m4in;
+};
+
+void main() // inverts each matrix read from MatrixIn and stores it in the matching MatrixOut member
+{
+	m2out = inverse(m2in); // mat2
+	m3out = inverse(m3in); // mat3
+	m4out = inverse(m4in); // mat4
+}
diff --git a/shaders-msl/comp/struct-nested.comp b/shaders-msl/comp/struct-nested.comp
new file mode 100644
index 0000000000..d9645cbc4c
--- /dev/null
+++ b/shaders-msl/comp/struct-nested.comp
@@ -0,0 +1,20 @@
+#version 450
+				   
+struct s1
+{
+	int a;
+};
+
+struct s2
+{
+	s1 b;
+};
+
+layout(std430, binding = 1) buffer dstbuffer{ s2 test[]; };
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() // builds a local s2 value and copies it into the first element of the SSBO array
+{
+	s2 testVal;
+	testVal.b.a = 0; // write through the nested struct member (s2 -> s1 -> int)
+	test[0] = testVal; // whole-struct store into the buffer
+}
\ No newline at end of file
diff --git a/shaders-msl/comp/struct-packing.comp b/shaders-msl/comp/struct-packing.comp
index 04b933dd18..5baf45cb3c 100644
--- a/shaders-msl/comp/struct-packing.comp
+++ b/shaders-msl/comp/struct-packing.comp
@@ -72,5 +72,6 @@ layout(binding = 0, std140) buffer SSBO0
 void main()
 {
     ssbo_430.content = ssbo_140.content;
+    ssbo_430.content.m1.a = ssbo_430.m6[1][1] * ssbo_430.content.m3.a;	// test packed matrix access: reads ssbo_430.m6[1][1] and scales it by content.m3.a
 }
 
diff --git a/shaders-msl/frag/front-facing.frag b/shaders-msl/frag/front-facing.frag
new file mode 100644
index 0000000000..90ca1abf4e
--- /dev/null
+++ b/shaders-msl/frag/front-facing.frag
@@ -0,0 +1,14 @@
+#version 310 es
+precision mediump float;
+
+layout(location = 0) out vec4 FragColor;
+layout(location = 0) in vec4 vA;
+layout(location = 1) in vec4 vB;
+
+void main() // writes vA or vB to FragColor depending on gl_FrontFacing
+{
+	if (gl_FrontFacing) // built-in fragment input: true when the fragment belongs to a front-facing primitive
+		FragColor = vA;
+	else
+		FragColor = vB;
+}
diff --git a/shaders-msl/frag/inf-nan-constant.frag b/shaders-msl/frag/inf-nan-constant.frag
new file mode 100644
index 0000000000..78b93c31e0
--- /dev/null
+++ b/shaders-msl/frag/inf-nan-constant.frag
@@ -0,0 +1,14 @@
+#version 310 es
+precision highp float;
+
+const float posinf = 1.0 / 0.0; // constant expression meant to produce +infinity
+const float neginf = -1.0 / 0.0; // constant expression meant to produce -infinity
+const float nan = 0.0 / 0.0; // constant expression meant to produce NaN
+
+layout(location = 0) out vec3 FragColor;
+
+void main() // outputs the three special float constants, one per colour channel
+{
+	FragColor = vec3(posinf, neginf, nan);
+}
+
diff --git a/shaders-msl/frag/input-attachment-ms.frag b/shaders-msl/frag/input-attachment-ms.frag
new file mode 100644
index 0000000000..b3d44c9436
--- /dev/null
+++ b/shaders-msl/frag/input-attachment-ms.frag
@@ -0,0 +1,15 @@
+#version 450
+
+layout(input_attachment_index = 0, set = 0, binding = 0) uniform subpassInputMS uSubpass0;
+layout(input_attachment_index = 1, set = 0, binding = 1) uniform subpassInputMS uSubpass1;
+layout(location = 0) out vec4 FragColor;
+
+vec4 load_subpasses(mediump subpassInputMS uInput) // takes a multisampled subpass input as a function argument
+{
+	return subpassLoad(uInput, gl_SampleID); // loads the sample selected by the gl_SampleID built-in
+}
+
+void main() // sums two direct loads with literal sample indices and one load through load_subpasses()
+{
+    FragColor = subpassLoad(uSubpass0, 1) + subpassLoad(uSubpass1, 2) + load_subpasses(uSubpass0); // sample 1 of uSubpass0, sample 2 of uSubpass1, then uSubpass0 via the helper
+}
diff --git a/shaders-msl/frag/input-attachment.frag b/shaders-msl/frag/input-attachment.frag
new file mode 100644
index 0000000000..877d0525a4
--- /dev/null
+++ b/shaders-msl/frag/input-attachment.frag
@@ -0,0 +1,16 @@
+#version 310 es
+precision mediump float;
+
+layout(input_attachment_index = 0, set = 0, binding = 0) uniform mediump subpassInput uSubpass0;
+layout(input_attachment_index = 1, set = 0, binding = 1) uniform mediump subpassInput uSubpass1;
+layout(location = 0) out vec4 FragColor;
+
+vec4 load_subpasses(mediump subpassInput uInput) // takes a subpass input as a function argument
+{
+	return subpassLoad(uInput); // single-sample load; no coordinate or sample index is passed
+}
+
+void main() // sums one direct subpass load and one load made through load_subpasses()
+{
+    FragColor = subpassLoad(uSubpass0) + load_subpasses(uSubpass1); // uSubpass0 is read directly, uSubpass1 through the helper
+}
diff --git a/shaders-msl/frag/sample-depth-separate-image-sampler.frag b/shaders-msl/frag/sample-depth-separate-image-sampler.frag
new file mode 100644
index 0000000000..db1f5e983a
--- /dev/null
+++ b/shaders-msl/frag/sample-depth-separate-image-sampler.frag
@@ -0,0 +1,22 @@
+#version 450
+
+layout(set = 0, binding = 0) uniform texture2D uDepth;
+layout(set = 0, binding = 1) uniform texture2D uColor;
+layout(set = 0, binding = 2) uniform sampler uSampler;
+layout(set = 0, binding = 3) uniform samplerShadow uSamplerShadow;
+layout(location = 0) out float FragColor;
+
+float sample_depth_from_function(texture2D uT, samplerShadow uS) // depth-comparison sample using a separate texture and shadow sampler passed as arguments
+{
+	return texture(sampler2DShadow(uT, uS), vec3(0.5)); // texture and sampler are combined at the call site; the vec3 carries the uv and the reference value
+}
+
+float sample_color_from_function(texture2D uT, sampler uS) // regular colour sample using a separate texture and sampler passed as arguments
+{
+	return texture(sampler2D(uT, uS), vec2(0.5)).x; // only the first channel of the sampled vec4 is returned
+}
+
+void main() // adds one shadow-compare sample and one colour sample, each made through a helper function
+{
+	FragColor = sample_depth_from_function(uDepth, uSamplerShadow) + sample_color_from_function(uColor, uSampler); // uDepth pairs with the shadow sampler, uColor with the plain sampler
+}
diff --git a/shaders-msl/frag/sample-mask.frag b/shaders-msl/frag/sample-mask.frag
new file mode 100644
index 0000000000..33ff0b2e69
--- /dev/null
+++ b/shaders-msl/frag/sample-mask.frag
@@ -0,0 +1,10 @@
+#version 450
+
+layout(location = 0) out vec4 FragColor;
+
+void main() // writes a constant colour and also writes the gl_SampleMask built-in output
+{
+	FragColor = vec4(1.0);
+	gl_SampleMask[0] = 0; // first (index 0) mask word set to zero
+}
+
diff --git a/shaders-msl/frag/spec-constant-block-size.frag b/shaders-msl/frag/spec-constant-block-size.frag
new file mode 100644
index 0000000000..8d2b1f326f
--- /dev/null
+++ b/shaders-msl/frag/spec-constant-block-size.frag
@@ -0,0 +1,17 @@
+#version 310 es
+precision mediump float;
+
+layout(constant_id = 10) const int Value = 2; // specialization constant with id 10 and default value 2
+layout(binding = 0) uniform SpecConstArray
+{
+	vec4 samples[Value]; // array length inside the block comes from the specialization constant
+};
+
+layout(location = 0) flat in int Index;
+layout(location = 0) out vec4 FragColor;
+
+void main() // outputs the element of samples[] selected by the flat input Index
+{
+	FragColor = samples[Index]; // dynamic index into the spec-constant-sized array; no bounds check is done here
+}
+
diff --git a/shaders-msl/vert/packed_matrix.vert b/shaders-msl/vert/packed_matrix.vert
new file mode 100644
index 0000000000..4d99d2190a
--- /dev/null
+++ b/shaders-msl/vert/packed_matrix.vert
@@ -0,0 +1,41 @@
+#version 450
+
+layout(binding = 13, std140) uniform _1365_18812
+{
+    layout(row_major) mat4x3 _m0;
+    layout(row_major) mat4x3 _m1;
+} _18812;
+
+layout(binding = 12, std140) uniform _1126_22044
+{
+    layout(row_major) mat4 _m0;
+    layout(row_major) mat4 _m1;
+    float _m9;
+    vec3 _m10;
+    float _m11;
+    vec3 _m12;
+    float _m17;
+    float _m18;
+    float _m19;
+    vec2 _m20;
+} _22044;
+
+layout(location = 0) out vec3 _3976;
+layout(location = 0) in vec4 _5275;
+
+vec3 _2;
+
+void main() // writes a normalized vec3 to _3976 and a position with negated Y to gl_Position
+{
+    vec3 _23783;
+    do // the unconditional break and while (false) mean this body runs exactly once
+    {
+        _23783 = normalize(_18812._m1 * vec4(_5275.xyz, 0.0)); // row_major mat4x3 member multiplied by a vec4 with w = 0
+        break;
+    } while (false);
+    vec4 _14995 = vec4(_22044._m10 + (_5275.xyz * (_22044._m17 + _22044._m18)), 1.0) * _22044._m0; // vector-times-matrix with the row_major mat4 member
+    _3976 = _23783;
+    vec4 _6282 = _14995;
+    _6282.y = -_14995.y; // negate Y before the final write
+    gl_Position = _6282;
+}
diff --git a/shaders-msl/vert/return-array.vert b/shaders-msl/vert/return-array.vert
new file mode 100644
index 0000000000..708460114e
--- /dev/null
+++ b/shaders-msl/vert/return-array.vert
@@ -0,0 +1,22 @@
+#version 310 es
+
+layout(location = 0) in vec4 vInput0;
+layout(location = 1) in vec4 vInput1;
+
+vec4[2] test() // returns a two-element array by value
+{
+	return vec4[](vec4(10.0), vec4(20.0)); // built in place with an unsized array constructor
+}
+
+vec4[2] test2() // returns a local array that was filled from the two vertex inputs
+{
+	vec4 foobar[2];
+	foobar[0] = vInput0;
+	foobar[1] = vInput1;
+	return foobar; // the local array is returned by value
+}
+
+void main() // indexes the arrays returned by test() and test2() directly in one expression
+{
+	gl_Position = test()[0] + test2()[1]; // vec4(10.0) + vInput1
+}
diff --git a/shaders-other/README.md b/shaders-other/README.md
new file mode 100644
index 0000000000..6d454813eb
--- /dev/null
+++ b/shaders-other/README.md
@@ -0,0 +1,4 @@
+These shaders are not actually run yet as part of any test suite,
+but are kept here because they have been used to manually test various aspects of SPIRV-Cross in the past.
+
+These would ideally be part of the test suite in some way.
diff --git a/shaders-other/aliased-entry-point-names.asm b/shaders-other/aliased-entry-point-names.asm
new file mode 100644
index 0000000000..d60cf3039c
--- /dev/null
+++ b/shaders-other/aliased-entry-point-names.asm
@@ -0,0 +1,60 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 3
+; Bound: 20
+; Schema: 0
+               OpCapability Shader
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Vertex %main "main" %_
+               OpEntryPoint Vertex %main2 "main2" %_
+               OpEntryPoint Fragment %main3 "main" %FragColor ; same name string as the vertex entry point %main, different execution model
+               OpEntryPoint Fragment %main4 "main2" %FragColor ; same name string as %main2; NOTE(review): this file declares no OpExecutionMode (e.g. OriginUpperLeft) for the fragment entry points - confirm this is acceptable for the tools used
+               OpSource GLSL 450
+               OpMemberDecorate %gl_PerVertex 0 BuiltIn Position
+               OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize
+               OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance
+               OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance
+			   OpDecorate %FragColor Location 0
+               OpDecorate %gl_PerVertex Block
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+	%v4floatptr = OpTypePointer Output %v4float
+       %uint = OpTypeInt 32 0
+     %uint_1 = OpConstant %uint 1
+%_arr_float_uint_1 = OpTypeArray %float %uint_1
+%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1
+%_ptr_Output_gl_PerVertex = OpTypePointer Output %gl_PerVertex
+          %_ = OpVariable %_ptr_Output_gl_PerVertex Output
+  %FragColor = OpVariable %v4floatptr Output
+        %int = OpTypeInt 32 1
+      %int_0 = OpConstant %int 0
+    %float_1 = OpConstant %float 1
+         %17 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1
+    %float_2 = OpConstant %float 2
+         %18 = OpConstantComposite %v4float %float_2 %float_2 %float_2 %float_2
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+       %main = OpFunction %void None %3 ; vertex entry point "main": stores vec4(1.0) to the Position member
+          %5 = OpLabel
+         %19 = OpAccessChain %_ptr_Output_v4float %_ %int_0 ; member 0 of %gl_PerVertex (decorated BuiltIn Position)
+               OpStore %19 %17
+               OpReturn
+               OpFunctionEnd
+      %main2 = OpFunction %void None %3 ; vertex entry point "main2": stores vec4(2.0) to the Position member
+          %6 = OpLabel
+         %20 = OpAccessChain %_ptr_Output_v4float %_ %int_0 ; member 0 of %gl_PerVertex (decorated BuiltIn Position)
+               OpStore %20 %18
+               OpReturn
+               OpFunctionEnd
+	 %main3 = OpFunction %void None %3 ; fragment entry point registered under the name "main"
+          %7 = OpLabel
+		  	   OpStore %FragColor %17 ; %17 is the constant vec4(1.0)
+			   OpReturn
+			   OpFunctionEnd
+	 %main4 = OpFunction %void None %3 ; fragment entry point registered under the name "main2"
+          %8 = OpLabel
+		  	   OpStore %FragColor %18 ; %18 is the constant vec4(2.0)
+			   OpReturn
+			   OpFunctionEnd
diff --git a/shaders/asm/frag/frem.asm.frag b/shaders/asm/frag/frem.asm.frag
new file mode 100644
index 0000000000..8350c75c04
--- /dev/null
+++ b/shaders/asm/frag/frem.asm.frag
@@ -0,0 +1,41 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 3
+; Bound: 16
+; Schema: 0
+               OpCapability Shader
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main" %FragColor %vA %vB
+               OpExecutionMode %main OriginUpperLeft
+               OpSource ESSL 310
+               OpName %main "main"
+               OpName %FragColor "FragColor"
+               OpName %vA "vA"
+               OpName %vB "vB"
+               OpDecorate %FragColor RelaxedPrecision
+               OpDecorate %FragColor Location 0
+               OpDecorate %vA RelaxedPrecision
+               OpDecorate %vA Location 0
+               OpDecorate %12 RelaxedPrecision
+               OpDecorate %vB RelaxedPrecision
+               OpDecorate %vB Location 1
+               OpDecorate %14 RelaxedPrecision
+               OpDecorate %15 RelaxedPrecision
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+  %FragColor = OpVariable %_ptr_Output_v4float Output
+%_ptr_Input_v4float = OpTypePointer Input %v4float
+         %vA = OpVariable %_ptr_Input_v4float Input
+         %vB = OpVariable %_ptr_Input_v4float Input
+       %main = OpFunction %void None %3 ; fragment entry point: stores OpFRem(vA, vB) to FragColor
+          %5 = OpLabel
+         %12 = OpLoad %v4float %vA ; first operand (dividend), read from input vA
+         %14 = OpLoad %v4float %vB ; second operand (divisor), read from input vB
+         %15 = OpFRem %v4float %12 %14 ; component-wise remainder; SPIR-V defines the sign of a non-zero result to match operand 1
+               OpStore %FragColor %15
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders/asm/frag/function-overload-alias.asm.frag b/shaders/asm/frag/function-overload-alias.asm.frag
new file mode 100644
index 0000000000..397aa98ce2
--- /dev/null
+++ b/shaders/asm/frag/function-overload-alias.asm.frag
@@ -0,0 +1,153 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 3
+; Bound: 76
+; Schema: 0
+               OpCapability Shader
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main" %FragColor
+               OpExecutionMode %main OriginUpperLeft
+               OpSource ESSL 310
+               OpName %main "main"
+               OpName %foobar_vf4_ "foo" ; from here on every function, parameter and local gets the same debug name "foo"
+               OpName %a "foo"
+               OpName %foobar_vf3_ "foo"
+               OpName %a_0 "foo"
+               OpName %foobaz_vf4_ "foo"
+               OpName %a_1 "foo"
+               OpName %foobaz_vf2_ "foo"
+               OpName %a_2 "foo"
+               OpName %a_3 "foo"
+               OpName %param "foo"
+               OpName %b "foo"
+               OpName %param_0 "foo"
+               OpName %c "foo"
+               OpName %param_1 "foo"
+               OpName %d "foo"
+               OpName %param_2 "foo"
+               OpName %FragColor "FragColor" ; the output keeps a distinct name
+               OpDecorate %foobar_vf4_ RelaxedPrecision
+               OpDecorate %a RelaxedPrecision
+               OpDecorate %foobar_vf3_ RelaxedPrecision
+               OpDecorate %a_0 RelaxedPrecision
+               OpDecorate %foobaz_vf4_ RelaxedPrecision
+               OpDecorate %a_1 RelaxedPrecision
+               OpDecorate %foobaz_vf2_ RelaxedPrecision
+               OpDecorate %a_2 RelaxedPrecision
+               OpDecorate %28 RelaxedPrecision
+               OpDecorate %30 RelaxedPrecision
+               OpDecorate %31 RelaxedPrecision
+               OpDecorate %34 RelaxedPrecision
+               OpDecorate %35 RelaxedPrecision
+               OpDecorate %36 RelaxedPrecision
+               OpDecorate %37 RelaxedPrecision
+               OpDecorate %40 RelaxedPrecision
+               OpDecorate %42 RelaxedPrecision
+               OpDecorate %43 RelaxedPrecision
+               OpDecorate %46 RelaxedPrecision
+               OpDecorate %47 RelaxedPrecision
+               OpDecorate %48 RelaxedPrecision
+               OpDecorate %49 RelaxedPrecision
+               OpDecorate %a_3 RelaxedPrecision
+               OpDecorate %55 RelaxedPrecision
+               OpDecorate %b RelaxedPrecision
+               OpDecorate %59 RelaxedPrecision
+               OpDecorate %c RelaxedPrecision
+               OpDecorate %62 RelaxedPrecision
+               OpDecorate %d RelaxedPrecision
+               OpDecorate %66 RelaxedPrecision
+               OpDecorate %FragColor RelaxedPrecision
+               OpDecorate %FragColor Location 0
+               OpDecorate %69 RelaxedPrecision
+               OpDecorate %70 RelaxedPrecision
+               OpDecorate %71 RelaxedPrecision
+               OpDecorate %72 RelaxedPrecision
+               OpDecorate %73 RelaxedPrecision
+               OpDecorate %74 RelaxedPrecision
+               OpDecorate %75 RelaxedPrecision
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%_ptr_Function_v4float = OpTypePointer Function %v4float
+          %9 = OpTypeFunction %v4float %_ptr_Function_v4float
+    %v3float = OpTypeVector %float 3
+%_ptr_Function_v3float = OpTypePointer Function %v3float
+         %15 = OpTypeFunction %v4float %_ptr_Function_v3float
+    %v2float = OpTypeVector %float 2
+%_ptr_Function_v2float = OpTypePointer Function %v2float
+         %24 = OpTypeFunction %v4float %_ptr_Function_v2float
+    %float_1 = OpConstant %float 1
+    %float_2 = OpConstant %float 2
+         %53 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1
+         %57 = OpConstantComposite %v3float %float_1 %float_1 %float_1
+         %64 = OpConstantComposite %v2float %float_1 %float_1
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+  %FragColor = OpVariable %_ptr_Output_v4float Output
+       %main = OpFunction %void None %3 ; entry point: calls each of the four functions once and stores the sum of the results to FragColor
+          %5 = OpLabel
+        %a_3 = OpVariable %_ptr_Function_v4float Function
+      %param = OpVariable %_ptr_Function_v4float Function
+          %b = OpVariable %_ptr_Function_v4float Function
+    %param_0 = OpVariable %_ptr_Function_v3float Function
+          %c = OpVariable %_ptr_Function_v4float Function
+    %param_1 = OpVariable %_ptr_Function_v4float Function
+          %d = OpVariable %_ptr_Function_v4float Function
+    %param_2 = OpVariable %_ptr_Function_v2float Function
+               OpStore %param %53
+         %55 = OpFunctionCall %v4float %foobar_vf4_ %param ; argument: pointer to vec4(1.0)
+               OpStore %a_3 %55
+               OpStore %param_0 %57
+         %59 = OpFunctionCall %v4float %foobar_vf3_ %param_0 ; argument: pointer to vec3(1.0)
+               OpStore %b %59
+               OpStore %param_1 %53
+         %62 = OpFunctionCall %v4float %foobaz_vf4_ %param_1 ; argument: pointer to vec4(1.0)
+               OpStore %c %62
+               OpStore %param_2 %64
+         %66 = OpFunctionCall %v4float %foobaz_vf2_ %param_2 ; argument: pointer to vec2(1.0)
+               OpStore %d %66
+         %69 = OpLoad %v4float %a_3
+         %70 = OpLoad %v4float %b
+         %71 = OpFAdd %v4float %69 %70
+         %72 = OpLoad %v4float %c
+         %73 = OpFAdd %v4float %71 %72
+         %74 = OpLoad %v4float %d
+         %75 = OpFAdd %v4float %73 %74 ; a_3 + b + c + d
+               OpStore %FragColor %75
+               OpReturn
+               OpFunctionEnd
+%foobar_vf4_ = OpFunction %v4float None %9 ; takes a pointer to vec4, returns the pointee + vec4(1.0)
+          %a = OpFunctionParameter %_ptr_Function_v4float
+         %12 = OpLabel
+         %28 = OpLoad %v4float %a
+         %30 = OpCompositeConstruct %v4float %float_1 %float_1 %float_1 %float_1 ; vec4(1.0)
+         %31 = OpFAdd %v4float %28 %30
+               OpReturnValue %31
+               OpFunctionEnd
+%foobar_vf3_ = OpFunction %v4float None %15 ; takes a pointer to vec3, returns the widened pointee + vec4(1.0)
+        %a_0 = OpFunctionParameter %_ptr_Function_v3float
+         %18 = OpLabel
+         %34 = OpLoad %v3float %a_0
+         %35 = OpVectorShuffle %v4float %34 %34 0 1 2 2 ; widen vec3 to vec4 as (x, y, z, z)
+         %36 = OpCompositeConstruct %v4float %float_1 %float_1 %float_1 %float_1 ; vec4(1.0)
+         %37 = OpFAdd %v4float %35 %36
+               OpReturnValue %37
+               OpFunctionEnd
+%foobaz_vf4_ = OpFunction %v4float None %9 ; same function type (%9) as %foobar_vf4_; returns the pointee + vec4(2.0)
+        %a_1 = OpFunctionParameter %_ptr_Function_v4float
+         %21 = OpLabel
+         %40 = OpLoad %v4float %a_1
+         %42 = OpCompositeConstruct %v4float %float_2 %float_2 %float_2 %float_2 ; vec4(2.0)
+         %43 = OpFAdd %v4float %40 %42
+               OpReturnValue %43
+               OpFunctionEnd
+%foobaz_vf2_ = OpFunction %v4float None %24 ; takes a pointer to vec2, returns the widened pointee + vec4(2.0)
+        %a_2 = OpFunctionParameter %_ptr_Function_v2float
+         %27 = OpLabel
+         %46 = OpLoad %v2float %a_2
+         %47 = OpVectorShuffle %v4float %46 %46 0 1 0 1 ; widen vec2 to vec4 as (x, y, x, y)
+         %48 = OpCompositeConstruct %v4float %float_2 %float_2 %float_2 %float_2 ; vec4(2.0)
+         %49 = OpFAdd %v4float %47 %48
+               OpReturnValue %49
+               OpFunctionEnd
diff --git a/shaders/asm/frag/image-fetch-no-sampler.asm.vk.frag b/shaders/asm/frag/image-fetch-no-sampler.asm.vk.frag
new file mode 100644
index 0000000000..a3d64c09d7
--- /dev/null
+++ b/shaders/asm/frag/image-fetch-no-sampler.asm.vk.frag
@@ -0,0 +1,163 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 2
+; Bound: 113
+; Schema: 0
+               OpCapability Shader
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main" %xIn_1 %_entryPointOutput
+               OpExecutionMode %main OriginUpperLeft
+               OpSource HLSL 500
+               OpName %main "main"
+               OpName %sample_fetch_t21_vi3_ "sample_fetch(t21;vi3;"
+               OpName %tex "tex"
+               OpName %UV "UV"
+               OpName %sample_sampler_t21_vf2_ "sample_sampler(t21;vf2;"
+               OpName %tex_0 "tex"
+               OpName %UV_0 "UV"
+               OpName %_main_vf4_ "@main(vf4;"
+               OpName %xIn "xIn"
+               OpName %Sampler "Sampler"
+               OpName %coord "coord"
+               OpName %value "value"
+               OpName %SampledImage "SampledImage"
+               OpName %param "param"
+               OpName %param_0 "param"
+               OpName %param_1 "param"
+               OpName %param_2 "param"
+               OpName %xIn_0 "xIn"
+               OpName %xIn_1 "xIn"
+               OpName %_entryPointOutput "@entryPointOutput"
+               OpName %param_3 "param"
+               OpDecorate %Sampler DescriptorSet 0
+               OpDecorate %Sampler Binding 0
+               OpDecorate %SampledImage DescriptorSet 0
+               OpDecorate %SampledImage Binding 0
+               OpDecorate %xIn_1 BuiltIn FragCoord
+               OpDecorate %_entryPointOutput Location 0
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+      %float = OpTypeFloat 32
+          %7 = OpTypeImage %float 2D 0 0 0 1 Unknown
+%_ptr_Function_7 = OpTypePointer Function %7
+        %int = OpTypeInt 32 1
+      %v3int = OpTypeVector %int 3
+%_ptr_Function_v3int = OpTypePointer Function %v3int
+    %v4float = OpTypeVector %float 4
+         %13 = OpTypeFunction %v4float %_ptr_Function_7 %_ptr_Function_v3int
+    %v2float = OpTypeVector %float 2
+%_ptr_Function_v2float = OpTypePointer Function %v2float
+         %20 = OpTypeFunction %v4float %_ptr_Function_7 %_ptr_Function_v2float
+%_ptr_Function_v4float = OpTypePointer Function %v4float
+         %26 = OpTypeFunction %v4float %_ptr_Function_v4float
+      %v2int = OpTypeVector %int 2
+       %uint = OpTypeInt 32 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Function_int = OpTypePointer Function %int
+         %43 = OpTypeSampler
+%_ptr_UniformConstant_43 = OpTypePointer UniformConstant %43
+    %Sampler = OpVariable %_ptr_UniformConstant_43 UniformConstant
+         %47 = OpTypeSampledImage %7
+     %uint_0 = OpConstant %uint 0
+%_ptr_Function_float = OpTypePointer Function %float
+ %float_1280 = OpConstant %float 1280
+     %uint_1 = OpConstant %uint 1
+  %float_720 = OpConstant %float 720
+      %int_0 = OpConstant %int 0
+%_ptr_UniformConstant_7 = OpTypePointer UniformConstant %7
+%SampledImage = OpVariable %_ptr_UniformConstant_7 UniformConstant
+%_ptr_Input_v4float = OpTypePointer Input %v4float
+      %xIn_1 = OpVariable %_ptr_Input_v4float Input
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+%_entryPointOutput = OpVariable %_ptr_Output_v4float Output
+       %main = OpFunction %void None %3 ; entry-point wrapper: forwards the input to %_main_vf4_ and stores its result to the output
+          %5 = OpLabel
+      %xIn_0 = OpVariable %_ptr_Function_v4float Function
+    %param_3 = OpVariable %_ptr_Function_v4float Function
+        %107 = OpLoad %v4float %xIn_1 ; %xIn_1 is decorated BuiltIn FragCoord
+               OpStore %xIn_0 %107
+        %111 = OpLoad %v4float %xIn_0
+               OpStore %param_3 %111
+        %112 = OpFunctionCall %v4float %_main_vf4_ %param_3
+               OpStore %_entryPointOutput %112
+               OpReturn
+               OpFunctionEnd
+%sample_fetch_t21_vi3_ = OpFunction %v4float None %13 ; fetches a texel from the image parameter without involving a sampler
+        %tex = OpFunctionParameter %_ptr_Function_7
+         %UV = OpFunctionParameter %_ptr_Function_v3int
+         %17 = OpLabel
+         %30 = OpLoad %7 %tex
+         %32 = OpLoad %v3int %UV
+         %33 = OpVectorShuffle %v2int %32 %32 0 1 ; texel coordinate = UV.xy
+         %37 = OpAccessChain %_ptr_Function_int %UV %uint_2
+         %38 = OpLoad %int %37 ; level of detail = UV.z
+         %39 = OpImageFetch %v4float %30 %33 Lod %38 ; operates on the plain image %30, not on an OpSampledImage result
+               OpReturnValue %39
+               OpFunctionEnd
+%sample_sampler_t21_vf2_ = OpFunction %v4float None %20 ; samples the image parameter using the module-level %Sampler
+      %tex_0 = OpFunctionParameter %_ptr_Function_7
+       %UV_0 = OpFunctionParameter %_ptr_Function_v2float
+         %24 = OpLabel
+         %42 = OpLoad %7 %tex_0
+         %46 = OpLoad %43 %Sampler ; the sampler is the UniformConstant variable, not a parameter
+         %48 = OpSampledImage %47 %42 %46 ; combine image and sampler for the sampling instruction
+         %49 = OpLoad %v2float %UV_0
+         %50 = OpImageSampleImplicitLod %v4float %48 %49
+               OpReturnValue %50
+               OpFunctionEnd
+ %_main_vf4_ = OpFunction %v4float None %26 ; accumulates four reads of %SampledImage: fetch via helper, inline fetch, sample via helper, inline sample
+        %xIn = OpFunctionParameter %_ptr_Function_v4float
+         %29 = OpLabel
+      %coord = OpVariable %_ptr_Function_v3int Function
+      %value = OpVariable %_ptr_Function_v4float Function
+      %param = OpVariable %_ptr_Function_7 Function
+    %param_0 = OpVariable %_ptr_Function_v3int Function
+    %param_1 = OpVariable %_ptr_Function_7 Function
+    %param_2 = OpVariable %_ptr_Function_v2float Function
+         %56 = OpAccessChain %_ptr_Function_float %xIn %uint_0
+         %57 = OpLoad %float %56
+         %59 = OpFMul %float %57 %float_1280
+         %60 = OpConvertFToS %int %59 ; int(xIn.x * 1280.0)
+         %62 = OpAccessChain %_ptr_Function_float %xIn %uint_1
+         %63 = OpLoad %float %62
+         %65 = OpFMul %float %63 %float_720
+         %66 = OpConvertFToS %int %65 ; int(xIn.y * 720.0)
+         %68 = OpCompositeConstruct %v3int %60 %66 %int_0 ; third component (used as the LOD below) is 0
+               OpStore %coord %68
+         %73 = OpLoad %7 %SampledImage
+               OpStore %param %73 ; the image value is copied into a Function-storage variable so it can be passed by pointer
+         %75 = OpLoad %v3int %coord
+               OpStore %param_0 %75
+         %76 = OpFunctionCall %v4float %sample_fetch_t21_vi3_ %param %param_0 ; 1) fetch through the helper function
+               OpStore %value %76
+         %77 = OpLoad %7 %SampledImage
+         %78 = OpLoad %v3int %coord
+         %79 = OpVectorShuffle %v2int %78 %78 0 1
+         %80 = OpAccessChain %_ptr_Function_int %coord %uint_2
+         %81 = OpLoad %int %80
+         %82 = OpImageFetch %v4float %77 %79 Lod %81 ; 2) the same fetch performed inline
+         %83 = OpLoad %v4float %value
+         %84 = OpFAdd %v4float %83 %82
+               OpStore %value %84
+         %86 = OpLoad %7 %SampledImage
+               OpStore %param_1 %86
+         %88 = OpLoad %v4float %xIn
+         %89 = OpVectorShuffle %v2float %88 %88 0 1
+               OpStore %param_2 %89
+         %90 = OpFunctionCall %v4float %sample_sampler_t21_vf2_ %param_1 %param_2 ; 3) sample through the helper function, coordinate = xIn.xy
+         %91 = OpLoad %v4float %value
+         %92 = OpFAdd %v4float %91 %90
+               OpStore %value %92
+         %93 = OpLoad %7 %SampledImage
+         %94 = OpLoad %43 %Sampler
+         %95 = OpSampledImage %47 %93 %94
+         %96 = OpLoad %v4float %xIn
+         %97 = OpVectorShuffle %v2float %96 %96 0 1
+         %98 = OpImageSampleImplicitLod %v4float %95 %97 ; 4) the same sample performed inline
+         %99 = OpLoad %v4float %value
+        %100 = OpFAdd %v4float %99 %98
+               OpStore %value %100
+        %101 = OpLoad %v4float %value
+               OpReturnValue %101 ; sum of the four reads
+               OpFunctionEnd
diff --git a/shaders/asm/vert/invariant-block.asm.vert b/shaders/asm/vert/invariant-block.asm.vert
new file mode 100644
index 0000000000..5984935c7b
--- /dev/null
+++ b/shaders/asm/vert/invariant-block.asm.vert
@@ -0,0 +1,44 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 3
+; Bound: 20
+; Schema: 0
+               OpCapability Shader
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Vertex %main "main" %_
+               OpSource GLSL 450
+               OpName %main "main"
+               OpName %gl_PerVertex "gl_PerVertex"
+               OpMemberName %gl_PerVertex 0 "gl_Position"
+               OpMemberName %gl_PerVertex 1 "gl_PointSize"
+               OpMemberName %gl_PerVertex 2 "gl_ClipDistance"
+               OpMemberName %gl_PerVertex 3 "gl_CullDistance"
+               OpName %_ ""
+               OpMemberDecorate %gl_PerVertex 0 Invariant
+               OpMemberDecorate %gl_PerVertex 0 BuiltIn Position
+               OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize
+               OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance
+               OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance
+               OpDecorate %gl_PerVertex Block
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+       %uint = OpTypeInt 32 0
+     %uint_1 = OpConstant %uint 1
+%_arr_float_uint_1 = OpTypeArray %float %uint_1
+%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1
+%_ptr_Output_gl_PerVertex = OpTypePointer Output %gl_PerVertex
+          %_ = OpVariable %_ptr_Output_gl_PerVertex Output
+        %int = OpTypeInt 32 1
+      %int_0 = OpConstant %int 0
+    %float_1 = OpConstant %float 1
+         %17 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+         %19 = OpAccessChain %_ptr_Output_v4float %_ %int_0
+               OpStore %19 %17
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders/asm/vert/invariant-block.sso.asm.vert b/shaders/asm/vert/invariant-block.sso.asm.vert
new file mode 100644
index 0000000000..5984935c7b
--- /dev/null
+++ b/shaders/asm/vert/invariant-block.sso.asm.vert
@@ -0,0 +1,44 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 3
+; Bound: 20
+; Schema: 0
+               OpCapability Shader
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Vertex %main "main" %_
+               OpSource GLSL 450
+               OpName %main "main"
+               OpName %gl_PerVertex "gl_PerVertex"
+               OpMemberName %gl_PerVertex 0 "gl_Position"
+               OpMemberName %gl_PerVertex 1 "gl_PointSize"
+               OpMemberName %gl_PerVertex 2 "gl_ClipDistance"
+               OpMemberName %gl_PerVertex 3 "gl_CullDistance"
+               OpName %_ ""
+               OpMemberDecorate %gl_PerVertex 0 Invariant
+               OpMemberDecorate %gl_PerVertex 0 BuiltIn Position
+               OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize
+               OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance
+               OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance
+               OpDecorate %gl_PerVertex Block
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+       %uint = OpTypeInt 32 0
+     %uint_1 = OpConstant %uint 1
+%_arr_float_uint_1 = OpTypeArray %float %uint_1
+%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1
+%_ptr_Output_gl_PerVertex = OpTypePointer Output %gl_PerVertex
+          %_ = OpVariable %_ptr_Output_gl_PerVertex Output
+        %int = OpTypeInt 32 1
+      %int_0 = OpConstant %int 0
+    %float_1 = OpConstant %float 1
+         %17 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+         %19 = OpAccessChain %_ptr_Output_v4float %_ %int_0
+               OpStore %19 %17
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders/asm/vert/invariant.asm.vert b/shaders/asm/vert/invariant.asm.vert
new file mode 100644
index 0000000000..c0d381ee23
--- /dev/null
+++ b/shaders/asm/vert/invariant.asm.vert
@@ -0,0 +1,34 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 3
+; Bound: 18
+; Schema: 0
+               OpCapability Shader
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Vertex %main "main" %_entryPointOutput
+               OpSource HLSL 500
+               OpName %main "main"
+               OpName %_main_ "@main("
+               OpName %_entryPointOutput "@entryPointOutput"
+               OpDecorate %_entryPointOutput Invariant
+               OpDecorate %_entryPointOutput BuiltIn Position
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+          %8 = OpTypeFunction %v4float
+    %float_1 = OpConstant %float 1
+         %12 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+%_entryPointOutput = OpVariable %_ptr_Output_v4float Output
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+         %17 = OpFunctionCall %v4float %_main_
+               OpStore %_entryPointOutput %17
+               OpReturn
+               OpFunctionEnd
+     %_main_ = OpFunction %v4float None %8
+         %10 = OpLabel
+               OpReturnValue %12
+               OpFunctionEnd
diff --git a/shaders/asm/vert/invariant.sso.asm.vert b/shaders/asm/vert/invariant.sso.asm.vert
new file mode 100644
index 0000000000..c0d381ee23
--- /dev/null
+++ b/shaders/asm/vert/invariant.sso.asm.vert
@@ -0,0 +1,34 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 3
+; Bound: 18
+; Schema: 0
+               OpCapability Shader
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Vertex %main "main" %_entryPointOutput
+               OpSource HLSL 500
+               OpName %main "main"
+               OpName %_main_ "@main("
+               OpName %_entryPointOutput "@entryPointOutput"
+               OpDecorate %_entryPointOutput Invariant
+               OpDecorate %_entryPointOutput BuiltIn Position
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+          %8 = OpTypeFunction %v4float
+    %float_1 = OpConstant %float 1
+         %12 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+%_entryPointOutput = OpVariable %_ptr_Output_v4float Output
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+         %17 = OpFunctionCall %v4float %_main_
+               OpStore %_entryPointOutput %17
+               OpReturn
+               OpFunctionEnd
+     %_main_ = OpFunction %v4float None %8
+         %10 = OpLabel
+               OpReturnValue %12
+               OpFunctionEnd
diff --git a/shaders/desktop-only/frag/clip-cull-distance.desktop.frag b/shaders/desktop-only/frag/clip-cull-distance.desktop.frag
new file mode 100644
index 0000000000..5212fd6448
--- /dev/null
+++ b/shaders/desktop-only/frag/clip-cull-distance.desktop.frag
@@ -0,0 +1,12 @@
+#version 450
+
+in float gl_ClipDistance[4];
+in float gl_CullDistance[3];
+
+layout(location = 0) out float FragColor;
+
+void main()
+{
+	FragColor = gl_ClipDistance[0] + gl_CullDistance[0];
+}
+
diff --git a/shaders/desktop-only/frag/inf-nan-constant-double.desktop.frag b/shaders/desktop-only/frag/inf-nan-constant-double.desktop.frag
new file mode 100644
index 0000000000..d2a80fe044
--- /dev/null
+++ b/shaders/desktop-only/frag/inf-nan-constant-double.desktop.frag
@@ -0,0 +1,13 @@
+#version 450
+
+const double posinf = 1.0lf / 0.0lf;
+const double neginf = -1.0lf / 0.0lf;
+const double nan = 0.0lf / 0.0lf;
+
+layout(location = 0) out vec3 FragColor;
+layout(location = 0) flat in double vTmp;
+
+void main()
+{
+	FragColor = vec3(dvec3(posinf, neginf, nan) + vTmp);
+}
diff --git a/shaders/desktop-only/vert/clip-cull-distance.desktop.sso.vert b/shaders/desktop-only/vert/clip-cull-distance.desktop.sso.vert
new file mode 100644
index 0000000000..1489cc7a15
--- /dev/null
+++ b/shaders/desktop-only/vert/clip-cull-distance.desktop.sso.vert
@@ -0,0 +1,13 @@
+#version 450
+out float gl_ClipDistance[4];
+out float gl_CullDistance[3];
+
+void main()
+{
+	gl_Position = vec4(1.0);
+	gl_ClipDistance[0] = 0.0;
+	gl_ClipDistance[1] = 0.0;
+	gl_ClipDistance[2] = 0.0;
+	gl_ClipDistance[3] = 0.0;
+	gl_CullDistance[1] = 4.0;
+}
diff --git a/shaders/desktop-only/vert/clip-cull-distance.desktop.vert b/shaders/desktop-only/vert/clip-cull-distance.desktop.vert
index 9e4a0b7ac9..1489cc7a15 100644
--- a/shaders/desktop-only/vert/clip-cull-distance.desktop.vert
+++ b/shaders/desktop-only/vert/clip-cull-distance.desktop.vert
@@ -1,10 +1,13 @@
 #version 450
+out float gl_ClipDistance[4];
+out float gl_CullDistance[3];
 
 void main()
 {
-   gl_Position = vec4(10.0);
-   gl_ClipDistance[0] = 1.0;
-   gl_ClipDistance[1] = 4.0;
-   gl_CullDistance[0] = 4.0;
-   gl_CullDistance[1] = 9.0;
+	gl_Position = vec4(1.0);
+	gl_ClipDistance[0] = 0.0;
+	gl_ClipDistance[1] = 0.0;
+	gl_ClipDistance[2] = 0.0;
+	gl_ClipDistance[3] = 0.0;
+	gl_CullDistance[1] = 4.0;
 }
diff --git a/shaders/frag/front-facing.frag b/shaders/frag/front-facing.frag
new file mode 100644
index 0000000000..90ca1abf4e
--- /dev/null
+++ b/shaders/frag/front-facing.frag
@@ -0,0 +1,14 @@
+#version 310 es
+precision mediump float;
+
+layout(location = 0) out vec4 FragColor;
+layout(location = 0) in vec4 vA;
+layout(location = 1) in vec4 vB;
+
+void main()
+{
+	if (gl_FrontFacing)
+		FragColor = vA;
+	else
+		FragColor = vB;
+}
diff --git a/shaders/frag/hoisted-temporary-use-continue-block-as-value.frag b/shaders/frag/hoisted-temporary-use-continue-block-as-value.frag
new file mode 100644
index 0000000000..cc8a648358
--- /dev/null
+++ b/shaders/frag/hoisted-temporary-use-continue-block-as-value.frag
@@ -0,0 +1,24 @@
+#version 310 es
+precision mediump float;
+
+layout(location = 0) out vec4 FragColor;
+layout(location = 0) flat in int vA;
+layout(location = 1) flat in int vB;
+
+void main()
+{
+	FragColor = vec4(0.0);
+
+	int k = 0;
+	int j;
+	for (int i = 0; i < vA; i += j)
+	{
+		if ((vA + i) == 20)
+			k = 50;
+		else if ((vB + i) == 40)
+			k = 60;
+
+		j = k + 10;
+		FragColor += 1.0;
+	}
+}
diff --git a/shaders/frag/inf-nan-constant.frag b/shaders/frag/inf-nan-constant.frag
new file mode 100644
index 0000000000..78b93c31e0
--- /dev/null
+++ b/shaders/frag/inf-nan-constant.frag
@@ -0,0 +1,14 @@
+#version 310 es
+precision highp float;
+
+const float posinf = 1.0 / 0.0;
+const float neginf = -1.0 / 0.0;
+const float nan = 0.0 / 0.0;
+
+layout(location = 0) out vec3 FragColor;
+
+void main()
+{
+	FragColor = vec3(posinf, neginf, nan);
+}
+
diff --git a/shaders/vert/return-array.vert b/shaders/vert/return-array.vert
new file mode 100644
index 0000000000..708460114e
--- /dev/null
+++ b/shaders/vert/return-array.vert
@@ -0,0 +1,22 @@
+#version 310 es
+
+layout(location = 0) in vec4 vInput0;
+layout(location = 1) in vec4 vInput1;
+
+vec4[2] test()
+{
+	return vec4[](vec4(10.0), vec4(20.0));
+}
+
+vec4[2] test2()
+{
+	vec4 foobar[2];
+	foobar[0] = vInput0;
+	foobar[1] = vInput1;
+	return foobar;
+}
+
+void main()
+{
+	gl_Position = test()[0] + test2()[1];
+}
diff --git a/shaders/vulkan/frag/spec-constant-block-size.vk.frag b/shaders/vulkan/frag/spec-constant-block-size.vk.frag
new file mode 100644
index 0000000000..8d2b1f326f
--- /dev/null
+++ b/shaders/vulkan/frag/spec-constant-block-size.vk.frag
@@ -0,0 +1,17 @@
+#version 310 es
+precision mediump float;
+
+layout(constant_id = 10) const int Value = 2;
+layout(binding = 0) uniform SpecConstArray
+{
+	vec4 samples[Value];
+};
+
+layout(location = 0) flat in int Index;
+layout(location = 0) out vec4 FragColor;
+
+void main()
+{
+	FragColor = samples[Index];
+}
+
diff --git a/spirv_common.hpp b/spirv_common.hpp
index c4829f4643..b4d6ef1dca 100644
--- a/spirv_common.hpp
+++ b/spirv_common.hpp
@@ -125,6 +125,8 @@ inline std::string convert_to_string(T &&t)
 #endif
 
 #ifdef _MSC_VER
+// Suppress the MSVC warning (C4996) that is raised for sprintf.
+// We cannot rely on snprintf existing on every MSVC version, so sprintf is used instead.
 #pragma warning(push)
 #pragma warning(disable : 4996)
 #endif
@@ -723,16 +725,30 @@ struct SPIRConstant : IVariant
 	{
 		Constant r[4];
 		// If != 0, this element is a specialization constant, and we should keep track of it as such.
-		uint32_t id[4] = {};
+		uint32_t id[4];
 		uint32_t vecsize = 1;
+
+		// Workaround for MSVC 2013, initializing an array breaks.
+		ConstantVector()
+		{
+			for (unsigned i = 0; i < 4; i++)
+				id[i] = 0;
+		}
 	};
 
 	struct ConstantMatrix
 	{
 		ConstantVector c[4];
 		// If != 0, this column is a specialization constant, and we should keep track of it as such.
-		uint32_t id[4] = {};
+		uint32_t id[4];
 		uint32_t columns = 1;
+
+		// Workaround for MSVC 2013, initializing an array breaks.
+		ConstantMatrix()
+		{
+			for (unsigned i = 0; i < 4; i++)
+				id[i] = 0;
+		}
 	};
 
 	inline uint32_t specialization_constant_id(uint32_t col, uint32_t row) const
@@ -1021,6 +1037,23 @@ public:
 private:
 	std::locale old;
 };
+
+class Hasher // Incremental 64-bit hash over 32-bit words, using the FNV 64-bit prime and offset basis.
+{
+public:
+	inline void u32(uint32_t value) // Mixes one word in: multiply the state by the prime, then XOR the word.
+	{
+		h = (h * 0x100000001b3ull) ^ value;
+	}
+
+	inline uint64_t get() const // Returns the hash accumulated so far.
+	{
+		return h;
+	}
+
+private:
+	uint64_t h = 0xcbf29ce484222325ull; // Initial state: FNV 64-bit offset basis.
+};
 }
 
 #endif
diff --git a/spirv_cpp.cpp b/spirv_cpp.cpp
index 69ef138e43..9500195ccb 100644
--- a/spirv_cpp.cpp
+++ b/spirv_cpp.cpp
@@ -375,6 +375,9 @@ void CompilerCPP::emit_c_linkage()
 
 void CompilerCPP::emit_function_prototype(SPIRFunction &func, uint64_t)
 {
+	if (func.self != entry_point)
+		add_function_overload(func);
+
 	local_variable_names = resource_names;
 	string decl;
 
diff --git a/spirv_cross.cpp b/spirv_cross.cpp
index 996c0205c3..3342aaabe4 100644
--- a/spirv_cross.cpp
+++ b/spirv_cross.cpp
@@ -646,6 +646,11 @@ unordered_set<uint32_t> Compiler::get_active_interface_variables() const
 	unordered_set<uint32_t> variables;
 	InterfaceVariableAccessHandler handler(*this, variables);
 	traverse_all_reachable_opcodes(get<SPIRFunction>(entry_point), handler);
+
+	// If we needed to create one, we'll need it.
+	if (dummy_sampler_id)
+		variables.insert(dummy_sampler_id);
+
 	return variables;
 }
 
@@ -2291,7 +2296,9 @@ size_t Compiler::get_declared_struct_member_size(const SPIRType &struct_type, ui
 	if (!type.array.empty())
 	{
 		// For arrays, we can use ArrayStride to get an easy check.
-		return type_struct_member_array_stride(struct_type, index) * type.array.back();
+		bool array_size_literal = type.array_size_literal.back();
+		uint32_t array_size = array_size_literal ? type.array.back() : get<SPIRConstant>(type.array.back()).scalar();
+		return type_struct_member_array_stride(struct_type, index) * array_size;
 	}
 	else if (type.basetype == SPIRType::Struct)
 	{
@@ -2582,20 +2589,51 @@ vector<string> Compiler::get_entry_points() const
 	return entries;
 }
 
+vector<EntryPoint> Compiler::get_entry_points_and_stages() const
+{
+	vector<EntryPoint> entries; // One element per entry in entry_points.
+	for (auto &entry : entry_points) // Each result pairs the entry's orig_name with its execution model.
+		entries.push_back({ entry.second.orig_name, entry.second.model });
+	return entries;
+}
+
 void Compiler::rename_entry_point(const std::string &old_name, const std::string &new_name)
 {
-	auto &entry = get_entry_point(old_name);
+	auto &entry = get_first_entry_point(old_name);
+	entry.orig_name = new_name;
+	entry.name = new_name;
+}
+
+void Compiler::rename_entry_point(const std::string &old_name, const std::string &new_name, spv::ExecutionModel model)
+{
+	auto &entry = get_entry_point(old_name, model);
 	entry.orig_name = new_name;
 	entry.name = new_name;
 }
 
 void Compiler::set_entry_point(const std::string &name)
 {
-	auto &entry = get_entry_point(name);
+	auto &entry = get_first_entry_point(name);
+	entry_point = entry.self;
+}
+
+void Compiler::set_entry_point(const std::string &name, spv::ExecutionModel model)
+{
+	auto &entry = get_entry_point(name, model);
 	entry_point = entry.self;
 }
 
 SPIREntryPoint &Compiler::get_entry_point(const std::string &name)
+{
+	return get_first_entry_point(name);
+}
+
+const SPIREntryPoint &Compiler::get_entry_point(const std::string &name) const
+{
+	return get_first_entry_point(name);
+}
+
+SPIREntryPoint &Compiler::get_first_entry_point(const std::string &name)
 {
 	auto itr =
 	    find_if(begin(entry_points), end(entry_points), [&](const std::pair<uint32_t, SPIREntryPoint> &entry) -> bool {
@@ -2608,7 +2646,7 @@ SPIREntryPoint &Compiler::get_entry_point(const std::string &name)
 	return itr->second;
 }
 
-const SPIREntryPoint &Compiler::get_entry_point(const std::string &name) const
+const SPIREntryPoint &Compiler::get_first_entry_point(const std::string &name) const
 {
 	auto itr =
 	    find_if(begin(entry_points), end(entry_points), [&](const std::pair<uint32_t, SPIREntryPoint> &entry) -> bool {
@@ -2621,9 +2659,40 @@ const SPIREntryPoint &Compiler::get_entry_point(const std::string &name) const
 	return itr->second;
 }
 
+SPIREntryPoint &Compiler::get_entry_point(const std::string &name, ExecutionModel model)
+{
+	auto itr = // Look for an entry whose orig_name and execution model both match.
+	    find_if(begin(entry_points), end(entry_points), [&](const std::pair<uint32_t, SPIREntryPoint> &entry) -> bool {
+		    return entry.second.orig_name == name && entry.second.model == model;
+	    });
+
+	if (itr == end(entry_points)) // Nothing matched both the name and the model.
+		SPIRV_CROSS_THROW("Entry point does not exist.");
+
+	return itr->second;
+}
+
+const SPIREntryPoint &Compiler::get_entry_point(const std::string &name, ExecutionModel model) const
+{
+	auto itr = // Const overload: same lookup on orig_name plus execution model.
+	    find_if(begin(entry_points), end(entry_points), [&](const std::pair<uint32_t, SPIREntryPoint> &entry) -> bool {
+		    return entry.second.orig_name == name && entry.second.model == model;
+	    });
+
+	if (itr == end(entry_points)) // Nothing matched both the name and the model.
+		SPIRV_CROSS_THROW("Entry point does not exist.");
+
+	return itr->second;
+}
+
 const string &Compiler::get_cleansed_entry_point_name(const std::string &name) const
 {
-	return get_entry_point(name).name;
+	return get_first_entry_point(name).name;
+}
+
+const string &Compiler::get_cleansed_entry_point_name(const std::string &name, ExecutionModel model) const
+{
+	return get_entry_point(name, model).name;
 }
 
 const SPIREntryPoint &Compiler::get_entry_point() const
@@ -2823,9 +2892,79 @@ void Compiler::CombinedImageSamplerHandler::register_combined_image_sampler(SPIR
 	}
 }
 
+// Visits opcodes to detect a texelFetch on a separate image; returning false stops the traversal.
+bool Compiler::DummySamplerForCombinedImageHandler::handle(Op opcode, const uint32_t *args, uint32_t length)
+{
+	// No need to traverse further, we know the result.
+	if (need_dummy_sampler)
+		return false;
+
+	switch (opcode)
+	{
+	case OpLoad:
+	{
+		if (length < 3)
+			return false;
+
+		uint32_t result_type = args[0];
+		auto &type = compiler.get<SPIRType>(result_type);
+		bool separate_image =
+		    type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer;
+
+		// If not separate image, don't bother.
+		if (!separate_image)
+			return true;
+
+		uint32_t id = args[1];
+		uint32_t ptr = args[2];
+		compiler.set<SPIRExpression>(id, "", result_type, true);
+		compiler.register_read(id, ptr, true);
+		break;
+	}
+
+	case OpImageFetch:
+	{
+		// args[2] is the image operand; reject truncated instructions like the other cases do.
+		if (length < 3)
+			return false;
+		// If we are fetching from a plain OpTypeImage, we must pre-combine with our dummy sampler.
+		auto *var = compiler.maybe_get_backing_variable(args[2]);
+		if (var)
+		{
+			auto &type = compiler.get<SPIRType>(var->basetype);
+			if (type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer)
+				need_dummy_sampler = true;
+		}
+		break;
+	}
+
+	case OpInBoundsAccessChain:
+	case OpAccessChain:
+	{
+		if (length < 3)
+			return false;
+
+		auto &type = compiler.get<SPIRType>(args[0]);
+		bool separate_image =
+		    type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer;
+		if (separate_image)
+			SPIRV_CROSS_THROW("Attempting to use arrays or structs of separate images. This is not possible to "
+			                  "statically remap to plain GLSL.");
+		break;
+	}
+
+	default:
+		break;
+	}
+
+	return true;
+}
+
 bool Compiler::CombinedImageSamplerHandler::handle(Op opcode, const uint32_t *args, uint32_t length)
 {
 	// We need to figure out where samplers and images are loaded from, so do only the bare bones compilation we need.
+	bool is_fetch = false;
+
 	switch (opcode)
 	{
 	case OpLoad:
@@ -2875,6 +3014,28 @@ bool Compiler::CombinedImageSamplerHandler::handle(Op opcode, const uint32_t *ar
 		return true;
 	}
 
+	case OpImageFetch:
+	{
+		// If we are fetching from a plain OpTypeImage, we must pre-combine with our dummy sampler.
+		auto *var = compiler.maybe_get_backing_variable(args[2]);
+		if (!var)
+			return true;
+
+		auto &type = compiler.get<SPIRType>(var->basetype);
+		if (type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer)
+		{
+			if (compiler.dummy_sampler_id == 0)
+				SPIRV_CROSS_THROW("texelFetch without sampler was found, but no dummy sampler has been created with "
+				                  "build_dummy_sampler_for_combined_images().");
+
+			// Do it outside.
+			is_fetch = true;
+			break;
+		}
+
+		return true;
+	}
+
 	case OpSampledImage:
 		// Do it outside.
 		break;
@@ -2899,7 +3060,7 @@ bool Compiler::CombinedImageSamplerHandler::handle(Op opcode, const uint32_t *ar
 			if (image)
 				image_id = image->self;
 
-			uint32_t sampler_id = args[3];
+			uint32_t sampler_id = is_fetch ? compiler.dummy_sampler_id : args[3];
 			auto *sampler = compiler.maybe_get_backing_variable(sampler_id);
 			if (sampler)
 				sampler_id = sampler->self;
@@ -2914,7 +3075,7 @@ bool Compiler::CombinedImageSamplerHandler::handle(Op opcode, const uint32_t *ar
 	// Function parameters are not necessarily pointers, so if we don't have a backing variable, remapping will know
 	// which backing variable the image/sample came from.
 	uint32_t image_id = remap_parameter(args[2]);
-	uint32_t sampler_id = remap_parameter(args[3]);
+	uint32_t sampler_id = is_fetch ? compiler.dummy_sampler_id : remap_parameter(args[3]);
 
 	auto itr = find_if(begin(compiler.combined_image_samplers), end(compiler.combined_image_samplers),
 	                   [image_id, sampler_id](const CombinedImageSampler &combined) {
@@ -2923,10 +3084,24 @@ bool Compiler::CombinedImageSamplerHandler::handle(Op opcode, const uint32_t *ar
 
 	if (itr == end(compiler.combined_image_samplers))
 	{
+		uint32_t sampled_type;
+		if (is_fetch)
+		{
+			// Have to invent the sampled image type.
+			sampled_type = compiler.increase_bound_by(1);
+			auto &type = compiler.set<SPIRType>(sampled_type);
+			type = compiler.expression_type(args[2]);
+			type.self = sampled_type;
+			type.basetype = SPIRType::SampledImage;
+		}
+		else
+		{
+			sampled_type = args[0];
+		}
+
 		auto id = compiler.increase_bound_by(2);
 		auto type_id = id + 0;
 		auto combined_id = id + 1;
-		auto sampled_type = args[0];
 
 		// Make a new type, pointer to OpTypeSampledImage, so we can make a variable of this type.
 		// We will probably have this type lying around, but it doesn't hurt to make duplicates for internal purposes.
@@ -2941,7 +3116,8 @@ bool Compiler::CombinedImageSamplerHandler::handle(Op opcode, const uint32_t *ar
 
 		// Inherit RelaxedPrecision (and potentially other useful flags if deemed relevant).
 		auto &new_flags = compiler.meta[combined_id].decoration.decoration_flags;
-		auto old_flags = compiler.meta[sampler_id].decoration.decoration_flags;
+		// Fetch inherits precision from the image, not sampler (there is no sampler).
+		auto old_flags = compiler.meta[is_fetch ? image_id : sampler_id].decoration.decoration_flags;
 		new_flags = old_flags & (1ull << DecorationRelaxedPrecision);
 
 		compiler.combined_image_samplers.push_back({ combined_id, image_id, sampler_id });
@@ -2950,6 +3126,36 @@ bool Compiler::CombinedImageSamplerHandler::handle(Op opcode, const uint32_t *ar
 	return true;
 }
 
+// Creates the dummy sampler variable when reachable code needs one; returns its ID, or 0 if none is needed.
+uint32_t Compiler::build_dummy_sampler_for_combined_images()
+{
+	DummySamplerForCombinedImageHandler handler(*this);
+	traverse_all_reachable_opcodes(get<SPIRFunction>(entry_point), handler);
+	if (!handler.need_dummy_sampler)
+		return 0;
+
+	// Reserve three fresh IDs: the sampler type, a pointer to it, and the variable itself.
+	uint32_t offset = increase_bound_by(3);
+	auto type_id = offset + 0;
+	auto ptr_type_id = offset + 1;
+	auto var_id = offset + 2;
+
+	auto &sampler = set<SPIRType>(type_id);
+	sampler.basetype = SPIRType::Sampler;
+
+	// The pointer type starts as a copy of the sampler type and is then marked as a UniformConstant pointer.
+	auto &ptr_sampler = set<SPIRType>(ptr_type_id);
+	ptr_sampler = sampler;
+	ptr_sampler.self = type_id;
+	ptr_sampler.storage = StorageClassUniformConstant;
+	ptr_sampler.pointer = true;
+
+	set<SPIRVariable>(var_id, ptr_type_id, StorageClassUniformConstant, 0);
+	set_name(var_id, "SPIRV_Cross_DummySampler");
+	dummy_sampler_id = var_id;
+	return var_id;
+}
+
 void Compiler::build_combined_image_samplers()
 {
 	for (auto &id : ids)
@@ -3244,7 +3450,7 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry)
 					accessed_variables_to_block[var->self].insert(current_block->self);
 
 				// If we store through an access chain, we have a partial write.
-				if (var->self == lhs)
+				if (var && var->self == lhs)
 					complete_write_variables_to_block[var->self].insert(current_block->self);
 
 				var = compiler.maybe_get_backing_variable(rhs);
@@ -3368,8 +3574,8 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry)
 	CFG cfg(*this, entry);
 
 	// Analyze if there are parameters which need to be implicitly preserved with an "in" qualifier.
-	analyze_parameter_preservation(entry, cfg, handler.accessed_variables_to_block,
-	                               handler.complete_write_variables_to_block);
+	this->analyze_parameter_preservation(entry, cfg, handler.accessed_variables_to_block,
+	                                     handler.complete_write_variables_to_block);
 
 	unordered_map<uint32_t, uint32_t> potential_loop_variables;
 
@@ -3393,7 +3599,7 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry)
 				// The continue block is dominated by the inner part of the loop, which does not make sense in high-level
 				// language output because it will be declared before the body,
 				// so we will have to lift the dominator up to the relevant loop header instead.
-				builder.add_block(continue_block_to_loop_header[block]);
+				builder.add_block(this->continue_block_to_loop_header[block]);
 
 				if (type.vecsize == 1 && type.columns == 1)
 				{
@@ -3447,7 +3653,7 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry)
 			// If a temporary is used in more than one block, we might have to lift continue block
 			// access up to loop header like we did for variables.
 			if (blocks.size() != 1 && this->is_continue(block))
-				builder.add_block(continue_block_to_loop_header[block]);
+				builder.add_block(this->continue_block_to_loop_header[block]);
 		}
 
 		uint32_t dominating_block = builder.get_dominator();
@@ -3462,10 +3668,10 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry)
 				// This should be very rare, but if we try to declare a temporary inside a loop,
 				// and that temporary is used outside the loop as well (spirv-opt inliner likes this)
 				// we should actually emit the temporary outside the loop.
-				hoisted_temporaries.insert(var.first);
-				forced_temporaries.insert(var.first);
+				this->hoisted_temporaries.insert(var.first);
+				this->forced_temporaries.insert(var.first);
 
-				auto &block_temporaries = get<SPIRBlock>(dominating_block).declare_temporary;
+				auto &block_temporaries = this->get<SPIRBlock>(dominating_block).declare_temporary;
 				block_temporaries.emplace_back(handler.result_id_to_type[var.first], var.first);
 			}
 		}
@@ -3607,18 +3813,49 @@ bool Compiler::get_common_basic_type(const SPIRType &type, SPIRType::BaseType &b
 	}
 }
 
+void Compiler::ActiveBuiltinHandler::handle_builtin(const SPIRType &type, BuiltIn builtin, uint64_t decoration_flags)
+{
+	// Records per-builtin state on the compiler. If used, ClipDistance/CullDistance need an explicitly
+	// declared array size, so it is captured here; for Position, the Invariant decoration is noted.
+	if (builtin == BuiltInClipDistance)
+	{
+		if (!type.array_size_literal[0]) // NOTE(review): index 0 assumes type is an array; confirm callers.
+			SPIRV_CROSS_THROW("Array size for ClipDistance must be a literal.");
+		uint32_t array_size = type.array[0];
+		if (array_size == 0)
+			SPIRV_CROSS_THROW("Array size for ClipDistance must not be unsized.");
+		compiler.clip_distance_count = array_size;
+	}
+	else if (builtin == BuiltInCullDistance)
+	{
+		if (!type.array_size_literal[0]) // NOTE(review): same array assumption as for ClipDistance.
+			SPIRV_CROSS_THROW("Array size for CullDistance must be a literal.");
+		uint32_t array_size = type.array[0];
+		if (array_size == 0)
+			SPIRV_CROSS_THROW("Array size for CullDistance must not be unsized.");
+		compiler.cull_distance_count = array_size;
+	}
+	else if (builtin == BuiltInPosition)
+	{
+		if (decoration_flags & (1ull << DecorationInvariant)) // Invariant bit set on the Position builtin.
+			compiler.position_invariant = true;
+	}
+}
+
 bool Compiler::ActiveBuiltinHandler::handle(spv::Op opcode, const uint32_t *args, uint32_t length)
 {
 	const auto add_if_builtin = [&](uint32_t id) {
 		// Only handles variables here.
 		// Builtins which are part of a block are handled in AccessChain.
 		auto *var = compiler.maybe_get<SPIRVariable>(id);
-		if (var && compiler.meta[id].decoration.builtin)
+		auto &decorations = compiler.meta[id].decoration;
+		if (var && decorations.builtin)
 		{
 			auto &type = compiler.get<SPIRType>(var->basetype);
 			auto &flags =
 			    type.storage == StorageClassInput ? compiler.active_input_builtins : compiler.active_output_builtins;
-			flags |= 1ull << compiler.meta[id].decoration.builtin_type;
+			flags |= 1ull << decorations.builtin_type;
+			handle_builtin(type, decorations.builtin_type, decorations.decoration_flags);
 		}
 	};
 
@@ -3704,7 +3941,11 @@ bool Compiler::ActiveBuiltinHandler::handle(spv::Op opcode, const uint32_t *args
 				{
 					auto &decorations = compiler.meta[type->self].members[index];
 					if (decorations.builtin)
+					{
 						flags |= 1ull << decorations.builtin_type;
+						handle_builtin(compiler.get<SPIRType>(type->member_types[index]), decorations.builtin_type,
+						               decorations.decoration_flags);
+					}
 				}
 
 				type = &compiler.get<SPIRType>(type->member_types[index]);
@@ -3729,6 +3970,8 @@ void Compiler::update_active_builtins()
 {
 	active_input_builtins = 0;
 	active_output_builtins = 0;
+	cull_distance_count = 0;
+	clip_distance_count = 0;
 	ActiveBuiltinHandler handler(*this);
 	traverse_all_reachable_opcodes(get<SPIRFunction>(entry_point), handler);
 }
@@ -3752,11 +3995,13 @@ bool Compiler::has_active_builtin(BuiltIn builtin, StorageClass storage)
 	return flags & (1ull << builtin);
 }
 
-void Compiler::analyze_sampler_comparison_states()
+void Compiler::analyze_image_and_sampler_usage()
 {
 	CombinedImageSamplerUsageHandler handler(*this);
 	traverse_all_reachable_opcodes(get<SPIRFunction>(entry_point), handler);
 	comparison_samplers = move(handler.comparison_samplers);
+	comparison_images = move(handler.comparison_images);
+	need_subpass_input = handler.need_subpass_input;
 }
 
 bool Compiler::CombinedImageSamplerUsageHandler::begin_function_scope(const uint32_t *args, uint32_t length)
@@ -3777,6 +4022,14 @@ bool Compiler::CombinedImageSamplerUsageHandler::begin_function_scope(const uint
 	return true;
 }
 
+void Compiler::CombinedImageSamplerUsageHandler::add_hierarchy_to_comparison_images(uint32_t image)
+{
+	// Tag this ID as a comparison image, then recurse into every ID recorded under it in dependency_hierarchy.
+	comparison_images.insert(image);
+	for (auto &img : dependency_hierarchy[image])
+		add_hierarchy_to_comparison_images(img);
+}
+
 void Compiler::CombinedImageSamplerUsageHandler::add_hierarchy_to_comparison_samplers(uint32_t sampler)
 {
 	// Traverse the variable dependency hierarchy and tag everything in its path with comparison samplers.
@@ -3796,6 +4049,12 @@ bool Compiler::CombinedImageSamplerUsageHandler::handle(Op opcode, const uint32_
 		if (length < 3)
 			return false;
 		dependency_hierarchy[args[1]].insert(args[2]);
+
+		// Ideally defer this to OpImageRead, but then we'd need to track loaded IDs.
+		// If we load an image, we're going to use it and there is little harm in declaring an unused gl_FragCoord.
+		auto &type = compiler.get<SPIRType>(args[0]);
+		if (type.image.dim == DimSubpassData)
+			need_subpass_input = true;
 		break;
 	}
 
@@ -3808,6 +4067,10 @@ bool Compiler::CombinedImageSamplerUsageHandler::handle(Op opcode, const uint32_
 		auto &type = compiler.get<SPIRType>(result_type);
 		if (type.image.depth)
 		{
+			// This image must be a depth image.
+			uint32_t image = args[2];
+			add_hierarchy_to_comparison_images(image);
+
 			// This sampler must be a SamplerComparisionState, and not a regular SamplerState.
 			uint32_t sampler = args[3];
 			add_hierarchy_to_comparison_samplers(sampler);
diff --git a/spirv_cross.hpp b/spirv_cross.hpp
index c477ea952f..193efc7477 100644
--- a/spirv_cross.hpp
+++ b/spirv_cross.hpp
@@ -108,6 +108,12 @@ enum BufferPackingStandard
 	BufferPackingHLSLCbufferPackOffset
 };
 
+struct EntryPoint
+{
+	std::string name; // Entry point name; may alias across different execution models.
+	spv::ExecutionModel execution_model; // Execution model used to disambiguate aliased names.
+};
+
 class Compiler
 {
 public:
@@ -261,17 +267,23 @@ public:
 	// Entry points should be set right after the constructor completes as some reflection functions traverse the graph from the entry point.
 	// Resource reflection also depends on the entry point.
 	// By default, the current entry point is set to the first OpEntryPoint which appears in the SPIR-V module.
+	SPIRV_CROSS_DEPRECATED("Please use get_entry_points_and_stages instead.")
 	std::vector<std::string> get_entry_points() const;
+	SPIRV_CROSS_DEPRECATED("Please use set_entry_point(const std::string &, spv::ExecutionModel) instead.")
 	void set_entry_point(const std::string &name);
 
 	// Renames an entry point from old_name to new_name.
 	// If old_name is currently selected as the current entry point, it will continue to be the current entry point,
 	// albeit with a new name.
 	// get_entry_points() is essentially invalidated at this point.
+	SPIRV_CROSS_DEPRECATED(
+	    "Please use rename_entry_point(const std::string&, const std::string&, spv::ExecutionModel) instead.")
 	void rename_entry_point(const std::string &old_name, const std::string &new_name);
 
 	// Returns the internal data structure for entry points to allow poking around.
+	SPIRV_CROSS_DEPRECATED("Please use get_entry_point(const std::string &, spv::ExecutionModel) instead.")
 	const SPIREntryPoint &get_entry_point(const std::string &name) const;
+	SPIRV_CROSS_DEPRECATED("Please use get_entry_point(const std::string &, spv::ExecutionModel) instead.")
 	SPIREntryPoint &get_entry_point(const std::string &name);
 
 	// Some shader languages restrict the names that can be given to entry points, and the
@@ -282,8 +294,22 @@ public:
 	// the name, as updated by the backend during the call to compile(). If the name is not
 	// illegal, and has not been renamed, or if this function is called before compile(),
 	// this function will simply return the same name.
+	SPIRV_CROSS_DEPRECATED(
+	    "Please use get_cleansed_entry_point_name(const std::string &, spv::ExecutionModel) instead.")
 	const std::string &get_cleansed_entry_point_name(const std::string &name) const;
 
+	// New variants of entry point query and reflection.
+	// Names for entry points in the SPIR-V module may alias if they belong to different execution models.
+	// To disambiguate, we must pass along with the entry point names the execution model.
+	std::vector<EntryPoint> get_entry_points_and_stages() const;
+	void set_entry_point(const std::string &entry, spv::ExecutionModel execution_model);
+	void rename_entry_point(const std::string &old_name, const std::string &new_name,
+	                        spv::ExecutionModel execution_model);
+	const SPIREntryPoint &get_entry_point(const std::string &name, spv::ExecutionModel execution_model) const;
+	SPIREntryPoint &get_entry_point(const std::string &name, spv::ExecutionModel execution_model);
+	const std::string &get_cleansed_entry_point_name(const std::string &name,
+	                                                 spv::ExecutionModel execution_model) const;
+
 	// Query and modify OpExecutionMode.
 	uint64_t get_execution_mode_mask() const;
 	void unset_execution_mode(spv::ExecutionMode mode);
@@ -314,6 +340,22 @@ public:
 	uint32_t get_work_group_size_specialization_constants(SpecializationConstant &x, SpecializationConstant &y,
 	                                                      SpecializationConstant &z) const;
 
+	// Analyzes all OpImageFetch (texelFetch) opcodes and checks if there are instances where
+	// said instruction is used without a combined image sampler.
+	// GLSL targets do not support the use of texelFetch without a sampler.
+	// To work around this, we must inject a dummy sampler which can be used to form a sampler2D at the call-site of
+	// texelFetch as necessary.
+	//
+	// This must be called before build_combined_image_samplers().
+	// build_combined_image_samplers() may refer to the ID returned by this method if the returned ID is non-zero.
+	// The return value will be the ID of a sampler object if a dummy sampler is necessary, or 0 if no sampler object
+	// is required.
+	//
+	// If the returned ID is non-zero, it can be decorated with set/bindings as desired before calling compile().
+	// Calling this function also invalidates get_active_interface_variables(), so this should be called
+	// before that function.
+	uint32_t build_dummy_sampler_for_combined_images();
+
 	// Analyzes all separate image and samplers used from the currently selected entry point,
 	// and re-routes them all to a combined image sampler instead.
 	// This is required to "support" separate image samplers in targets which do not natively support
@@ -678,6 +720,18 @@ protected:
 		                                     bool depth);
 	};
 
+	struct DummySamplerForCombinedImageHandler : OpcodeHandler
+	{
+		DummySamplerForCombinedImageHandler(Compiler &compiler_)
+		    : compiler(compiler_)
+		{
+		}
+		bool handle(spv::Op opcode, const uint32_t *args, uint32_t length) override; // Defined outside this header.
+
+		Compiler &compiler; // Compiler this handler was constructed with.
+		bool need_dummy_sampler = false; // Starts false; presumably raised by handle() -- TODO confirm in the .cpp.
+	};
+
 	struct ActiveBuiltinHandler : OpcodeHandler
 	{
 		ActiveBuiltinHandler(Compiler &compiler_)
@@ -687,6 +741,8 @@ protected:
 
 		bool handle(spv::Op opcode, const uint32_t *args, uint32_t length) override;
 		Compiler &compiler;
+
+		void handle_builtin(const SPIRType &type, spv::BuiltIn builtin, uint64_t decoration_flags);
 	};
 
 	bool traverse_all_reachable_opcodes(const SPIRBlock &block, OpcodeHandler &handler) const;
@@ -707,6 +763,10 @@ protected:
 
 	uint64_t active_input_builtins = 0;
 	uint64_t active_output_builtins = 0;
+	uint32_t clip_distance_count = 0;
+	uint32_t cull_distance_count = 0;
+	bool position_invariant = false;
+
 	// Traverses all reachable opcodes and sets active_builtins to a bitmask of all builtin variables which are accessed in the shader.
 	void update_active_builtins();
 	bool has_active_builtin(spv::BuiltIn builtin, spv::StorageClass storage);
@@ -720,8 +780,17 @@ protected:
 	// SPIR-V does not support this distinction, so we must keep track of this information outside the type system.
 	// There might be unrelated IDs found in this set which do not correspond to actual variables.
 	// This set should only be queried for the existence of samplers which are already known to be variables or parameter IDs.
+	// The same tracking is done for images, and for whether subpass inputs are needed.
 	std::unordered_set<uint32_t> comparison_samplers;
-	void analyze_sampler_comparison_states();
+	std::unordered_set<uint32_t> comparison_images;
+	bool need_subpass_input = false;
+
+	// In certain backends, we will need to use a dummy sampler to be able to emit code.
+	// GLSL does not support texelFetch on texture2D objects, but SPIR-V does,
+	// so we need to work around this by having the application inject a dummy sampler.
+	uint32_t dummy_sampler_id = 0;
+
+	void analyze_image_and_sampler_usage();
 	struct CombinedImageSamplerUsageHandler : OpcodeHandler
 	{
 		CombinedImageSamplerUsageHandler(Compiler &compiler_)
@@ -734,9 +803,12 @@ protected:
 		Compiler &compiler;
 
 		std::unordered_map<uint32_t, std::unordered_set<uint32_t>> dependency_hierarchy;
+		std::unordered_set<uint32_t> comparison_images;
 		std::unordered_set<uint32_t> comparison_samplers;
 
 		void add_hierarchy_to_comparison_samplers(uint32_t sampler);
+		void add_hierarchy_to_comparison_images(uint32_t image);
+		bool need_subpass_input = false;
 	};
 
 	void make_constant_null(uint32_t id, uint32_t type);
@@ -747,6 +819,11 @@ protected:
 
 	bool instruction_to_result_type(uint32_t &result_type, uint32_t &result_id, spv::Op op, const uint32_t *args,
 	                                uint32_t length);
+
+private:
+	// Used only to implement the old deprecated get_entry_point() interface.
+	const SPIREntryPoint &get_first_entry_point(const std::string &name) const;
+	SPIREntryPoint &get_first_entry_point(const std::string &name);
 };
 }
 
diff --git a/spirv_cross_util.cpp b/spirv_cross_util.cpp
new file mode 100644
index 0000000000..5bc2f3517a
--- /dev/null
+++ b/spirv_cross_util.cpp
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2015-2018 ARM Limited
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "spirv_cross_util.hpp"
+#include "spirv_common.hpp"
+
+using namespace spv;
+using namespace spirv_cross;
+
+namespace spirv_cross_util
+{
+// Renames every resource in `resources` whose Location decoration equals `location` to `name`.
+// Struct-typed matches additionally get a fixed type name and fixed member names.
+void rename_interface_variable(spirv_cross::Compiler &compiler, const std::vector<spirv_cross::Resource> &resources,
+                               uint32_t location, const std::string &name)
+{
+	for (const auto &resource : resources)
+	{
+		// Skip anything without a Location, or with a different one.
+		const bool has_location = compiler.has_decoration(resource.id, spv::DecorationLocation);
+		if (!has_location || compiler.get_decoration(resource.id, spv::DecorationLocation) != location)
+			continue;
+
+		// Renaming an interface variable may require the struct behind it to be renamed too,
+		// so that all the names match up; give the type and its members predictable names.
+		const auto &resource_type = compiler.get_type(resource.base_type_id);
+		if (resource_type.basetype == SPIRType::Struct)
+		{
+			const auto struct_type_id = resource.base_type_id;
+			compiler.set_name(struct_type_id, join("SPIRV_Cross_Interface_Location", location));
+			for (uint32_t member = 0; member < uint32_t(resource_type.member_types.size()); member++)
+				compiler.set_member_name(struct_type_id, member, join("InterfaceMember", member));
+		}
+
+		compiler.set_name(resource.id, name);
+	}
+}
+}
diff --git a/spirv_cross_util.hpp b/spirv_cross_util.hpp
new file mode 100644
index 0000000000..509c7c09ce
--- /dev/null
+++ b/spirv_cross_util.hpp
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2015-2018 ARM Limited
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SPIRV_CROSS_UTIL_HPP
+#define SPIRV_CROSS_UTIL_HPP
+
+#include "spirv_cross.hpp"
+
+namespace spirv_cross_util
+{
+void rename_interface_variable(spirv_cross::Compiler &compiler, const std::vector<spirv_cross::Resource> &resources,
+                               uint32_t location, const std::string &name);
+}
+
+#endif
diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp
index 10c61b82d4..882d241f5f 100644
--- a/spirv_glsl.cpp
+++ b/spirv_glsl.cpp
@@ -19,6 +19,7 @@
 #include "spirv_common.hpp"
 #include <algorithm>
 #include <assert.h>
+#include <cmath>
 #include <utility>
 
 using namespace spv;
@@ -109,7 +110,7 @@ string CompilerGLSL::sanitize_underscores(const string &str)
 }
 
 // Returns true if an arithmetic operation does not change behavior depending on signedness.
-static bool opcode_is_sign_invariant(Op opcode)
+static bool glsl_opcode_is_sign_invariant(Op opcode)
 {
 	switch (opcode)
 	{
@@ -243,6 +244,7 @@ void CompilerGLSL::reset()
 	forwarded_temporaries.clear();
 
 	resource_names.clear();
+	function_overloads.clear();
 
 	for (auto &id : ids)
 	{
@@ -371,12 +373,13 @@ string CompilerGLSL::compile()
 	if (options.vulkan_semantics)
 		backend.allow_precision_qualifiers = true;
 	backend.force_gl_in_out_block = true;
+	backend.supports_extensions = true;
 
 	// Scan the SPIR-V to find trivial uses of extensions.
 	find_static_extensions();
 	fixup_image_load_store_access();
 	update_active_builtins();
-	analyze_sampler_comparison_states();
+	analyze_image_and_sampler_usage();
 
 	uint32_t pass_count = 0;
 	do
@@ -1040,7 +1043,8 @@ uint32_t CompilerGLSL::type_to_packed_size(const SPIRType &type, uint64_t flags,
 	return size;
 }
 
-bool CompilerGLSL::buffer_is_packing_standard(const SPIRType &type, BufferPackingStandard packing)
+bool CompilerGLSL::buffer_is_packing_standard(const SPIRType &type, BufferPackingStandard packing,
+                                              uint32_t start_offset, uint32_t end_offset)
 {
 	// This is very tricky and error prone, but try to be exhaustive and correct here.
 	// SPIR-V doesn't directly say if we're using std430 or std140.
@@ -1079,6 +1083,10 @@ bool CompilerGLSL::buffer_is_packing_standard(const SPIRType &type, BufferPackin
 		uint32_t alignment = max(packed_alignment, pad_alignment);
 		offset = (offset + alignment - 1) & ~(alignment - 1);
 
+		// Field is not in the specified range anymore and we can ignore any further fields.
+		if (offset >= end_offset)
+			break;
+
 		// The next member following a struct member is aligned to the base alignment of the struct that came before.
 		// GL 4.5 spec, 7.6.2.2.
 		if (memb_type.basetype == SPIRType::Struct)
@@ -1086,27 +1094,31 @@ bool CompilerGLSL::buffer_is_packing_standard(const SPIRType &type, BufferPackin
 		else
 			pad_alignment = 1;
 
-		// We only care about offsets in std140, std430, etc ...
-		// For EnhancedLayout variants, we have the flexibility to choose our own offsets.
-		if (!packing_has_flexible_offset(packing))
+		// Only care about packing if we are in the given range
+		if (offset >= start_offset)
 		{
-			uint32_t actual_offset = type_struct_member_offset(type, i);
-			if (actual_offset != offset) // This cannot be the packing we're looking for.
+			// We only care about offsets in std140, std430, etc ...
+			// For EnhancedLayout variants, we have the flexibility to choose our own offsets.
+			if (!packing_has_flexible_offset(packing))
+			{
+				uint32_t actual_offset = type_struct_member_offset(type, i);
+				if (actual_offset != offset) // This cannot be the packing we're looking for.
+					return false;
+			}
+
+			// Verify array stride rules.
+			if (!memb_type.array.empty() && type_to_packed_array_stride(memb_type, member_flags, packing) !=
+			                                    type_struct_member_array_stride(type, i))
+				return false;
+
+			// Verify that sub-structs also follow packing rules.
+			// We cannot use enhanced layouts on substructs, so they better be up to spec.
+			auto substruct_packing = packing_to_substruct_packing(packing);
+
+			if (!memb_type.member_types.empty() && !buffer_is_packing_standard(memb_type, substruct_packing))
 				return false;
 		}
 
-		// Verify array stride rules.
-		if (!memb_type.array.empty() &&
-		    type_to_packed_array_stride(memb_type, member_flags, packing) != type_struct_member_array_stride(type, i))
-			return false;
-
-		// Verify that sub-structs also follow packing rules.
-		// We cannot use enhanced layouts on substructs, so they better be up to spec.
-		auto substruct_packing = packing_to_substruct_packing(packing);
-
-		if (!memb_type.member_types.empty() && !buffer_is_packing_standard(memb_type, substruct_packing))
-			return false;
-
 		// Bump size.
 		offset += packed_size;
 	}
@@ -1804,6 +1816,11 @@ void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionMo
 	bool emitted_block = false;
 	bool builtin_array = false;
 
+	// Need to use declared size in the type.
+	// These variables might have been declared, but not statically used, so we haven't deduced their size yet.
+	uint32_t cull_distance_size = 0;
+	uint32_t clip_distance_size = 0;
+
 	for (auto &id : ids)
 	{
 		if (id.get_type() != TypeVariable)
@@ -1816,16 +1833,32 @@ void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionMo
 
 		if (var.storage == storage && block && is_builtin_variable(var))
 		{
+			uint32_t index = 0;
 			for (auto &m : meta[type.self].members)
+			{
 				if (m.builtin)
+				{
 					builtins |= 1ull << m.builtin_type;
+					if (m.builtin_type == BuiltInCullDistance)
+						cull_distance_size = get<SPIRType>(type.member_types[index]).array.front();
+					else if (m.builtin_type == BuiltInClipDistance)
+						clip_distance_size = get<SPIRType>(type.member_types[index]).array.front();
+				}
+				index++;
+			}
 		}
 		else if (var.storage == storage && !block && is_builtin_variable(var))
 		{
 			// While we're at it, collect all declared global builtins (HLSL mostly ...).
 			auto &m = meta[var.self].decoration;
 			if (m.builtin)
+			{
 				global_builtins |= 1ull << m.builtin_type;
+				if (m.builtin_type == BuiltInCullDistance)
+					cull_distance_size = type.array.front();
+				else if (m.builtin_type == BuiltInClipDistance)
+					clip_distance_size = type.array.front();
+			}
 		}
 
 		if (!builtins)
@@ -1862,9 +1895,9 @@ void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionMo
 	if (emitted_builtins & (1ull << BuiltInPointSize))
 		statement("float gl_PointSize;");
 	if (emitted_builtins & (1ull << BuiltInClipDistance))
-		statement("float gl_ClipDistance[];"); // TODO: Do we need a fixed array size here?
+		statement("float gl_ClipDistance[", clip_distance_size, "];");
 	if (emitted_builtins & (1ull << BuiltInCullDistance))
-		statement("float gl_CullDistance[];"); // TODO: Do we need a fixed array size here?
+		statement("float gl_CullDistance[", cull_distance_size, "];");
 
 	bool tessellation = model == ExecutionModelTessellationEvaluation || model == ExecutionModelTessellationControl;
 	if (builtin_array)
@@ -1918,7 +1951,7 @@ void CompilerGLSL::emit_resources()
 		emit_pls();
 
 	// Emit custom gl_PerVertex for SSO compatibility.
-	if (options.separate_shader_objects && !options.es)
+	if (options.separate_shader_objects && !options.es && execution.model != ExecutionModelFragment)
 	{
 		switch (execution.model)
 		{
@@ -1937,6 +1970,24 @@ void CompilerGLSL::emit_resources()
 			break;
 		}
 	}
+	else
+	{
+		// Need to redeclare clip/cull distance with explicit size to use them.
+		// SPIR-V mandates these builtins have a size declared.
+		const char *storage = execution.model == ExecutionModelFragment ? "in" : "out";
+		if (clip_distance_count != 0)
+			statement(storage, " float gl_ClipDistance[", clip_distance_count, "];");
+		if (cull_distance_count != 0)
+			statement(storage, " float gl_CullDistance[", cull_distance_count, "];");
+		if (clip_distance_count != 0 || cull_distance_count != 0)
+			statement("");
+	}
+
+	if (position_invariant)
+	{
+		statement("invariant gl_Position;");
+		statement("");
+	}
 
 	bool emitted = false;
 
@@ -2238,7 +2289,10 @@ string CompilerGLSL::to_expression(uint32_t id)
 		if (e.base_expression)
 			return to_enclosed_expression(e.base_expression) + e.expression;
 		else if (e.need_transpose)
-			return convert_row_major_matrix(e.expression, get<SPIRType>(e.expression_type));
+		{
+			bool is_packed = has_decoration(id, DecorationCPacked);
+			return convert_row_major_matrix(e.expression, get<SPIRType>(e.expression_type), is_packed);
+		}
 		else
 		{
 			if (force_recompile)
@@ -2402,7 +2456,7 @@ string CompilerGLSL::constant_op_expression(const SPIRConstantOp &cop)
 	}
 
 	SPIRType::BaseType input_type;
-	bool skip_cast_if_equal_type = opcode_is_sign_invariant(cop.opcode);
+	bool skip_cast_if_equal_type = glsl_opcode_is_sign_invariant(cop.opcode);
 
 	switch (cop.opcode)
 	{
@@ -2505,6 +2559,149 @@ string CompilerGLSL::constant_expression(const SPIRConstant &c)
 	}
 }
 
+#ifdef _MSC_VER
+// sprintf warning.
+// We cannot rely on snprintf existing because, ..., MSVC.
+#pragma warning(push)
+#pragma warning(disable : 4996)
+#endif
+
+// Renders the 32-bit float component (col, row) of constant c as source text.
+// Finite values become a decimal literal. NaN and +/-Inf become either a bitcast of the raw
+// bit pattern (when is_legacy() is false) or a spelled-out division by zero (when it is true).
+// The decimal literal and the division spellings honour backend.float_literal_suffix.
+string CompilerGLSL::convert_float_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
+{
+	const float value = c.scalar_f32(col, row);
+
+	// Read once; used by both the decimal literal and the division-by-zero spellings.
+	const bool use_suffix = backend.float_literal_suffix;
+
+	string text;
+
+	if (!std::isnan(value) && !std::isinf(value))
+	{
+		// Ordinary case: a plain decimal literal.
+		text = convert_to_string(value);
+		if (use_suffix)
+			text += "f";
+	}
+	else if (is_legacy())
+	{
+		// Legacy targets: spell the special value as a division by zero,
+		// choosing the variant that matches +Inf, -Inf or NaN.
+		const char *positive_inf = use_suffix ? "(1.0f / 0.0f)" : "(1.0 / 0.0)";
+		const char *negative_inf = use_suffix ? "(-1.0f / 0.0f)" : "(-1.0 / 0.0)";
+		const char *not_a_number = use_suffix ? "(0.0f / 0.0f)" : "(0.0 / 0.0)";
+
+		if (value == numeric_limits<float>::infinity())
+			text = positive_inf;
+		else if (value == -numeric_limits<float>::infinity())
+			text = negative_inf;
+		else if (std::isnan(value))
+			text = not_a_number;
+		else
+			SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant.");
+	}
+	else
+	{
+		// Non-legacy targets: print the raw 32-bit pattern as an unsigned hex literal
+		// and wrap it in the bitcast operation from uint to float.
+		SPIRType uint_type;
+		uint_type.basetype = SPIRType::UInt;
+		uint_type.width = 32;
+		uint_type.vecsize = 1;
+
+		SPIRType float_type;
+		float_type.basetype = SPIRType::Float;
+		float_type.width = 32;
+		float_type.vecsize = 1;
+
+		// sprintf is deliberate: the note above this function explains why snprintf is avoided.
+		char hex_literal[32];
+		sprintf(hex_literal, "0x%xu", c.scalar(col, row));
+		text = join(bitcast_glsl_op(float_type, uint_type), "(", hex_literal, ")");
+	}
+
+	return text;
+}
+
+// Renders the 64-bit float component (col, row) of constant c as source text.
+// Finite values become a decimal literal. NaN and +/-Inf become either a bitcast of the raw
+// bit pattern (when is_legacy() is false) or a spelled-out division by zero (when it is true).
+// Both non-finite paths throw for the ES profile before emitting anything.
+std::string CompilerGLSL::convert_double_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
+{
+	const double value = c.scalar_f64(col, row);
+
+	// Read once; used by both the decimal literal and the division-by-zero spellings.
+	const bool use_suffix = backend.double_literal_suffix;
+
+	string text;
+
+	if (!std::isnan(value) && !std::isinf(value))
+	{
+		// Ordinary case: a plain decimal literal.
+		text = convert_to_string(value);
+		if (use_suffix)
+			text += "lf";
+	}
+	else if (is_legacy())
+	{
+		// Legacy targets: check FP64 availability first, then pick the division-by-zero
+		// spelling that matches +Inf, -Inf or NaN.
+		if (options.es)
+			SPIRV_CROSS_THROW("FP64 not supported in ES profile.");
+		if (options.version < 400)
+			require_extension("GL_ARB_gpu_shader_fp64");
+
+		const char *positive_inf = use_suffix ? "(1.0lf / 0.0lf)" : "(1.0 / 0.0)";
+		const char *negative_inf = use_suffix ? "(-1.0lf / 0.0lf)" : "(-1.0 / 0.0)";
+		const char *not_a_number = use_suffix ? "(0.0lf / 0.0lf)" : "(0.0 / 0.0)";
+
+		if (value == numeric_limits<double>::infinity())
+			text = positive_inf;
+		else if (value == -numeric_limits<double>::infinity())
+			text = negative_inf;
+		else if (std::isnan(value))
+			text = not_a_number;
+		else
+			SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant.");
+	}
+	else
+	{
+		// Non-legacy targets: print the raw 64-bit pattern as an unsigned hex literal
+		// and wrap it in the bitcast operation from a 64-bit uint to double.
+		SPIRType uint64_type;
+		uint64_type.basetype = SPIRType::UInt64;
+		uint64_type.width = 64;
+		uint64_type.vecsize = 1;
+
+		SPIRType double_type;
+		double_type.basetype = SPIRType::Double;
+		double_type.width = 64;
+		double_type.vecsize = 1;
+
+		const uint64_t raw_bits = c.scalar_u64(col, row);
+
+		if (options.es)
+			SPIRV_CROSS_THROW("64-bit integers/float not supported in ES profile.");
+		require_extension("GL_ARB_gpu_shader_int64");
+
+		// sprintf is deliberate: the note above these helpers explains why snprintf is avoided.
+		const char *integer_suffix = backend.long_long_literal_suffix ? "ull" : "ul";
+		char hex_literal[64];
+		sprintf(hex_literal, "0x%llx%s", static_cast<unsigned long long>(raw_bits), integer_suffix);
+		text = join(bitcast_glsl_op(double_type, uint64_type), "(", hex_literal, ")");
+	}
+
+	return text;
+}
+
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+
 string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t vector)
 {
 	auto type = get<SPIRType>(c.constant_type);
@@ -2571,10 +2768,7 @@ string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t
 	case SPIRType::Float:
 		if (splat || swizzle_splat)
 		{
-			res += convert_to_string(c.scalar_f32(vector, 0));
-			if (backend.float_literal_suffix)
-				res += "f";
-
+			res += convert_float_to_string(c, vector, 0);
 			if (swizzle_splat)
 				res = remap_swizzle(get<SPIRType>(c.constant_type), 1, res);
 		}
@@ -2585,10 +2779,8 @@ string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t
 				if (options.vulkan_semantics && c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
 					res += to_name(c.specialization_constant_id(vector, i));
 				else
-					res += convert_to_string(c.scalar_f32(vector, i));
+					res += convert_float_to_string(c, vector, i);
 
-				if (backend.float_literal_suffix)
-					res += "f";
 				if (i + 1 < c.vector_size())
 					res += ", ";
 			}
@@ -2598,10 +2790,7 @@ string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t
 	case SPIRType::Double:
 		if (splat || swizzle_splat)
 		{
-			res += convert_to_string(c.scalar_f64(vector, 0));
-			if (backend.double_literal_suffix)
-				res += "lf";
-
+			res += convert_double_to_string(c, vector, 0);
 			if (swizzle_splat)
 				res = remap_swizzle(get<SPIRType>(c.constant_type), 1, res);
 		}
@@ -2612,11 +2801,7 @@ string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t
 				if (options.vulkan_semantics && c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
 					res += to_name(c.specialization_constant_id(vector, i));
 				else
-				{
-					res += convert_to_string(c.scalar_f64(vector, i));
-					if (backend.double_literal_suffix)
-						res += "lf";
-				}
+					res += convert_double_to_string(c, vector, i);
 
 				if (i + 1 < c.vector_size())
 					res += ", ";
@@ -3473,13 +3658,40 @@ string CompilerGLSL::to_function_name(uint32_t, const SPIRType &imgtype, bool is
 }
 
 // Returns the function args for a texture sampling function for the specified image and sampling characteristics.
-string CompilerGLSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool, bool, bool is_proj, uint32_t coord,
-                                      uint32_t coord_components, uint32_t dref, uint32_t grad_x, uint32_t grad_y,
-                                      uint32_t lod, uint32_t coffset, uint32_t offset, uint32_t bias, uint32_t comp,
-                                      uint32_t sample, bool *p_forward)
+string CompilerGLSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool, bool is_proj,
+                                      uint32_t coord, uint32_t coord_components, uint32_t dref, uint32_t grad_x,
+                                      uint32_t grad_y, uint32_t lod, uint32_t coffset, uint32_t offset, uint32_t bias,
+                                      uint32_t comp, uint32_t sample, bool *p_forward)
 {
 	string farg_str = to_expression(img);
 
+	if (is_fetch)
+	{
+		auto *var = maybe_get_backing_variable(img);
+
+		// If we are fetching from a plain OpTypeImage, we must combine with a dummy sampler.
+		if (var)
+		{
+			auto &type = get<SPIRType>(var->basetype);
+			if (type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer)
+			{
+				if (!dummy_sampler_id)
+					SPIRV_CROSS_THROW(
+					    "Cannot find dummy sampler ID. Was build_dummy_sampler_for_combined_images() called?");
+
+				if (options.vulkan_semantics)
+				{
+					auto sampled_type = imgtype;
+					sampled_type.basetype = SPIRType::SampledImage;
+					farg_str = join(type_to_glsl(sampled_type), "(", to_expression(img), ", ",
+					                to_expression(dummy_sampler_id), ")");
+				}
+				else
+					farg_str = to_combined_image_sampler(img, dummy_sampler_id);
+			}
+		}
+	}
+
 	bool swizz_func = backend.swizzle_is_function;
 	auto swizzle = [swizz_func](uint32_t comps, uint32_t in_comps) -> const char * {
 		if (comps == in_comps)
@@ -4289,7 +4501,7 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
 
 	bool access_chain_is_arrayed = false;
 	bool row_major_matrix_needs_conversion = is_non_native_row_major_matrix(base);
-	bool vector_is_packed = false;
+	bool is_packed = false;
 	bool pending_array_enclose = false;
 	bool dimension_flatten = false;
 
@@ -4421,7 +4633,7 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
 				}
 			}
 
-			vector_is_packed = member_is_packed_type(*type, index);
+			is_packed = member_is_packed_type(*type, index);
 			row_major_matrix_needs_conversion = member_is_non_native_row_major_matrix(*type, index);
 			type = &get<SPIRType>(type->member_types[index]);
 		}
@@ -4430,8 +4642,9 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
 		{
 			if (row_major_matrix_needs_conversion)
 			{
-				expr = convert_row_major_matrix(expr, *type);
+				expr = convert_row_major_matrix(expr, *type, is_packed);
 				row_major_matrix_needs_conversion = false;
+				is_packed = false;
 			}
 
 			expr += "[";
@@ -4447,10 +4660,10 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
 		// Vector -> Scalar
 		else if (type->vecsize > 1)
 		{
-			if (vector_is_packed)
+			if (is_packed)
 			{
 				expr = unpack_expression_type(expr, *type);
-				vector_is_packed = false;
+				is_packed = false;
 			}
 
 			if (index_is_literal)
@@ -4474,7 +4687,7 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
 			type_id = type->parent_type;
 			type = &get<SPIRType>(type_id);
 		}
-		else
+		else if (!backend.allow_truncated_access_chain)
 			SPIRV_CROSS_THROW("Cannot subdivide a scalar value!");
 	}
 
@@ -4489,7 +4702,7 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
 		*need_transpose = row_major_matrix_needs_conversion;
 
 	if (result_is_packed)
-		*result_is_packed = vector_is_packed;
+		*result_is_packed = is_packed;
 
 	return expr;
 }
@@ -4620,7 +4833,7 @@ std::string CompilerGLSL::flattened_access_chain_struct(uint32_t base, const uin
 
 		// Cannot forward transpositions, so resolve them here.
 		if (need_transpose)
-			expr += convert_row_major_matrix(tmp, member_type);
+			expr += convert_row_major_matrix(tmp, member_type, false);
 		else
 			expr += tmp;
 	}
@@ -5225,13 +5438,13 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 
 #define BOP(op) emit_binary_op(ops[0], ops[1], ops[2], ops[3], #op)
 #define BOP_CAST(op, type) \
-	emit_binary_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode))
+	emit_binary_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, glsl_opcode_is_sign_invariant(opcode))
 #define UOP(op) emit_unary_op(ops[0], ops[1], ops[2], #op)
 #define QFOP(op) emit_quaternary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], #op)
 #define TFOP(op) emit_trinary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], #op)
 #define BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op)
 #define BFOP_CAST(op, type) \
-	emit_binary_func_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode))
+	emit_binary_func_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, glsl_opcode_is_sign_invariant(opcode))
 #define BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op)
 #define UFOP(op) emit_unary_func_op(ops[0], ops[1], ops[2], #op)
 
@@ -5358,9 +5571,11 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		length -= 3;
 
 		auto &callee = get<SPIRFunction>(func);
+		auto &return_type = get<SPIRType>(callee.return_type);
 		bool pure = function_is_pure(callee);
 
 		bool callee_has_out_variables = false;
+		bool emit_return_value_as_argument = false;
 
 		// Invalidate out variables passed to functions since they can be OpStore'd to.
 		for (uint32_t i = 0; i < length; i++)
@@ -5374,12 +5589,25 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 			flush_variable_declaration(arg[i]);
 		}
 
+		if (!return_type.array.empty() && !backend.can_return_array)
+		{
+			callee_has_out_variables = true;
+			emit_return_value_as_argument = true;
+		}
+
 		if (!pure)
 			register_impure_function_call();
 
 		string funexpr;
 		vector<string> arglist;
 		funexpr += to_name(func) + "(";
+
+		if (emit_return_value_as_argument)
+		{
+			statement(type_to_glsl(return_type), " ", to_name(id), type_to_array_glsl(return_type), ";");
+			arglist.push_back(to_name(id));
+		}
+
 		for (uint32_t i = 0; i < length; i++)
 		{
 			// Do not pass in separate images or samplers if we're remapping
@@ -5414,7 +5642,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		// Check for function call constraints.
 		check_function_call_constraints(arg, length);
 
-		if (get<SPIRType>(result_type).basetype != SPIRType::Void)
+		if (return_type.basetype != SPIRType::Void)
 		{
 			// If the function actually writes to an out variable,
 			// take the conservative route and do not forward.
@@ -5426,7 +5654,13 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 			bool forward = args_will_forward(id, arg, length, pure) && !callee_has_out_variables && pure &&
 			               (forced_temporaries.find(id) == end(forced_temporaries));
 
-			emit_op(result_type, id, funexpr, forward);
+			if (emit_return_value_as_argument)
+			{
+				statement(funexpr, ";");
+				set<SPIRExpression>(id, to_name(id), result_type, true);
+			}
+			else
+				emit_op(result_type, id, funexpr, forward);
 
 			// Function calls are implicit loads from all variables in question.
 			// Set dependencies for them.
@@ -5875,6 +6109,28 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		BFOP(mod);
 		break;
 
+	case OpFRem:
+	{
+		if (is_legacy())
+			SPIRV_CROSS_THROW("OpFRem requires trunc() and is only supported on non-legacy targets. A workaround is "
+			                  "needed for legacy.");
+
+		uint32_t result_type = ops[0];
+		uint32_t result_id = ops[1];
+		uint32_t op0 = ops[2];
+		uint32_t op1 = ops[3];
+
+		// Needs special handling.
+		bool forward = should_forward(op0) && should_forward(op1);
+		auto expr = join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ", "trunc(",
+		                 to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), ")");
+
+		emit_op(result_type, result_id, expr, forward);
+		inherit_expression_dependencies(result_id, op0);
+		inherit_expression_dependencies(result_id, op1);
+		break;
+	}
+
 	// Relational
 	case OpAny:
 		UFOP(any);
@@ -7050,7 +7306,7 @@ bool CompilerGLSL::member_is_packed_type(const SPIRType &type, uint32_t index) c
 // row_major matrix result of the expression to a column_major matrix.
 // Base implementation uses the standard library transpose() function.
 // Subclasses may override to use a different function.
-string CompilerGLSL::convert_row_major_matrix(string exp_str, const SPIRType & /*exp_type*/)
+string CompilerGLSL::convert_row_major_matrix(string exp_str, const SPIRType & /*exp_type*/, bool /*is_packed*/)
 {
 	strip_enclosed_expression(exp_str);
 	return join("transpose(", exp_str, ")");
@@ -7066,7 +7322,7 @@ string CompilerGLSL::variable_decl(const SPIRType &type, const string &name, uin
 // Emit a structure member. Subclasses may override to modify output,
 // or to dynamically add a padding member if needed.
 void CompilerGLSL::emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index,
-                                      const string &qualifier)
+                                      const string &qualifier, uint32_t)
 {
 	auto &membertype = get<SPIRType>(member_type_id);
 
@@ -7243,10 +7499,18 @@ uint32_t CompilerGLSL::to_array_size_literal(const SPIRType &type, uint32_t inde
 {
 	assert(type.array.size() == type.array_size_literal.size());
 
-	if (!type.array_size_literal[index])
-		SPIRV_CROSS_THROW("The array size is not a literal, but a specialization constant or spec constant op.");
-
-	return type.array[index];
+	if (type.array_size_literal[index])
+	{
+		return type.array[index];
+	}
+	else
+	{
+		// Use the default spec constant value.
+		// This is the best we can do.
+		uint32_t array_size_id = type.array[index];
+		uint32_t array_size = get<SPIRConstant>(array_size_id).scalar();
+		return array_size;
+	}
 }
 
 string CompilerGLSL::to_array_size(const SPIRType &type, uint32_t index)
@@ -7389,7 +7653,9 @@ string CompilerGLSL::image_type_glsl(const SPIRType &type, uint32_t /* id */)
 			require_extension("GL_EXT_texture_array");
 		res += "Array";
 	}
-	if (type.image.depth)
+
+	// "Shadow" state in GLSL only exists for samplers and combined image samplers.
+	if (((type.basetype == SPIRType::SampledImage) || (type.basetype == SPIRType::Sampler)) && type.image.depth)
 		res += "Shadow";
 
 	return res;
@@ -7575,7 +7841,7 @@ bool CompilerGLSL::has_extension(const std::string &ext) const
 
 void CompilerGLSL::require_extension(const string &ext)
 {
-	if (!has_extension(ext))
+	if (backend.supports_extensions && !has_extension(ext))
 	{
 		forced_extensions.push_back(ext);
 		force_recompile = true;
@@ -7626,8 +7892,56 @@ bool CompilerGLSL::check_atomic_image(uint32_t id)
 		return false;
 }
 
+void CompilerGLSL::add_function_overload(const SPIRFunction &func)
+{
+	Hasher hasher;
+	for (auto &arg : func.arguments)
+	{
+		// Parameters can vary with pointer type or not,
+		// but that will not change the signature in GLSL/HLSL,
+		// so strip the pointer type before hashing.
+		uint32_t type_id = arg.type;
+		auto *type = &get<SPIRType>(type_id);
+		while (type->pointer)
+		{
+			type_id = type->parent_type;
+			type = &get<SPIRType>(type_id);
+		}
+		hasher.u32(type_id);
+	}
+	uint64_t types_hash = hasher.get();
+
+	auto function_name = to_name(func.self);
+	auto itr = function_overloads.find(function_name);
+	if (itr != end(function_overloads))
+	{
+		// There exists a function with this name already.
+		auto &overloads = itr->second;
+		if (overloads.count(types_hash) != 0)
+		{
+			// Overload conflict, assign a new name.
+			add_resource_name(func.self);
+			function_overloads[to_name(func.self)].insert(types_hash);
+		}
+		else
+		{
+			// Can reuse the name.
+			overloads.insert(types_hash);
+		}
+	}
+	else
+	{
+		// First time we see this function name.
+		add_resource_name(func.self);
+		function_overloads[to_name(func.self)].insert(types_hash);
+	}
+}
+
 void CompilerGLSL::emit_function_prototype(SPIRFunction &func, uint64_t return_flags)
 {
+	if (func.self != entry_point)
+		add_function_overload(func);
+
 	// Avoid shadow declarations.
 	local_variable_names = resource_names;
 
@@ -7636,6 +7950,7 @@ void CompilerGLSL::emit_function_prototype(SPIRFunction &func, uint64_t return_f
 	auto &type = get<SPIRType>(func.return_type);
 	decl += flags_to_precision_qualifiers_glsl(type, return_flags);
 	decl += type_to_glsl(type);
+	decl += type_to_array_glsl(type);
 	decl += " ";
 
 	if (func.self == entry_point)
@@ -7898,7 +8213,7 @@ void CompilerGLSL::branch(uint32_t from, uint32_t to)
 				// so just use "self" here.
 				loop_dominator = from;
 			}
-			else if (from_block.loop_dominator != -1u)
+			else if (from_block.loop_dominator != SPIRBlock::NoDominator)
 			{
 				loop_dominator = from_block.loop_dominator;
 			}
@@ -8299,6 +8614,9 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
 		auto flags = meta[tmp.second].decoration.decoration_flags;
 		auto &type = get<SPIRType>(tmp.first);
 		statement(flags_to_precision_qualifiers_glsl(type, flags), variable_decl(type, to_name(tmp.second)), ";");
+
+		// The temporary might be read from before it's assigned, set up the expression now.
+		set<SPIRExpression>(tmp.second, to_name(tmp.second), tmp.first, true);
 	}
 
 	SPIRBlock::ContinueBlockType continue_type = SPIRBlock::ContinueNone;
@@ -8444,9 +8762,26 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
 
 		if (block.return_value)
 		{
-			// OpReturnValue can return Undef, so don't emit anything for this case.
-			if (ids.at(block.return_value).get_type() != TypeUndef)
-				statement("return ", to_expression(block.return_value), ";");
+			auto &type = expression_type(block.return_value);
+			if (!type.array.empty() && !backend.can_return_array)
+			{
+				// If we cannot return arrays, we will have a special out argument we can write to instead.
+				// The backend is responsible for setting this up, and redirecting the return values as appropriate.
+				if (ids.at(block.return_value).get_type() != TypeUndef)
+					emit_array_copy("SPIRV_Cross_return_value", block.return_value);
+
+				if (!block_is_outside_flow_control_from_block(get<SPIRBlock>(current_function->entry_block), block) ||
+				    block.loop_dominator != SPIRBlock::NoDominator)
+				{
+					statement("return;");
+				}
+			}
+			else
+			{
+				// OpReturnValue can return Undef, so don't emit anything for this case.
+				if (ids.at(block.return_value).get_type() != TypeUndef)
+					statement("return ", to_expression(block.return_value), ";");
+			}
 		}
 		// If this block is the very final block and not called from control flow,
 		// we do not need an explicit return which looks out of place. Just end the function here.
@@ -8454,7 +8789,9 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
 		// but we actually need a return here ...
 		else if (!block_is_outside_flow_control_from_block(get<SPIRBlock>(current_function->entry_block), block) ||
 		         block.loop_dominator != SPIRBlock::NoDominator)
+		{
 			statement("return;");
+		}
 		break;
 
 	case SPIRBlock::Kill:
@@ -8572,3 +8909,8 @@ uint32_t CompilerGLSL::mask_relevant_memory_semantics(uint32_t semantics)
 	                    MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask |
 	                    MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask);
 }
+
+void CompilerGLSL::emit_array_copy(const string &lhs, uint32_t rhs_id)
+{
+	statement(lhs, " = ", to_expression(rhs_id), ";");
+}
diff --git a/spirv_glsl.hpp b/spirv_glsl.hpp
index 0cae70c09b..bfc8501946 100644
--- a/spirv_glsl.hpp
+++ b/spirv_glsl.hpp
@@ -18,6 +18,7 @@
 #define SPIRV_CROSS_GLSL_HPP
 
 #include "spirv_cross.hpp"
+#include <limits>
 #include <sstream>
 #include <unordered_map>
 #include <unordered_set>
@@ -202,7 +203,7 @@ protected:
 	virtual std::string type_to_glsl(const SPIRType &type, uint32_t id = 0);
 	virtual std::string builtin_to_glsl(spv::BuiltIn builtin, spv::StorageClass storage);
 	virtual void emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index,
-	                                const std::string &qualifier = "");
+	                                const std::string &qualifier = "", uint32_t base_offset = 0);
 	virtual std::string image_type_glsl(const SPIRType &type, uint32_t id = 0);
 	virtual std::string constant_expression(const SPIRConstant &c);
 	std::string constant_op_expression(const SPIRConstantOp &cop);
@@ -294,14 +295,16 @@ protected:
 	void add_local_variable_name(uint32_t id);
 	void add_resource_name(uint32_t id);
 	void add_member_name(SPIRType &type, uint32_t name);
+	void add_function_overload(const SPIRFunction &func);
 
 	virtual bool is_non_native_row_major_matrix(uint32_t id);
 	virtual bool member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index);
 	bool member_is_packed_type(const SPIRType &type, uint32_t index) const;
-	virtual std::string convert_row_major_matrix(std::string exp_str, const SPIRType &exp_type);
+	virtual std::string convert_row_major_matrix(std::string exp_str, const SPIRType &exp_type, bool is_packed);
 
 	std::unordered_set<std::string> local_variable_names;
 	std::unordered_set<std::string> resource_names;
+	std::unordered_map<std::string, std::unordered_set<uint64_t>> function_overloads;
 
 	bool processing_entry_point = false;
 
@@ -330,6 +333,9 @@ protected:
 		bool allow_precision_qualifiers = false;
 		bool can_swizzle_scalar = false;
 		bool force_gl_in_out_block = false;
+		bool can_return_array = true;
+		bool allow_truncated_access_chain = false;
+		bool supports_extensions = false;
 	} backend;
 
 	void emit_struct(SPIRType &type);
@@ -423,8 +429,10 @@ protected:
 	std::string layout_for_variable(const SPIRVariable &variable);
 	std::string to_combined_image_sampler(uint32_t image_id, uint32_t samp_id);
 	virtual bool skip_argument(uint32_t id) const;
+	virtual void emit_array_copy(const std::string &lhs, uint32_t rhs_id);
 
-	bool buffer_is_packing_standard(const SPIRType &type, BufferPackingStandard packing);
+	bool buffer_is_packing_standard(const SPIRType &type, BufferPackingStandard packing, uint32_t start_offset = 0,
+	                                uint32_t end_offset = std::numeric_limits<uint32_t>::max());
 	uint32_t type_to_packed_base_size(const SPIRType &type, BufferPackingStandard packing);
 	uint32_t type_to_packed_alignment(const SPIRType &type, uint64_t flags, BufferPackingStandard packing);
 	uint32_t type_to_packed_array_stride(const SPIRType &type, uint64_t flags, BufferPackingStandard packing);
@@ -520,6 +528,9 @@ protected:
 	const Instruction *get_next_instruction_in_block(const Instruction &instr);
 	static uint32_t mask_relevant_memory_semantics(uint32_t semantics);
 
+	std::string convert_float_to_string(const SPIRConstant &value, uint32_t col, uint32_t row);
+	std::string convert_double_to_string(const SPIRConstant &value, uint32_t col, uint32_t row);
+
 private:
 	void init()
 	{
diff --git a/spirv_hlsl.cpp b/spirv_hlsl.cpp
index 3f2227cfea..a56865e78d 100644
--- a/spirv_hlsl.cpp
+++ b/spirv_hlsl.cpp
@@ -204,7 +204,7 @@ static string image_format_to_type(ImageFormat fmt, SPIRType::BaseType basetype)
 }
 
 // Returns true if an arithmetic operation does not change behavior depending on signedness.
-static bool opcode_is_sign_invariant(Op opcode)
+static bool hlsl_opcode_is_sign_invariant(Op opcode)
 {
 	switch (opcode)
 	{
@@ -260,8 +260,9 @@ string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type)
 		else
 			SPIRV_CROSS_THROW("Sampler buffers must be either sampled or unsampled. Cannot deduce in runtime.");
 	case DimSubpassData:
-		// This should be implemented same way as desktop GL. Fetch on a 2D texture based on int2(SV_Position).
-		SPIRV_CROSS_THROW("Subpass data support is not yet implemented for HLSL"); // TODO
+		dim = "2D";
+		typed_load = false;
+		break;
 	default:
 		SPIRV_CROSS_THROW("Invalid dimension.");
 	}
@@ -508,6 +509,38 @@ void CompilerHLSL::emit_builtin_outputs_in_struct()
 			semantic = legacy ? "DEPTH" : "SV_Depth";
 			break;
 
+		case BuiltInClipDistance:
+			// HLSL is a bit weird here, use SV_ClipDistance0, SV_ClipDistance1 and so on with vectors.
+			for (uint32_t clip = 0; clip < clip_distance_count; clip += 4)
+			{
+				uint32_t to_declare = clip_distance_count - clip;
+				if (to_declare > 4)
+					to_declare = 4;
+
+				uint32_t semantic_index = clip / 4;
+
+				static const char *types[] = { "float", "float2", "float3", "float4" };
+				statement(types[to_declare - 1], " ", builtin_to_glsl(builtin, StorageClassOutput), semantic_index,
+				          " : SV_ClipDistance", semantic_index, ";");
+			}
+			break;
+
+		case BuiltInCullDistance:
+			// HLSL is a bit weird here, use SV_CullDistance0, SV_CullDistance1 and so on with vectors.
+			for (uint32_t cull = 0; cull < cull_distance_count; cull += 4)
+			{
+				uint32_t to_declare = cull_distance_count - cull;
+				if (to_declare > 4)
+					to_declare = 4;
+
+				uint32_t semantic_index = cull / 4;
+
+				static const char *types[] = { "float", "float2", "float3", "float4" };
+				statement(types[to_declare - 1], " ", builtin_to_glsl(builtin, StorageClassOutput), semantic_index,
+				          " : SV_CullDistance", semantic_index, ";");
+			}
+			break;
+
 		case BuiltInPointSize:
 			// If point_size_compat is enabled, just ignore PointSize.
 			// PointSize does not exist in HLSL, but some code bases might want to be able to use these shaders,
@@ -588,6 +621,54 @@ void CompilerHLSL::emit_builtin_inputs_in_struct()
 			semantic = "SV_GroupID";
 			break;
 
+		case BuiltInFrontFacing:
+			type = "bool";
+			semantic = "SV_IsFrontFace";
+			break;
+
+		case BuiltInNumWorkgroups:
+			// Handled specially.
+			break;
+
+		case BuiltInClipDistance:
+			// HLSL is a bit weird here, use SV_ClipDistance0, SV_ClipDistance1 and so on with vectors.
+			for (uint32_t clip = 0; clip < clip_distance_count; clip += 4)
+			{
+				uint32_t to_declare = clip_distance_count - clip;
+				if (to_declare > 4)
+					to_declare = 4;
+
+				uint32_t semantic_index = clip / 4;
+
+				static const char *types[] = { "float", "float2", "float3", "float4" };
+				statement(types[to_declare - 1], " ", builtin_to_glsl(builtin, StorageClassInput), semantic_index,
+				          " : SV_ClipDistance", semantic_index, ";");
+			}
+			break;
+
+		case BuiltInCullDistance:
+			// HLSL is a bit weird here, use SV_CullDistance0, SV_CullDistance1 and so on with vectors.
+			for (uint32_t cull = 0; cull < cull_distance_count; cull += 4)
+			{
+				uint32_t to_declare = cull_distance_count - cull;
+				if (to_declare > 4)
+					to_declare = 4;
+
+				uint32_t semantic_index = cull / 4;
+
+				static const char *types[] = { "float", "float2", "float3", "float4" };
+				statement(types[to_declare - 1], " ", builtin_to_glsl(builtin, StorageClassInput), semantic_index,
+				          " : SV_CullDistance", semantic_index, ";");
+			}
+			break;
+
+		case BuiltInPointCoord:
+			// PointCoord is not supported, but provide a way to just ignore that, similar to PointSize.
+			if (options.point_coord_compat)
+				break;
+			else
+				SPIRV_CROSS_THROW("Unsupported builtin in HLSL.");
+
 		default:
 			SPIRV_CROSS_THROW("Unsupported builtin in HLSL.");
 			break;
@@ -772,6 +853,19 @@ std::string CompilerHLSL::builtin_to_glsl(spv::BuiltIn builtin, spv::StorageClas
 		return "gl_VertexID";
 	case BuiltInInstanceId:
 		return "gl_InstanceID";
+	case BuiltInNumWorkgroups:
+	{
+		if (!num_workgroups_builtin)
+			SPIRV_CROSS_THROW("NumWorkgroups builtin is used, but remap_num_workgroups_builtin() was not called. "
+			                  "Cannot emit code for this builtin.");
+
+		auto &var = get<SPIRVariable>(num_workgroups_builtin);
+		auto &type = get<SPIRType>(var.basetype);
+		return sanitize_underscores(join(to_name(num_workgroups_builtin), "_", get_member_name(type.self, 0)));
+	}
+	case BuiltInPointCoord:
+		// Crude hack, but there is no real alternative. This path is only enabled if point_coord_compat is set.
+		return "float2(0.5f, 0.5f)";
 	default:
 		return CompilerGLSL::builtin_to_glsl(builtin, storage);
 	}
@@ -787,6 +881,7 @@ void CompilerHLSL::emit_builtin_variables()
 
 		const char *type = nullptr;
 		auto builtin = static_cast<BuiltIn>(i);
+		uint32_t array_size = 0;
 
 		switch (builtin)
 		{
@@ -827,6 +922,25 @@ void CompilerHLSL::emit_builtin_variables()
 			type = "uint";
 			break;
 
+		case BuiltInFrontFacing:
+			type = "bool";
+			break;
+
+		case BuiltInNumWorkgroups:
+		case BuiltInPointCoord:
+			// Handled specially.
+			break;
+
+		case BuiltInClipDistance:
+			array_size = clip_distance_count;
+			type = "float";
+			break;
+
+		case BuiltInCullDistance:
+			array_size = cull_distance_count;
+			type = "float";
+			break;
+
 		default:
 			SPIRV_CROSS_THROW(join("Unsupported builtin in HLSL: ", unsigned(builtin)));
 			break;
@@ -837,7 +951,12 @@ void CompilerHLSL::emit_builtin_variables()
 		// need to distinguish that when we add support for that.
 
 		if (type)
-			statement("static ", type, " ", builtin_to_glsl(builtin, storage), ";");
+		{
+			if (array_size)
+				statement("static ", type, " ", builtin_to_glsl(builtin, storage), "[", array_size, "];");
+			else
+				statement("static ", type, " ", builtin_to_glsl(builtin, storage), ";");
+		}
 	}
 }
 
@@ -1106,7 +1225,8 @@ void CompilerHLSL::emit_resources()
 		return name1.compare(name2) < 0;
 	};
 
-	if (!input_variables.empty() || active_input_builtins)
+	static const uint64_t implicit_builtins = (1ull << BuiltInNumWorkgroups) | (1ull << BuiltInPointCoord);
+	if (!input_variables.empty() || (active_input_builtins & ~implicit_builtins))
 	{
 		require_input = true;
 		statement("struct SPIRV_Cross_Input");
@@ -1419,6 +1539,159 @@ void CompilerHLSL::emit_resources()
 			statement("");
 		}
 	}
+
+	if (requires_inverse_2x2)
+	{
+		statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical");
+		statement("// adjoint and dividing by the determinant. The contents of the matrix are changed.");
+		statement("float2x2 SPIRV_Cross_Inverse(float2x2 m)");
+		begin_scope();
+		statement("float2x2 adj;	// The adjoint matrix (inverse after dividing by determinant)");
+		statement_no_indent("");
+		statement("// Create the transpose of the cofactors, as the classical adjoint of the matrix.");
+		statement("adj[0][0] =  m[1][1];");
+		statement("adj[0][1] = -m[0][1];");
+		statement_no_indent("");
+		statement("adj[1][0] = -m[1][0];");
+		statement("adj[1][1] =  m[0][0];");
+		statement_no_indent("");
+		statement("// Calculate the determinant as a combination of the cofactors of the first row.");
+		statement("float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]);");
+		statement_no_indent("");
+		statement("// Divide the classical adjoint matrix by the determinant.");
+		statement("// If determinant is zero, matrix is not invertable, so leave it unchanged.");
+		statement("return (det != 0.0f) ? (adj * (1.0f / det)) : m;");
+		end_scope();
+		statement("");
+	}
+
+	if (requires_inverse_3x3)
+	{
+		statement("// Returns the determinant of a 2x2 matrix.");
+		statement("float SPIRV_Cross_Det2x2(float a1, float a2, float b1, float b2)");
+		begin_scope();
+		statement("return a1 * b2 - b1 * a2;");
+		end_scope();
+		statement_no_indent("");
+		statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical");
+		statement("// adjoint and dividing by the determinant. The contents of the matrix are changed.");
+		statement("float3x3 SPIRV_Cross_Inverse(float3x3 m)");
+		begin_scope();
+		statement("float3x3 adj;	// The adjoint matrix (inverse after dividing by determinant)");
+		statement_no_indent("");
+		statement("// Create the transpose of the cofactors, as the classical adjoint of the matrix.");
+		statement("adj[0][0] =  SPIRV_Cross_Det2x2(m[1][1], m[1][2], m[2][1], m[2][2]);");
+		statement("adj[0][1] = -SPIRV_Cross_Det2x2(m[0][1], m[0][2], m[2][1], m[2][2]);");
+		statement("adj[0][2] =  SPIRV_Cross_Det2x2(m[0][1], m[0][2], m[1][1], m[1][2]);");
+		statement_no_indent("");
+		statement("adj[1][0] = -SPIRV_Cross_Det2x2(m[1][0], m[1][2], m[2][0], m[2][2]);");
+		statement("adj[1][1] =  SPIRV_Cross_Det2x2(m[0][0], m[0][2], m[2][0], m[2][2]);");
+		statement("adj[1][2] = -SPIRV_Cross_Det2x2(m[0][0], m[0][2], m[1][0], m[1][2]);");
+		statement_no_indent("");
+		statement("adj[2][0] =  SPIRV_Cross_Det2x2(m[1][0], m[1][1], m[2][0], m[2][1]);");
+		statement("adj[2][1] = -SPIRV_Cross_Det2x2(m[0][0], m[0][1], m[2][0], m[2][1]);");
+		statement("adj[2][2] =  SPIRV_Cross_Det2x2(m[0][0], m[0][1], m[1][0], m[1][1]);");
+		statement_no_indent("");
+		statement("// Calculate the determinant as a combination of the cofactors of the first row.");
+		statement("float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]) + (adj[0][2] * m[2][0]);");
+		statement_no_indent("");
+		statement("// Divide the classical adjoint matrix by the determinant.");
+		statement("// If determinant is zero, matrix is not invertable, so leave it unchanged.");
+		statement("return (det != 0.0f) ? (adj * (1.0f / det)) : m;");
+		end_scope();
+		statement("");
+	}
+
+	if (requires_inverse_4x4)
+	{
+		if (!requires_inverse_3x3)
+		{
+			statement("// Returns the determinant of a 2x2 matrix.");
+			statement("float SPIRV_Cross_Det2x2(float a1, float a2, float b1, float b2)");
+			begin_scope();
+			statement("return a1 * b2 - b1 * a2;");
+			end_scope();
+			statement("");
+		}
+
+		statement("// Returns the determinant of a 3x3 matrix.");
+		statement("float SPIRV_Cross_Det3x3(float a1, float a2, float a3, float b1, float b2, float b3, float c1, "
+		          "float c2, float c3)");
+		begin_scope();
+		statement("return a1 * SPIRV_Cross_Det2x2(b2, b3, c2, c3) - b1 * SPIRV_Cross_Det2x2(a2, a3, c2, c3) + c1 * "
+		          "SPIRV_Cross_Det2x2(a2, a3, "
+		          "b2, b3);");
+		end_scope();
+		statement_no_indent("");
+		statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical");
+		statement("// adjoint and dividing by the determinant. The contents of the matrix are changed.");
+		statement("float4x4 SPIRV_Cross_Inverse(float4x4 m)");
+		begin_scope();
+		statement("float4x4 adj;	// The adjoint matrix (inverse after dividing by determinant)");
+		statement_no_indent("");
+		statement("// Create the transpose of the cofactors, as the classical adjoint of the matrix.");
+		statement(
+		    "adj[0][0] =  SPIRV_Cross_Det3x3(m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], "
+		    "m[3][3]);");
+		statement(
+		    "adj[0][1] = -SPIRV_Cross_Det3x3(m[0][1], m[0][2], m[0][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], "
+		    "m[3][3]);");
+		statement(
+		    "adj[0][2] =  SPIRV_Cross_Det3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[3][1], m[3][2], "
+		    "m[3][3]);");
+		statement(
+		    "adj[0][3] = -SPIRV_Cross_Det3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], "
+		    "m[2][3]);");
+		statement_no_indent("");
+		statement(
+		    "adj[1][0] = -SPIRV_Cross_Det3x3(m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], "
+		    "m[3][3]);");
+		statement(
+		    "adj[1][1] =  SPIRV_Cross_Det3x3(m[0][0], m[0][2], m[0][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], "
+		    "m[3][3]);");
+		statement(
+		    "adj[1][2] = -SPIRV_Cross_Det3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[3][0], m[3][2], "
+		    "m[3][3]);");
+		statement(
+		    "adj[1][3] =  SPIRV_Cross_Det3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], "
+		    "m[2][3]);");
+		statement_no_indent("");
+		statement(
+		    "adj[2][0] =  SPIRV_Cross_Det3x3(m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], "
+		    "m[3][3]);");
+		statement(
+		    "adj[2][1] = -SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], "
+		    "m[3][3]);");
+		statement(
+		    "adj[2][2] =  SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[3][0], m[3][1], "
+		    "m[3][3]);");
+		statement(
+		    "adj[2][3] = -SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], "
+		    "m[2][3]);");
+		statement_no_indent("");
+		statement(
+		    "adj[3][0] = -SPIRV_Cross_Det3x3(m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], "
+		    "m[3][2]);");
+		statement(
+		    "adj[3][1] =  SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], "
+		    "m[3][2]);");
+		statement(
+		    "adj[3][2] = -SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[3][0], m[3][1], "
+		    "m[3][2]);");
+		statement(
+		    "adj[3][3] =  SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], "
+		    "m[2][2]);");
+		statement_no_indent("");
+		statement("// Calculate the determinant as a combination of the cofactors of the first row.");
+		statement("float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]) + (adj[0][2] * m[2][0]) + (adj[0][3] "
+		          "* m[3][0]);");
+		statement_no_indent("");
+		statement("// Divide the classical adjoint matrix by the determinant.");
+		statement("// If determinant is zero, matrix is not invertable, so leave it unchanged.");
+		statement("return (det != 0.0f) ? (adj * (1.0f / det)) : m;");
+		end_scope();
+		statement("");
+	}
 }
 
 string CompilerHLSL::layout_for_member(const SPIRType &type, uint32_t index)
@@ -1442,7 +1715,7 @@ string CompilerHLSL::layout_for_member(const SPIRType &type, uint32_t index)
 }
 
 void CompilerHLSL::emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index,
-                                      const string &qualifier)
+                                      const string &qualifier, uint32_t base_offset)
 {
 	auto &membertype = get<SPIRType>(member_type_id);
 
@@ -1458,9 +1731,12 @@ void CompilerHLSL::emit_struct_member(const SPIRType &type, uint32_t member_type
 		qualifiers = to_interpolation_qualifiers(memberflags);
 
 	string packing_offset;
-	if (has_decoration(type.self, DecorationCPacked) && has_member_decoration(type.self, index, DecorationOffset))
+	bool is_push_constant = type.storage == StorageClassPushConstant;
+
+	if ((has_decoration(type.self, DecorationCPacked) || is_push_constant) &&
+	    has_member_decoration(type.self, index, DecorationOffset))
 	{
-		uint32_t offset = memb[index].offset;
+		uint32_t offset = memb[index].offset - base_offset;
 		if (offset & 3)
 			SPIRV_CROSS_THROW("Cannot pack on tighter bounds than 4 bytes in HLSL.");
 
@@ -1540,7 +1816,58 @@ void CompilerHLSL::emit_buffer_block(const SPIRVariable &var)
 
 void CompilerHLSL::emit_push_constant_block(const SPIRVariable &var)
 {
-	emit_buffer_block(var);
+	if (root_constants_layout.empty())
+	{
+		emit_buffer_block(var);
+	}
+	else
+	{
+		for (const auto &layout : root_constants_layout)
+		{
+			auto &type = get<SPIRType>(var.basetype);
+
+			if (buffer_is_packing_standard(type, BufferPackingHLSLCbufferPackOffset, layout.start, layout.end))
+				set_decoration(type.self, DecorationCPacked);
+			else
+				SPIRV_CROSS_THROW(
+				    "root constant cbuffer cannot be expressed with either HLSL packing layout or packoffset.");
+
+			flattened_structs.insert(var.self);
+			type.member_name_cache.clear();
+			add_resource_name(var.self);
+			auto &memb = meta[type.self].members;
+
+			statement("cbuffer SPIRV_CROSS_RootConstant_", to_name(var.self),
+			          to_resource_register('b', layout.binding, layout.space));
+			begin_scope();
+
+			// Index of the next field in the generated root-constant constant buffer
+			auto constant_index = 0u;
+
+			// Iterate over all members of the push constant and check which of the fields
+			// fit into the given root constant layout.
+			for (auto i = 0u; i < memb.size(); i++)
+			{
+				const auto offset = memb[i].offset;
+				if (layout.start <= offset && offset < layout.end)
+				{
+					const auto &member = type.member_types[i];
+
+					add_member_name(type, constant_index);
+					auto backup_name = get_member_name(type.self, i);
+					auto member_name = to_member_name(type, i);
+					set_member_name(type.self, constant_index,
+					                sanitize_underscores(join(to_name(type.self), "_", member_name)));
+					emit_struct_member(type, member, i, "", layout.start);
+					set_member_name(type.self, constant_index, backup_name);
+
+					constant_index++;
+				}
+			}
+
+			end_scope_decl();
+		}
+	}
 }
 
 string CompilerHLSL::to_sampler_expression(uint32_t id)
@@ -1584,6 +1911,9 @@ string CompilerHLSL::to_func_call_arg(uint32_t id)
 
 void CompilerHLSL::emit_function_prototype(SPIRFunction &func, uint64_t return_flags)
 {
+	if (func.self != entry_point)
+		add_function_overload(func);
+
 	auto &execution = get_entry_point();
 	// Avoid shadow declarations.
 	local_variable_names = resource_names;
@@ -1591,9 +1921,17 @@ void CompilerHLSL::emit_function_prototype(SPIRFunction &func, uint64_t return_f
 	string decl;
 
 	auto &type = get<SPIRType>(func.return_type);
-	decl += flags_to_precision_qualifiers_glsl(type, return_flags);
-	decl += type_to_glsl(type);
-	decl += " ";
+	if (type.array.empty())
+	{
+		decl += flags_to_precision_qualifiers_glsl(type, return_flags);
+		decl += type_to_glsl(type);
+		decl += " ";
+	}
+	else
+	{
+		// We cannot return arrays in HLSL, so "return" through an out variable.
+		decl = "void ";
+	}
 
 	if (func.self == entry_point)
 	{
@@ -1611,6 +1949,19 @@ void CompilerHLSL::emit_function_prototype(SPIRFunction &func, uint64_t return_f
 		decl += to_name(func.self);
 
 	decl += "(";
+
+	if (!type.array.empty())
+	{
+		// Fake array returns by writing to an out array instead.
+		decl += "out ";
+		decl += type_to_glsl(type);
+		decl += " ";
+		decl += "SPIRV_Cross_return_value";
+		decl += type_to_array_glsl(type);
+		if (!func.arguments.empty())
+			decl += ", ";
+	}
+
 	for (auto &arg : func.arguments)
 	{
 		// Might change the variable name if it already exists in this function.
@@ -1739,6 +2090,22 @@ void CompilerHLSL::emit_hlsl_entry_point()
 			statement(builtin, " = int(stage_input.", builtin, ");");
 			break;
 
+		case BuiltInNumWorkgroups:
+		case BuiltInPointCoord:
+			break;
+
+		case BuiltInClipDistance:
+			for (uint32_t clip = 0; clip < clip_distance_count; clip++)
+				statement("gl_ClipDistance[", clip, "] = stage_input.gl_ClipDistance", clip / 4, ".", "xyzw"[clip & 3],
+				          ";");
+			break;
+
+		case BuiltInCullDistance:
+			for (uint32_t cull = 0; cull < cull_distance_count; cull++)
+				statement("gl_CullDistance[", cull, "] = stage_input.gl_CullDistance", cull / 4, ".", "xyzw"[cull & 3],
+				          ";");
+			break;
+
 		default:
 			statement(builtin, " = stage_input.", builtin, ";");
 			break;
@@ -1831,8 +2198,27 @@ void CompilerHLSL::emit_hlsl_entry_point()
 			if (i == BuiltInPointSize)
 				continue;
 
-			auto builtin = builtin_to_glsl(static_cast<BuiltIn>(i), StorageClassOutput);
-			statement("stage_output.", builtin, " = ", builtin, ";");
+			switch (static_cast<BuiltIn>(i))
+			{
+			case BuiltInClipDistance:
+				for (uint32_t clip = 0; clip < clip_distance_count; clip++)
+					statement("stage_output.gl_ClipDistance", clip / 4, ".", "xyzw"[clip & 3], " = gl_ClipDistance[",
+					          clip, "];");
+				break;
+
+			case BuiltInCullDistance:
+				for (uint32_t cull = 0; cull < cull_distance_count; cull++)
+					statement("stage_output.gl_CullDistance", cull / 4, ".", "xyzw"[cull & 3], " = gl_CullDistance[",
+					          cull, "];");
+				break;
+
+			default:
+			{
+				auto builtin_expr = builtin_to_glsl(static_cast<BuiltIn>(i), StorageClassOutput);
+				statement("stage_output.", builtin_expr, " = ", builtin_expr, ";");
+				break;
+			}
+			}
 		}
 
 		for (auto &id : ids)
@@ -2295,24 +2681,24 @@ string CompilerHLSL::to_resource_binding(const SPIRVariable &var)
 	if (!has_decoration(var.self, DecorationBinding))
 		return "";
 
-	auto &type = get<SPIRType>(var.basetype);
-	const char *space = nullptr;
+	const auto &type = get<SPIRType>(var.basetype);
+	char space = '\0';
 
 	switch (type.basetype)
 	{
 	case SPIRType::SampledImage:
-		space = "t"; // SRV
+		space = 't'; // SRV
 		break;
 
 	case SPIRType::Image:
-		if (type.image.sampled == 2)
-			space = "u"; // UAV
+		if (type.image.sampled == 2 && type.image.dim != DimSubpassData)
+			space = 'u'; // UAV
 		else
-			space = "t"; // SRV
+			space = 't'; // SRV
 		break;
 
 	case SPIRType::Sampler:
-		space = "s";
+		space = 's';
 		break;
 
 	case SPIRType::Struct:
@@ -2324,15 +2710,15 @@ string CompilerHLSL::to_resource_binding(const SPIRVariable &var)
 			{
 				uint64_t flags = get_buffer_block_flags(var);
 				bool is_readonly = (flags & (1ull << DecorationNonWritable)) != 0;
-				space = is_readonly ? "t" : "u"; // UAV
+				space = is_readonly ? 't' : 'u'; // UAV
 			}
 			else if (has_decoration(type.self, DecorationBlock))
-				space = "b"; // Constant buffers
+				space = 'b'; // Constant buffers
 		}
 		else if (storage == StorageClassPushConstant)
-			space = "b"; // Constant buffers
+			space = 'b'; // Constant buffers
 		else if (storage == StorageClassStorageBuffer)
-			space = "u"; // UAV
+			space = 'u'; // UAV
 
 		break;
 	}
@@ -2343,12 +2729,8 @@ string CompilerHLSL::to_resource_binding(const SPIRVariable &var)
 	if (!space)
 		return "";
 
-	// shader model 5.1 supports space
-	if (options.shader_model >= 51)
-		return join(" : register(", space, get_decoration(var.self, DecorationBinding), ", space",
-		            get_decoration(var.self, DecorationDescriptorSet), ")");
-	else
-		return join(" : register(", space, get_decoration(var.self, DecorationBinding), ")");
+	return to_resource_register(space, get_decoration(var.self, DecorationBinding),
+	                            get_decoration(var.self, DecorationDescriptorSet));
 }
 
 string CompilerHLSL::to_resource_binding_sampler(const SPIRVariable &var)
@@ -2357,11 +2739,16 @@ string CompilerHLSL::to_resource_binding_sampler(const SPIRVariable &var)
 	if (!has_decoration(var.self, DecorationBinding))
 		return "";
 
+	return to_resource_register('s', get_decoration(var.self, DecorationBinding),
+	                            get_decoration(var.self, DecorationDescriptorSet));
+}
+
+string CompilerHLSL::to_resource_register(char space, uint32_t binding, uint32_t space_set)
+{
 	if (options.shader_model >= 51)
-		return join(" : register(s", get_decoration(var.self, DecorationBinding), ", space",
-		            get_decoration(var.self, DecorationDescriptorSet), ")");
+		return join(" : register(", space, binding, ", space", space_set, ")");
 	else
-		return join(" : register(s", get_decoration(var.self, DecorationBinding), ")");
+		return join(" : register(", space, binding, ")");
 }
 
 void CompilerHLSL::emit_modern_uniform(const SPIRVariable &var)
@@ -2595,6 +2982,37 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
 		emit_unary_func_op(result_type, id, args[0], "firstbithigh");
 		break;
 
+	case GLSLstd450MatrixInverse:
+	{
+		auto &type = get<SPIRType>(result_type);
+		if (type.vecsize == 2 && type.columns == 2)
+		{
+			if (!requires_inverse_2x2)
+			{
+				requires_inverse_2x2 = true;
+				force_recompile = true;
+			}
+		}
+		else if (type.vecsize == 3 && type.columns == 3)
+		{
+			if (!requires_inverse_3x3)
+			{
+				requires_inverse_3x3 = true;
+				force_recompile = true;
+			}
+		}
+		else if (type.vecsize == 4 && type.columns == 4)
+		{
+			if (!requires_inverse_4x4)
+			{
+				requires_inverse_4x4 = true;
+				force_recompile = true;
+			}
+		}
+		emit_unary_func_op(result_type, id, args[0], "SPIRV_Cross_Inverse");
+		break;
+	}
+
 	default:
 		CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
 		break;
@@ -3056,13 +3474,13 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 
 #define BOP(op) emit_binary_op(ops[0], ops[1], ops[2], ops[3], #op)
 #define BOP_CAST(op, type) \
-	emit_binary_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode))
+	emit_binary_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, hlsl_opcode_is_sign_invariant(opcode))
 #define UOP(op) emit_unary_op(ops[0], ops[1], ops[2], #op)
 #define QFOP(op) emit_quaternary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], #op)
 #define TFOP(op) emit_trinary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], #op)
 #define BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op)
 #define BFOP_CAST(op, type) \
-	emit_binary_func_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode))
+	emit_binary_func_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, hlsl_opcode_is_sign_invariant(opcode))
 #define BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op)
 #define UFOP(op) emit_unary_func_op(ops[0], ops[1], ops[2], #op)
 
@@ -3116,6 +3534,10 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 		break;
 	}
 
+	case OpFRem:
+		emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], "fmod");
+		break;
+
 	case OpImage:
 	{
 		uint32_t result_type = ops[0];
@@ -3385,21 +3807,52 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 		uint32_t result_type = ops[0];
 		uint32_t id = ops[1];
 		auto *var = maybe_get_backing_variable(ops[2]);
-		auto imgexpr = join(to_expression(ops[2]), "[", to_expression(ops[3]), "]");
+		auto &type = expression_type(ops[2]);
+		bool subpass_data = type.image.dim == DimSubpassData;
+		bool pure = false;
 
-		// The underlying image type in HLSL depends on the image format, unlike GLSL, where all images are "vec4",
-		// except that the underlying type changes how the data is interpreted.
-		if (var)
-			imgexpr = remap_swizzle(get<SPIRType>(result_type),
-			                        image_format_to_components(get<SPIRType>(var->basetype).image.format), imgexpr);
+		string imgexpr;
+
+		if (subpass_data)
+		{
+			if (options.shader_model < 40)
+				SPIRV_CROSS_THROW("Subpass loads are not supported in HLSL shader model 2/3.");
+
+			// Similar to GLSL, implement subpass loads using texelFetch.
+			if (type.image.ms)
+			{
+				uint32_t operands = ops[4];
+				if (operands != ImageOperandsSampleMask || instruction.length != 6)
+					SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected operand mask was used.");
+				uint32_t sample = ops[5];
+				imgexpr = join(to_expression(ops[2]), ".Load(int2(gl_FragCoord.xy), ", to_expression(sample), ")");
+			}
+			else
+				imgexpr = join(to_expression(ops[2]), ".Load(int3(int2(gl_FragCoord.xy), 0))");
+
+			pure = true;
+		}
+		else
+		{
+			imgexpr = join(to_expression(ops[2]), "[", to_expression(ops[3]), "]");
+			// The underlying image type in HLSL depends on the image format, unlike GLSL, where all images are "vec4",
+			// except that the underlying type changes how the data is interpreted.
+			if (var && !subpass_data)
+				imgexpr = remap_swizzle(get<SPIRType>(result_type),
+				                        image_format_to_components(get<SPIRType>(var->basetype).image.format), imgexpr);
+		}
 
 		if (var && var->forwardable)
 		{
 			bool forward = forced_temporaries.find(id) == end(forced_temporaries);
 			auto &e = emit_op(result_type, id, imgexpr, forward);
-			e.loaded_from = var->self;
-			if (forward)
-				var->dependees.push_back(id);
+
+			if (!pure)
+			{
+				e.loaded_from = var->self;
+				if (forward)
+					var->dependees.push_back(id);
+			}
 		}
 		else
 			emit_op(result_type, id, imgexpr, false);
@@ -3659,6 +4112,52 @@ string CompilerHLSL::compile(std::vector<HLSLVertexAttributeRemap> vertex_attrib
 	return compile();
 }
 
+uint32_t CompilerHLSL::remap_num_workgroups_builtin()
+{
+	update_active_builtins();
+
+	if ((active_input_builtins & (1ull << BuiltInNumWorkgroups)) == 0)
+		return 0;
+
+	// Create a new, fake UBO.
+	uint32_t offset = increase_bound_by(4);
+
+	uint32_t uint_type_id = offset;
+	uint32_t block_type_id = offset + 1;
+	uint32_t block_pointer_type_id = offset + 2;
+	uint32_t variable_id = offset + 3;
+
+	SPIRType uint_type;
+	uint_type.basetype = SPIRType::UInt;
+	uint_type.width = 32;
+	uint_type.vecsize = 3;
+	uint_type.columns = 1;
+	set<SPIRType>(uint_type_id, uint_type);
+
+	SPIRType block_type;
+	block_type.basetype = SPIRType::Struct;
+	block_type.member_types.push_back(uint_type_id);
+	set<SPIRType>(block_type_id, block_type);
+	set_decoration(block_type_id, DecorationBlock);
+	set_member_name(block_type_id, 0, "count");
+	set_member_decoration(block_type_id, 0, DecorationOffset, 0);
+
+	SPIRType block_pointer_type = block_type;
+	block_pointer_type.pointer = true;
+	block_pointer_type.storage = StorageClassUniform;
+	block_pointer_type.parent_type = block_type_id;
+	auto &ptr_type = set<SPIRType>(block_pointer_type_id, block_pointer_type);
+
+	// Preserve self.
+	ptr_type.self = block_type_id;
+
+	set<SPIRVariable>(variable_id, block_pointer_type_id, StorageClassUniform);
+	meta[variable_id].decoration.alias = "SPIRV_Cross_NumWorkgroups";
+
+	num_workgroups_builtin = variable_id;
+	return variable_id;
+}
+
 string CompilerHLSL::compile()
 {
 	// Do not deal with ES-isms like precision, older extensions and such.
@@ -3681,9 +4180,14 @@ string CompilerHLSL::compile()
 	backend.can_swizzle_scalar = true;
 	backend.can_declare_struct_inline = false;
 	backend.can_declare_arrays_inline = false;
+	backend.can_return_array = false;
 
 	update_active_builtins();
-	analyze_sampler_comparison_states();
+	analyze_image_and_sampler_usage();
+
+	// Subpass input needs SV_Position.
+	if (need_subpass_input)
+		active_input_builtins |= 1ull << BuiltInFragCoord;
 
 	uint32_t pass_count = 0;
 	do
diff --git a/spirv_hlsl.hpp b/spirv_hlsl.hpp
index 9b3261737c..0de72408e0 100644
--- a/spirv_hlsl.hpp
+++ b/spirv_hlsl.hpp
@@ -29,6 +29,18 @@ struct HLSLVertexAttributeRemap
 	uint32_t location;
 	std::string semantic;
 };
+// Specifies a root constant (D3D12) or push constant range (Vulkan).
+//
+// `start` and `end` denote the range of the root constant in bytes.
+// Both values need to be multiples of 4.
+struct RootConstants
+{
+	uint32_t start;
+	uint32_t end;
+
+	uint32_t binding;
+	uint32_t space;
+};
 
 class CompilerHLSL : public CompilerGLSL
 {
@@ -39,6 +51,9 @@ public:
 
 		// Allows the PointSize builtin, and ignores it, as PointSize is not supported in HLSL.
 		bool point_size_compat = false;
+
+		// Allows the PointCoord builtin, returns float2(0.5, 0.5), as PointCoord is not supported in HLSL.
+		bool point_coord_compat = false;
 	};
 
 	CompilerHLSL(std::vector<uint32_t> spirv_)
@@ -61,6 +76,15 @@ public:
 		options = opts;
 	}
 
+	// Optionally specify a custom root constant layout.
+	//
+	// Push constant ranges will be split up according to the
+	// layout specified.
+	void set_root_constant_layouts(std::vector<RootConstants> layout)
+	{
+		root_constants_layout = std::move(layout);
+	}
+
 	// Compiles and remaps vertex attributes at specific locations to a fixed semantic.
 	// The default is TEXCOORD# where # denotes location.
 	// Matrices are unrolled to vectors with notation ${SEMANTIC}_#, where # denotes row.
@@ -68,6 +92,18 @@ public:
 	std::string compile(std::vector<HLSLVertexAttributeRemap> vertex_attributes);
 	std::string compile() override;
 
+	// This is a special HLSL workaround for the NumWorkGroups builtin.
+	// This does not exist in HLSL, so the calling application must create a dummy cbuffer in
+	// which the application will store this builtin.
+	// The cbuffer layout will be:
+	// cbuffer SPIRV_Cross_NumWorkgroups : register(b#, space#) { uint3 SPIRV_Cross_NumWorkgroups_count; };
+	// This must be called before compile().
+	// The function returns 0 if NumWorkGroups builtin is not statically used in the shader from the current entry point.
+	// If non-zero, this returns the variable ID of a cbuffer which corresponds to
+	// the cbuffer declared above. By default, no binding or descriptor set decoration is set,
+	// so the calling application should declare explicit bindings on this ID before calling compile().
+	uint32_t remap_num_workgroups_builtin();
+
 private:
 	std::string type_to_glsl(const SPIRType &type, uint32_t id = 0) override;
 	std::string image_type_hlsl(const SPIRType &type);
@@ -101,6 +137,7 @@ private:
 	std::string to_sampler_expression(uint32_t id);
 	std::string to_resource_binding(const SPIRVariable &var);
 	std::string to_resource_binding_sampler(const SPIRVariable &var);
+	std::string to_resource_register(char space, uint32_t binding, uint32_t set);
 	void emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id) override;
 	void emit_access_chain(const Instruction &instruction);
 	void emit_load(const Instruction &instruction);
@@ -109,8 +146,8 @@ private:
 	void emit_store(const Instruction &instruction);
 	void emit_atomic(const uint32_t *ops, uint32_t length, spv::Op op);
 
-	void emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index,
-	                        const std::string &qualifier) override;
+	void emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, const std::string &qualifier,
+	                        uint32_t base_offset = 0) override;
 
 	const char *to_storage_qualifiers_glsl(const SPIRVariable &var) override;
 
@@ -124,6 +161,9 @@ private:
 	bool requires_snorm16_packing = false;
 	bool requires_bitfield_insert = false;
 	bool requires_bitfield_extract = false;
+	bool requires_inverse_2x2 = false;
+	bool requires_inverse_3x3 = false;
+	bool requires_inverse_4x4 = false;
 	uint64_t required_textureSizeVariants = 0;
 	void require_texture_query_variant(const SPIRType &type);
 
@@ -159,6 +199,12 @@ private:
 
 	void emit_io_block(const SPIRVariable &var);
 	std::string to_semantic(uint32_t vertex_location);
+
+	uint32_t num_workgroups_builtin = 0;
+
+	// Custom root constant layout, which should be emitted
+	// when translating push constant ranges.
+	std::vector<RootConstants> root_constants_layout;
 };
 }
 
diff --git a/spirv_msl.cpp b/spirv_msl.cpp
index a85b38c2eb..a8910bf1b1 100644
--- a/spirv_msl.cpp
+++ b/spirv_msl.cpp
@@ -18,6 +18,7 @@
 #include "GLSL.std.450.h"
 
 #include <algorithm>
+#include <assert.h>
 #include <numeric>
 
 using namespace spv;
@@ -52,6 +53,57 @@ CompilerMSL::CompilerMSL(const uint32_t *ir, size_t word_count, MSLVertexAttr *p
 			resource_bindings.push_back(&p_res_bindings[i]);
 }
 
+void CompilerMSL::build_implicit_builtins()
+{
+	if (need_subpass_input)
+	{
+		bool has_frag_coord = false;
+
+		for (auto &id : ids)
+		{
+			if (id.get_type() != TypeVariable)
+				continue;
+
+			auto &var = id.get<SPIRVariable>();
+
+			if (var.storage == StorageClassInput && meta[var.self].decoration.builtin &&
+			    meta[var.self].decoration.builtin_type == BuiltInFragCoord)
+			{
+				builtin_frag_coord_id = var.self;
+				has_frag_coord = true;
+				break;
+			}
+		}
+
+		if (!has_frag_coord)
+		{
+			uint32_t offset = increase_bound_by(3);
+			uint32_t type_id = offset;
+			uint32_t type_ptr_id = offset + 1;
+			uint32_t var_id = offset + 2;
+
+			// Create gl_FragCoord.
+			SPIRType vec4_type;
+			vec4_type.basetype = SPIRType::Float;
+			vec4_type.width = 32;
+			vec4_type.vecsize = 4;
+			set<SPIRType>(type_id, vec4_type);
+
+			SPIRType vec4_type_ptr;
+			vec4_type_ptr = vec4_type;
+			vec4_type_ptr.pointer = true;
+			vec4_type_ptr.parent_type = type_id;
+			vec4_type_ptr.storage = StorageClassInput;
+			auto &ptr_type = set<SPIRType>(type_ptr_id, vec4_type_ptr);
+			ptr_type.self = type_id;
+
+			set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
+			set_decoration(var_id, DecorationBuiltIn, BuiltInFragCoord);
+			builtin_frag_coord_id = var_id;
+		}
+	}
+}
+
 string CompilerMSL::compile()
 {
 	// Force a classic "C" locale, reverts when function returns
@@ -60,7 +112,7 @@ string CompilerMSL::compile()
 	// Do not deal with GLES-isms like precision, older extensions and such.
 	CompilerGLSL::options.vulkan_semantics = true;
 	CompilerGLSL::options.es = false;
-	CompilerGLSL::options.version = 120;
+	CompilerGLSL::options.version = 450;
 	backend.float_literal_suffix = false;
 	backend.uint32_t_literal_suffix = true;
 	backend.basic_int_type = "int";
@@ -72,6 +124,10 @@ string CompilerMSL::compile()
 	backend.use_typed_initializer_list = true;
 	backend.native_row_major_matrix = false;
 	backend.flexible_member_array_supported = false;
+	backend.can_declare_arrays_inline = false;
+	backend.can_return_array = false;
+	backend.boolean_mix_support = false;
+	backend.allow_truncated_access_chain = true;
 
 	replace_illegal_names();
 
@@ -79,6 +135,9 @@ string CompilerMSL::compile()
 	struct_member_padding.clear();
 
 	update_active_builtins();
+	analyze_image_and_sampler_usage();
+	build_implicit_builtins();
+
 	fixup_image_load_store_access();
 
 	set_enabled_interface_variables(get_active_interface_variables());
@@ -271,12 +330,21 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std::
 			switch (op)
 			{
 			case OpLoad:
+			case OpInBoundsAccessChain:
 			case OpAccessChain:
 			{
 				uint32_t base_id = ops[2];
 				if (global_var_ids.find(base_id) != global_var_ids.end())
 					added_arg_ids.insert(base_id);
 
+				auto &type = get<SPIRType>(ops[0]);
+				if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData)
+				{
+					// Implicitly reads gl_FragCoord.
+					assert(builtin_frag_coord_id != 0);
+					added_arg_ids.insert(builtin_frag_coord_id);
+				}
+
 				break;
 			}
 			case OpFunctionCall:
@@ -370,11 +438,6 @@ void CompilerMSL::mark_as_packable(SPIRType &type)
 			uint32_t mbr_type_id = type.member_types[mbr_idx];
 			auto &mbr_type = get<SPIRType>(mbr_type_id);
 			mark_as_packable(mbr_type);
-			if (mbr_type.type_alias)
-			{
-				auto &mbr_type_alias = get<SPIRType>(mbr_type.type_alias);
-				mark_as_packable(mbr_type_alias);
-			}
 		}
 	}
 }
@@ -480,15 +543,16 @@ uint32_t CompilerMSL::add_interface_block(StorageClass storage)
 				BuiltIn builtin;
 				bool is_builtin = is_member_builtin(type, mbr_idx, &builtin);
 
-				auto &mbr_type = get<SPIRType>(mbr_type_id);
-				if (should_move_to_input_buffer(mbr_type, is_builtin, storage))
+				if (should_move_to_input_buffer(mbr_type_id, is_builtin, storage))
 					move_member_to_input_buffer(type, mbr_idx);
 
 				else if (!is_builtin || has_active_builtin(builtin, storage))
 				{
 					// Add a reference to the member to the interface struct.
 					uint32_t ib_mbr_idx = uint32_t(ib_type.member_types.size());
-					ib_type.member_types.push_back(mbr_type_id); // membertype.self is different for array types
+					mbr_type_id = ensure_correct_builtin_type(mbr_type_id, builtin);
+					type.member_types[mbr_idx] = mbr_type_id;
+					ib_type.member_types.push_back(mbr_type_id);
 
 					// Give the member a name
 					string mbr_name = ensure_valid_name(to_qualified_member_name(type, mbr_idx), "m");
@@ -534,13 +598,15 @@ uint32_t CompilerMSL::add_interface_block(StorageClass storage)
 			bool is_builtin = is_builtin_variable(*p_var);
 			BuiltIn builtin = BuiltIn(get_decoration(p_var->self, DecorationBuiltIn));
 
-			if (should_move_to_input_buffer(type, is_builtin, storage))
+			if (should_move_to_input_buffer(type_id, is_builtin, storage))
 				move_to_input_buffer(*p_var);
 
 			else if (!is_builtin || has_active_builtin(builtin, storage))
 			{
 				// Add a reference to the variable type to the interface struct.
 				uint32_t ib_mbr_idx = uint32_t(ib_type.member_types.size());
+				type_id = ensure_correct_builtin_type(type_id, builtin);
+				p_var->basetype = type_id;
 				ib_type.member_types.push_back(type_id);
 
 				// Give the member a name
@@ -587,8 +653,10 @@ uint32_t CompilerMSL::add_interface_block(StorageClass storage)
 // Other types do not need to move, and false is returned.
 // Matrices and arrays are not permitted in the output of a vertex function or the input
 // or output of a fragment function, and in those cases, an exception is thrown.
-bool CompilerMSL::should_move_to_input_buffer(SPIRType &type, bool is_builtin, StorageClass storage)
+bool CompilerMSL::should_move_to_input_buffer(uint32_t type_id, bool is_builtin, StorageClass storage)
 {
+	auto &type = get<SPIRType>(type_id);
+
 	if ((is_matrix(type) || is_array(type)) && !is_builtin)
 	{
 		auto &execution = get_entry_point();
@@ -721,6 +789,36 @@ uint32_t CompilerMSL::get_input_buffer_block_var_id(uint32_t msl_buffer)
 	return ib_var_id;
 }
 
+// Ensure that the type is compatible with the builtin.
+// If it is, simply return the given type ID.
+// Otherwise, create a new type, and return its ID.
+uint32_t CompilerMSL::ensure_correct_builtin_type(uint32_t type_id, BuiltIn builtin)
+{
+	auto &type = get<SPIRType>(type_id);
+
+	if (builtin == BuiltInSampleMask && is_array(type))
+	{
+		uint32_t next_id = increase_bound_by(type.pointer ? 2 : 1);
+		uint32_t base_type_id = next_id++;
+		auto &base_type = set<SPIRType>(base_type_id);
+		base_type.basetype = SPIRType::UInt;
+		base_type.width = 32;
+
+		if (!type.pointer)
+			return base_type_id;
+
+		uint32_t ptr_type_id = next_id++;
+		auto &ptr_type = set<SPIRType>(ptr_type_id);
+		ptr_type = base_type;
+		ptr_type.pointer = true;
+		ptr_type.storage = type.storage;
+		ptr_type.parent_type = base_type_id;
+		return ptr_type_id;
+	}
+
+	return type_id;
+}
+
 // Sort the members of the struct type by offset, and pack and then pad members where needed
 // to align MSL members with SPIR-V offsets. The struct members are iterated twice. Packing
 // occurs first, followed by padding, because packing a member reduces both its size and its
@@ -744,29 +842,9 @@ void CompilerMSL::align_struct(SPIRType &ib_type)
 	curr_offset = 0;
 	for (uint32_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++)
 	{
-		// Align current offset to the current member's default alignment.
-		size_t align_mask = get_declared_struct_member_alignment(ib_type, mbr_idx) - 1;
-		curr_offset = uint32_t((curr_offset + align_mask) & ~align_mask);
+		if (is_member_packable(ib_type, mbr_idx))
+			set_member_decoration(ib_type_id, mbr_idx, DecorationCPacked);
 
-		// Fetch the member offset as declared in the SPIRV.
-		uint32_t mbr_offset = get_member_decoration(ib_type_id, mbr_idx, DecorationOffset);
-		if (curr_offset > mbr_offset)
-		{
-			uint32_t prev_mbr_idx = mbr_idx - 1;
-			if (is_member_packable(ib_type, prev_mbr_idx))
-				set_member_decoration(ib_type_id, prev_mbr_idx, DecorationCPacked);
-		}
-
-		// Increment the current offset to be positioned immediately after the current member.
-		curr_offset = mbr_offset + uint32_t(get_declared_struct_member_size(ib_type, mbr_idx));
-	}
-
-	// Test the alignment of each member, and if a member is positioned farther than its
-	// alignment and the end of the previous member, add a dummy padding member that will
-	// be added before the current member when the delaration of this struct is emitted.
-	curr_offset = 0;
-	for (uint32_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++)
-	{
 		// Align current offset to the current member's default alignment.
 		size_t align_mask = get_declared_struct_member_alignment(ib_type, mbr_idx) - 1;
 		curr_offset = uint32_t((curr_offset + align_mask) & ~align_mask);
@@ -791,14 +869,48 @@ void CompilerMSL::align_struct(SPIRType &ib_type)
 // variation that is smaller than the unpacked variation of that type.
 bool CompilerMSL::is_member_packable(SPIRType &ib_type, uint32_t index)
 {
-	uint32_t mbr_type_id = ib_type.member_types[index];
-	auto &mbr_type = get<SPIRType>(mbr_type_id);
-
-	// 3-element vectors (char3, uchar3, short3, ushort3, int3, uint3, half3, float3)
-	if (mbr_type.vecsize == 3 && mbr_type.columns == 1)
+	// We've already marked it as packable
+	if (has_member_decoration(ib_type.self, index, DecorationCPacked))
 		return true;
 
-	return false;
+	auto &mbr_type = get<SPIRType>(ib_type.member_types[index]);
+
+	// Only 3-element vectors or 3-row matrices need to be packed.
+	if (mbr_type.vecsize != 3)
+		return false;
+
+	// Only row-major matrices need to be packed.
+	if (is_matrix(mbr_type) && !has_member_decoration(ib_type.self, index, DecorationRowMajor))
+		return false;
+
+	uint32_t component_size = mbr_type.width / 8;
+	uint32_t unpacked_mbr_size = component_size * (mbr_type.vecsize + 1) * mbr_type.columns;
+	if (is_array(mbr_type))
+	{
+		// If member is an array, and the array stride is larger than the type needs, don't pack it.
+		// Take into consideration multi-dimensional arrays.
+		uint32_t md_elem_cnt = 1;
+		size_t last_elem_idx = mbr_type.array.size() - 1;
+		for (uint32_t i = 0; i < last_elem_idx; i++)
+			md_elem_cnt *= max(to_array_size_literal(mbr_type, i), 1U);
+
+		uint32_t unpacked_array_stride = unpacked_mbr_size * md_elem_cnt;
+		uint32_t array_stride = type_struct_member_array_stride(ib_type, index);
+		return unpacked_array_stride > array_stride;
+	}
+	else
+	{
+		// Pack if there is not enough space between this member and next.
+		// If last member, only pack if it's a row-major matrix.
+		if (index < ib_type.member_types.size() - 1)
+		{
+			uint32_t mbr_offset_curr = get_member_decoration(ib_type.self, index, DecorationOffset);
+			uint32_t mbr_offset_next = get_member_decoration(ib_type.self, index + 1, DecorationOffset);
+			return unpacked_mbr_size > mbr_offset_next - mbr_offset_curr;
+		}
+		else
+			return is_matrix(mbr_type);
+	}
 }
 
 // Returns a combination of type ID and member index for use as hash key
@@ -835,11 +947,26 @@ void CompilerMSL::emit_header()
 	statement("");
 	statement("using namespace metal;");
 	statement("");
+
+	for (auto &td : typedef_lines)
+		statement(td);
+
+	if (!typedef_lines.empty())
+		statement("");
 }
 
 void CompilerMSL::add_pragma_line(const string &line)
 {
-	pragma_lines.insert(line);
+	auto rslt = pragma_lines.insert(line);
+	if (rslt.second)
+		force_recompile = true;
+}
+
+void CompilerMSL::add_typedef_line(const string &line)
+{
+	auto rslt = typedef_lines.insert(line);
+	if (rslt.second)
+		force_recompile = true;
 }
 
 // Emits any needed custom function bodies.
@@ -912,11 +1039,19 @@ void CompilerMSL::emit_custom_functions()
 
 		case SPVFuncImplArrayCopy:
 			statement("// Implementation of an array copy function to cover GLSL's ability to copy an array via "
-			          "assignment. ");
-			statement("template<typename T>");
-			statement("void spvArrayCopy(thread T* dst, thread const T* src, uint count)");
+			          "assignment.");
+			statement("template<typename T, uint N>");
+			statement("void spvArrayCopy(thread T (&dst)[N], thread const T (&src)[N])");
 			begin_scope();
-			statement("for (uint i = 0; i < count; *dst++ = *src++, i++);");
+			statement("for (uint i = 0; i < N; dst[i] = src[i], i++);");
+			end_scope();
+			statement("");
+
+			statement("// An overload for constant arrays.");
+			statement("template<typename T, uint N>");
+			statement("void spvArrayCopyConstant(thread T (&dst)[N], constant T (&src)[N])");
+			begin_scope();
+			statement("for (uint i = 0; i < N; dst[i] = src[i], i++);");
 			end_scope();
 			statement("");
 			break;
@@ -928,6 +1063,7 @@ void CompilerMSL::emit_custom_functions()
 			statement("return a1 * b2 - b1 * a2;");
 			end_scope();
 			statement("");
+
 			statement("// Returns the determinant of a 3x3 matrix.");
 			statement("inline float spvDet3x3(float a1, float a2, float a3, float b1, float b2, float b3, float c1, "
 			          "float c2, float c3)");
@@ -941,7 +1077,7 @@ void CompilerMSL::emit_custom_functions()
 			statement("float4x4 spvInverse4x4(float4x4 m)");
 			begin_scope();
 			statement("float4x4 adj;	// The adjoint matrix (inverse after dividing by determinant)");
-			statement("");
+			statement_no_indent("");
 			statement("// Create the transpose of the cofactors, as the classical adjoint of the matrix.");
 			statement("adj[0][0] =  spvDet3x3(m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], "
 			          "m[3][3]);");
@@ -951,7 +1087,7 @@ void CompilerMSL::emit_custom_functions()
 			          "m[3][3]);");
 			statement("adj[0][3] = -spvDet3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], "
 			          "m[2][3]);");
-			statement("");
+			statement_no_indent("");
 			statement("adj[1][0] = -spvDet3x3(m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], "
 			          "m[3][3]);");
 			statement("adj[1][1] =  spvDet3x3(m[0][0], m[0][2], m[0][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], "
@@ -960,7 +1096,7 @@ void CompilerMSL::emit_custom_functions()
 			          "m[3][3]);");
 			statement("adj[1][3] =  spvDet3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], "
 			          "m[2][3]);");
-			statement("");
+			statement_no_indent("");
 			statement("adj[2][0] =  spvDet3x3(m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], "
 			          "m[3][3]);");
 			statement("adj[2][1] = -spvDet3x3(m[0][0], m[0][1], m[0][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], "
@@ -969,7 +1105,7 @@ void CompilerMSL::emit_custom_functions()
 			          "m[3][3]);");
 			statement("adj[2][3] = -spvDet3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], "
 			          "m[2][3]);");
-			statement("");
+			statement_no_indent("");
 			statement("adj[3][0] = -spvDet3x3(m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], "
 			          "m[3][2]);");
 			statement("adj[3][1] =  spvDet3x3(m[0][0], m[0][1], m[0][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], "
@@ -978,11 +1114,11 @@ void CompilerMSL::emit_custom_functions()
 			          "m[3][2]);");
 			statement("adj[3][3] =  spvDet3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], "
 			          "m[2][2]);");
-			statement("");
+			statement_no_indent("");
 			statement("// Calculate the determinant as a combination of the cofactors of the first row.");
 			statement("float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]) + (adj[0][2] * m[2][0]) + (adj[0][3] "
 			          "* m[3][0]);");
-			statement("");
+			statement_no_indent("");
 			statement("// Divide the classical adjoint matrix by the determinant.");
 			statement("// If determinant is zero, matrix is not invertable, so leave it unchanged.");
 			statement("return (det != 0.0f) ? (adj * (1.0f / det)) : m;");
@@ -991,34 +1127,38 @@ void CompilerMSL::emit_custom_functions()
 			break;
 
 		case SPVFuncImplInverse3x3:
-			statement("// Returns the determinant of a 2x2 matrix.");
-			statement("inline float spvDet2x2(float a1, float a2, float b1, float b2)");
-			begin_scope();
-			statement("return a1 * b2 - b1 * a2;");
-			end_scope();
-			statement("");
+			if (spv_function_implementations.count(SPVFuncImplInverse4x4) == 0)
+			{
+				statement("// Returns the determinant of a 2x2 matrix.");
+				statement("inline float spvDet2x2(float a1, float a2, float b1, float b2)");
+				begin_scope();
+				statement("return a1 * b2 - b1 * a2;");
+				end_scope();
+				statement("");
+			}
+
 			statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical");
 			statement("// adjoint and dividing by the determinant. The contents of the matrix are changed.");
 			statement("float3x3 spvInverse3x3(float3x3 m)");
 			begin_scope();
 			statement("float3x3 adj;	// The adjoint matrix (inverse after dividing by determinant)");
-			statement("");
+			statement_no_indent("");
 			statement("// Create the transpose of the cofactors, as the classical adjoint of the matrix.");
 			statement("adj[0][0] =  spvDet2x2(m[1][1], m[1][2], m[2][1], m[2][2]);");
 			statement("adj[0][1] = -spvDet2x2(m[0][1], m[0][2], m[2][1], m[2][2]);");
 			statement("adj[0][2] =  spvDet2x2(m[0][1], m[0][2], m[1][1], m[1][2]);");
-			statement("");
+			statement_no_indent("");
 			statement("adj[1][0] = -spvDet2x2(m[1][0], m[1][2], m[2][0], m[2][2]);");
 			statement("adj[1][1] =  spvDet2x2(m[0][0], m[0][2], m[2][0], m[2][2]);");
 			statement("adj[1][2] = -spvDet2x2(m[0][0], m[0][2], m[1][0], m[1][2]);");
-			statement("");
+			statement_no_indent("");
 			statement("adj[2][0] =  spvDet2x2(m[1][0], m[1][1], m[2][0], m[2][1]);");
 			statement("adj[2][1] = -spvDet2x2(m[0][0], m[0][1], m[2][0], m[2][1]);");
 			statement("adj[2][2] =  spvDet2x2(m[0][0], m[0][1], m[1][0], m[1][1]);");
-			statement("");
+			statement_no_indent("");
 			statement("// Calculate the determinant as a combination of the cofactors of the first row.");
 			statement("float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]) + (adj[0][2] * m[2][0]);");
-			statement("");
+			statement_no_indent("");
 			statement("// Divide the classical adjoint matrix by the determinant.");
 			statement("// If determinant is zero, matrix is not invertable, so leave it unchanged.");
 			statement("return (det != 0.0f) ? (adj * (1.0f / det)) : m;");
@@ -1032,17 +1172,17 @@ void CompilerMSL::emit_custom_functions()
 			statement("float2x2 spvInverse2x2(float2x2 m)");
 			begin_scope();
 			statement("float2x2 adj;	// The adjoint matrix (inverse after dividing by determinant)");
-			statement("");
+			statement_no_indent("");
 			statement("// Create the transpose of the cofactors, as the classical adjoint of the matrix.");
 			statement("adj[0][0] =  m[1][1];");
 			statement("adj[0][1] = -m[0][1];");
-			statement("");
+			statement_no_indent("");
 			statement("adj[1][0] = -m[1][0];");
 			statement("adj[1][1] =  m[0][0];");
-			statement("");
+			statement_no_indent("");
 			statement("// Calculate the determinant as a combination of the cofactors of the first row.");
 			statement("float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]);");
-			statement("");
+			statement_no_indent("");
 			statement("// Divide the classical adjoint matrix by the determinant.");
 			statement("// If determinant is zero, matrix is not invertable, so leave it unchanged.");
 			statement("return (det != 0.0f) ? (adj * (1.0f / det)) : m;");
@@ -1134,6 +1274,34 @@ void CompilerMSL::declare_undefined_values()
 		statement("");
 }
 
+void CompilerMSL::declare_constant_arrays()
+{
+	// MSL cannot declare arrays inline (except when declaring a variable), so each non-specialization array
+	// constant is emitted as a named global `constant`; a trailing blank line follows if any were written.
+	bool emitted = false;
+
+	for (auto &id : ids)
+	{
+		if (id.get_type() == TypeConstant)
+		{
+			auto &c = id.get<SPIRConstant>();
+			if (c.specialization)
+				continue;
+
+			auto &type = get<SPIRType>(c.constant_type);
+			if (!type.array.empty())
+			{
+				auto name = to_name(c.self);
+				statement("constant ", variable_decl(type, name), " = ", constant_expression(c), ";");
+				emitted = true;
+			}
+		}
+	}
+
+	if (emitted)
+		statement("");
+}
+
 void CompilerMSL::emit_resources()
 {
 	// Output non-interface structs. These include local function structs
@@ -1170,6 +1338,7 @@ void CompilerMSL::emit_resources()
 		}
 	}
 
+	declare_constant_arrays();
 	declare_undefined_values();
 
 	// Output interface structs.
@@ -1316,6 +1485,10 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
 		UFOP(popcount);
 		break;
 
+	case OpFRem:
+		BFOP(fmod);
+		break;
+
 	// Atomics
 	case OpAtomicExchange:
 	{
@@ -1423,11 +1596,15 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
 	{
 		// Mark that this shader reads from this image
 		uint32_t img_id = ops[2];
-		auto *p_var = maybe_get_backing_variable(img_id);
-		if (p_var && has_decoration(p_var->self, DecorationNonReadable))
+		auto &type = expression_type(img_id);
+		if (type.image.dim != DimSubpassData)
 		{
-			unset_decoration(p_var->self, DecorationNonReadable);
-			force_recompile = true;
+			auto *p_var = maybe_get_backing_variable(img_id);
+			if (p_var && has_decoration(p_var->self, DecorationNonReadable))
+			{
+				unset_decoration(p_var->self, DecorationNonReadable);
+				force_recompile = true;
+			}
 		}
 
 		emit_texture_op(instruction);
@@ -1611,11 +1788,12 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
 	case OpVectorTimesMatrix:
 	case OpMatrixTimesVector:
 	{
-		// If the matrix needs transpose and it is square, just flip the multiply order.
+		// If the matrix needs transpose and it is square or packed, just flip the multiply order.
 		uint32_t mtx_id = ops[opcode == OpMatrixTimesVector ? 2 : 3];
 		auto *e = maybe_get<SPIRExpression>(mtx_id);
 		auto &t = expression_type(mtx_id);
-		if (e && e->need_transpose && t.columns == t.vecsize)
+		bool is_packed = has_decoration(mtx_id, DecorationCPacked);
+		if (e && e->need_transpose && (t.columns == t.vecsize || is_packed))
 		{
 			e->need_transpose = false;
 			emit_binary_op(ops[0], ops[1], ops[3], ops[2], "*");
@@ -1743,27 +1921,44 @@ bool CompilerMSL::maybe_emit_input_struct_assignment(uint32_t id_lhs, uint32_t i
 	return true;
 }
 
+void CompilerMSL::emit_array_copy(const string &lhs, uint32_t rhs_id)
+{
+	// Pick the helper by RHS kind: spvArrayCopyConstant() takes a `constant` source, spvArrayCopy() a `thread` one.
+	if (ids[rhs_id].get_type() == TypeConstant)
+		statement("spvArrayCopyConstant(", lhs, ", ", to_expression(rhs_id), ");");
+	else
+		statement("spvArrayCopy(", lhs, ", ", to_expression(rhs_id), ");");
+}
+
 // Since MSL does not allow arrays to be copied via simple variable assignment,
 // if the LHS and RHS represent an assignment of an entire array, it must be
 // implemented by calling an array copy function.
 // Returns whether the struct assignment was emitted.
 bool CompilerMSL::maybe_emit_array_assignment(uint32_t id_lhs, uint32_t id_rhs)
 {
-	// Assignment from an array initializer is fine.
-	if (ids[id_rhs].get_type() == TypeConstant)
-		return false;
-
 	// We only care about assignments of an entire array
 	auto &type = expression_type(id_rhs);
 	if (type.array.size() == 0)
 		return false;
 
+	auto *var = maybe_get<SPIRVariable>(id_lhs);
+	if (ids[id_rhs].get_type() == TypeConstant && var && var->deferred_declaration)
+	{
+		// Special case, if we end up declaring a variable when assigning the constant array,
+		// we can avoid the copy by directly assigning the constant expression.
+		// This is likely necessary to be able to use a variable as a true look-up table, as it is unlikely
+		// the compiler will be able to optimize the spvArrayCopy() into a constant LUT.
+		// After a variable has been declared, we can no longer assign constant arrays in MSL unfortunately.
+		statement(to_expression(id_lhs), " = ", constant_expression(get<SPIRConstant>(id_rhs)), ";");
+		return true;
+	}
+
 	// Ensure the LHS variable has been declared
 	auto *p_v_lhs = maybe_get_backing_variable(id_lhs);
 	if (p_v_lhs)
 		flush_variable_declaration(p_v_lhs->self);
 
-	statement("spvArrayCopy(", to_expression(id_lhs), ", ", to_expression(id_rhs), ", ", to_array_size(type, 0), ");");
+	emit_array_copy(to_expression(id_lhs), id_rhs);
 	register_write(id_lhs);
 
 	return true;
@@ -1943,18 +2138,41 @@ void CompilerMSL::emit_interface_block(uint32_t ib_var_id)
 // If this is the entry point function, Metal-specific return value and function arguments are added.
 void CompilerMSL::emit_function_prototype(SPIRFunction &func, uint64_t)
 {
+	if (func.self != entry_point)
+		add_function_overload(func);
+
 	local_variable_names = resource_names;
 	string decl;
 
 	processing_entry_point = (func.self == entry_point);
 
 	auto &type = get<SPIRType>(func.return_type);
-	decl += func_type_decl(type);
+
+	if (type.array.empty())
+	{
+		decl += func_type_decl(type);
+	}
+	else
+	{
+		// We cannot return arrays in MSL, so "return" through an out variable.
+		decl = "void";
+	}
+
 	decl += " ";
 	decl += to_name(func.self);
-
 	decl += "(";
 
+	if (!type.array.empty())
+	{
+		// Fake array returns by writing to an out array instead.
+		decl += "thread ";
+		decl += type_to_glsl(type);
+		decl += " (&SPIRV_Cross_return_value)";
+		decl += type_to_array_glsl(type);
+		if (!func.arguments.empty())
+			decl += ", ";
+	}
+
 	if (processing_entry_point)
 	{
 		decl += entry_point_args(!func.arguments.empty());
@@ -2065,6 +2283,13 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool
 
 		break;
 
+	case DimSubpassData:
+		if (imgtype.image.ms)
+			tex_coords = "uint2(gl_FragCoord.xy)";
+		else
+			tex_coords = join("uint2(gl_FragCoord.xy), 0");
+		break;
+
 	case Dim2D:
 		if (coord_type.vecsize > 2)
 			tex_coords += ".xy";
@@ -2310,8 +2535,13 @@ bool CompilerMSL::is_non_native_row_major_matrix(uint32_t id)
 		return false;
 
 	// Generate a function that will swap matrix elements from row-major to column-major.
-	const auto type = expression_type(id);
-	add_convert_row_major_matrix_function(type.columns, type.vecsize);
+	// Packed row-matrix should just use transpose() function.
+	if (!has_decoration(id, DecorationCPacked))
+	{
+		const auto type = expression_type(id);
+		add_convert_row_major_matrix_function(type.columns, type.vecsize);
+	}
+
 	return true;
 }
 
@@ -2323,12 +2553,17 @@ bool CompilerMSL::member_is_non_native_row_major_matrix(const SPIRType &type, ui
 		return false;
 
 	// Non-matrix or column-major matrix types do not need to be converted.
-	if (!(combined_decoration_for_member(type, index) & (1ull << DecorationRowMajor)))
+	if (!has_member_decoration(type.self, index, DecorationRowMajor))
 		return false;
 
 	// Generate a function that will swap matrix elements from row-major to column-major.
-	const auto mbr_type = get<SPIRType>(type.member_types[index]);
-	add_convert_row_major_matrix_function(mbr_type.columns, mbr_type.vecsize);
+	// Packed row-matrix should just use transpose() function.
+	if (!has_member_decoration(type.self, index, DecorationCPacked))
+	{
+		const auto mbr_type = get<SPIRType>(type.member_types[index]);
+		add_convert_row_major_matrix_function(mbr_type.columns, mbr_type.vecsize);
+	}
+
 	return true;
 }
 
@@ -2355,20 +2590,19 @@ void CompilerMSL::add_convert_row_major_matrix_function(uint32_t cols, uint32_t
 
 	auto rslt = spv_function_implementations.insert(spv_func);
 	if (rslt.second)
-	{
 		add_pragma_line("#pragma clang diagnostic ignored \"-Wmissing-prototypes\"");
-		force_recompile = true;
-	}
 }
 
 // Wraps the expression string in a function call that converts the
 // row_major matrix result of the expression to a column_major matrix.
-string CompilerMSL::convert_row_major_matrix(string exp_str, const SPIRType &exp_type)
+string CompilerMSL::convert_row_major_matrix(string exp_str, const SPIRType &exp_type, bool is_packed)
 {
 	strip_enclosed_expression(exp_str);
 
 	string func_name;
-	if (exp_type.columns == exp_type.vecsize)
+
+	// Square and packed matrices can just use transpose
+	if (exp_type.columns == exp_type.vecsize || is_packed)
 		func_name = "transpose";
 	else
 		func_name = string("spvConvertFromRowMajor") + to_string(exp_type.columns) + "x" + to_string(exp_type.vecsize);
@@ -2394,7 +2628,7 @@ void CompilerMSL::emit_fixup()
 
 // Emit a structure member, padding and packing to maintain the correct memeber alignments.
 void CompilerMSL::emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index,
-                                     const string &qualifier)
+                                     const string &qualifier, uint32_t)
 {
 	auto &membertype = get<SPIRType>(member_type_id);
 
@@ -2405,7 +2639,23 @@ void CompilerMSL::emit_struct_member(const SPIRType &type, uint32_t member_type_
 		statement("char pad", to_string(index), "[", to_string(pad_len), "];");
 
 	// If this member is packed, mark it as so.
-	string pack_pfx = member_is_packed_type(type, index) ? "packed_" : "";
+	string pack_pfx = "";
+	if (member_is_packed_type(type, index))
+	{
+		pack_pfx = "packed_";
+
+		// If we're packing a matrix, output an appropriate typedef
+		if (membertype.vecsize > 1 && membertype.columns > 1)
+		{
+			string base_type = membertype.width == 16 ? "half" : "float";
+			string td_line = "typedef ";
+			td_line += base_type + to_string(membertype.vecsize) + "x" + to_string(membertype.columns);
+			td_line += " " + pack_pfx;
+			td_line += base_type + to_string(membertype.columns) + "x" + to_string(membertype.vecsize);
+			td_line += ";";
+			add_typedef_line(td_line);
+		}
+	}
 
 	statement(pack_pfx, type_to_glsl(membertype), " ", qualifier, to_member_name(type, index),
 	          member_attribute_qualifier(type, index), type_to_array_glsl(membertype), ";");
@@ -2595,7 +2845,7 @@ string CompilerMSL::func_type_decl(SPIRType &type)
 {
 	auto &execution = get_entry_point();
 	// The regular function return type. If not processing the entry point function, that's all we need
-	string return_type = type_to_glsl(type);
+	string return_type = type_to_glsl(type) + type_to_array_glsl(type);
 	if (!processing_entry_point)
 		return return_type;
 
@@ -2604,7 +2854,7 @@ string CompilerMSL::func_type_decl(SPIRType &type)
 	{
 		auto &so_var = get<SPIRVariable>(stage_out_var_id);
 		auto &so_type = get<SPIRType>(so_var.basetype);
-		return_type = type_to_glsl(so_type);
+		return_type = type_to_glsl(so_type) + type_to_array_glsl(type);
 	}
 
 	// Prepend a entry type, based on the execution model
@@ -2883,7 +3133,7 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg)
 	if (is_array(type))
 	{
 		decl += " (&";
-		decl += to_name(var.self);
+		decl += to_expression(var.self);
 		decl += ")";
 		decl += type_to_array_glsl(type);
 	}
@@ -2891,12 +3141,12 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg)
 	{
 		decl += "&";
 		decl += " ";
-		decl += to_name(var.self);
+		decl += to_expression(var.self);
 	}
 	else
 	{
 		decl += " ";
-		decl += to_name(var.self);
+		decl += to_expression(var.self);
 	}
 
 	return decl;
@@ -3085,8 +3335,9 @@ string CompilerMSL::image_type_glsl(const SPIRType &type, uint32_t id)
 
 	// Bypass pointers because we need the real image struct
 	auto &img_type = get<SPIRType>(type.self).image;
+	bool shadow_image = comparison_images.count(id) != 0;
 
-	if (img_type.depth)
+	if (img_type.depth || shadow_image)
 	{
 		switch (img_type.dim)
 		{
@@ -3116,6 +3367,7 @@ string CompilerMSL::image_type_glsl(const SPIRType &type, uint32_t id)
 			break;
 		case DimBuffer:
 		case Dim2D:
+		case DimSubpassData:
 			img_type_name += (img_type.ms ? "texture2d_ms" : (img_type.arrayed ? "texture2d_array" : "texture2d"));
 			break;
 		case Dim3D:
@@ -3137,7 +3389,7 @@ string CompilerMSL::image_type_glsl(const SPIRType &type, uint32_t id)
 	// For unsampled images, append the sample/read/write access qualifier.
 	// For kernel images, the access qualifier my be supplied directly by SPIR-V.
 	// Otherwise it may be set based on whether the image is read from or written to within the shader.
-	if (type.basetype == SPIRType::Image && type.image.sampled == 2)
+	if (type.basetype == SPIRType::Image && type.image.sampled == 2 && type.image.dim != DimSubpassData)
 	{
 		switch (img_type.access)
 		{
@@ -3216,20 +3468,24 @@ string CompilerMSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage)
 		return "gl_InstanceIndex";
 
 	// When used in the entry function, output builtins are qualified with output struct name.
+	// Test storage class as NOT Input, as output builtins might be part of generic type.
 	case BuiltInPosition:
 	case BuiltInPointSize:
 	case BuiltInClipDistance:
 	case BuiltInCullDistance:
 	case BuiltInLayer:
 	case BuiltInFragDepth:
-		if (current_function && (current_function->self == entry_point))
+	case BuiltInSampleMask:
+		if (storage != StorageClassInput && current_function && (current_function->self == entry_point))
 			return stage_out_var_name + "." + CompilerGLSL::builtin_to_glsl(builtin, storage);
-		else
-			return CompilerGLSL::builtin_to_glsl(builtin, storage);
+
+		break;
 
 	default:
-		return CompilerGLSL::builtin_to_glsl(builtin, storage);
+		break;
 	}
+
+	return CompilerGLSL::builtin_to_glsl(builtin, storage);
 }
 
 // Returns an MSL string attribute qualifer for a SPIR-V builtin
@@ -3369,7 +3625,6 @@ string CompilerMSL::built_in_func_arg(BuiltIn builtin, bool prefix_comma)
 // Returns the byte size of a struct member.
 size_t CompilerMSL::get_declared_struct_member_size(const SPIRType &struct_type, uint32_t index) const
 {
-	auto dec_mask = get_member_decoration_mask(struct_type.self, index);
 	auto &type = get<SPIRType>(struct_type.member_types[index]);
 
 	switch (type.basetype)
@@ -3384,41 +3639,28 @@ size_t CompilerMSL::get_declared_struct_member_size(const SPIRType &struct_type,
 
 	default:
 	{
-		size_t component_size = type.width / 8;
-		unsigned vecsize = type.vecsize;
-		unsigned columns = type.columns;
-
 		// For arrays, we can use ArrayStride to get an easy check.
 		// Runtime arrays will have zero size so force to min of one.
 		if (!type.array.empty())
-			return type_struct_member_array_stride(struct_type, index) * max(type.array.back(), 1U);
+		{
+			bool array_size_literal = type.array_size_literal.back();
+			uint32_t array_size =
+			    array_size_literal ? type.array.back() : get<SPIRConstant>(type.array.back()).scalar();
+			return type_struct_member_array_stride(struct_type, index) * max(array_size, 1u);
+		}
 
 		if (type.basetype == SPIRType::Struct)
 			return get_declared_struct_size(type);
 
-		if (columns == 1) // An unpacked 3-element vector is the same size as a 4-element vector.
-		{
-			if (!(dec_mask & (1ull << DecorationCPacked)))
-			{
-				if (vecsize == 3)
-					vecsize = 4;
-			}
-		}
-		else // For matrices, a 3-element column is the same size as a 4-element column.
-		{
-			if (dec_mask & (1ull << DecorationColMajor))
-			{
-				if (vecsize == 3)
-					vecsize = 4;
-			}
-			else if (dec_mask & (1ull << DecorationRowMajor))
-			{
-				if (columns == 3)
-					columns = 4;
-			}
-		}
+		uint32_t component_size = type.width / 8;
+		uint32_t vecsize = type.vecsize;
+		uint32_t columns = type.columns;
 
-		return vecsize * columns * component_size;
+		// An unpacked 3-element vector or matrix column is the same memory size as a 4-element.
+		if (vecsize == 3 && !has_member_decoration(struct_type.self, index, DecorationCPacked))
+			vecsize = 4;
+
+		return component_size * vecsize * columns;
 	}
 	}
 }
@@ -3443,16 +3685,13 @@ size_t CompilerMSL::get_declared_struct_member_alignment(const SPIRType &struct_
 
 	default:
 	{
-		// Alignment of packed type is the same as the underlying component size.
-		// Alignment of unpacked type is the same as the type size (or one matrix column).
+		// Alignment of packed type is the same as the underlying component or column size.
+		// Alignment of unpacked type is the same as the vector size.
+		// A 3-element vector aligns like a 4-element one (for packed types this applies to the column count).
 		if (member_is_packed_type(struct_type, index))
-			return type.width / 8;
+			return (type.width / 8) * (type.columns == 3 ? 4 : type.columns);
 		else
-		{
-			// Divide by array size and colum count. Runtime arrays will have zero size so force to min of one.
-			uint32_t array_size = type.array.empty() ? 1 : max(type.array.back(), 1U);
-			return get_declared_struct_member_size(struct_type, index) / (type.columns * array_size);
-		}
+			return (type.width / 8) * (type.vecsize == 3 ? 4 : type.vecsize);
 	}
 	}
 }
@@ -3522,16 +3761,38 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o
 	case OpFMod:
 		return SPVFuncImplMod;
 
+	case OpFunctionCall:
+	{
+		auto &return_type = compiler.get<SPIRType>(args[0]);
+		if (!return_type.array.empty())
+			return SPVFuncImplArrayCopy;
+		else
+			return SPVFuncImplNone;
+	}
+
 	case OpStore:
 	{
 		// Get the result type of the RHS. Since this is run as a pre-processing stage,
 		// we must extract the result type directly from the Instruction, rather than the ID.
 		uint32_t id_rhs = args[1];
-		uint32_t type_id_rhs = result_types[id_rhs];
-		if ((compiler.ids[id_rhs].get_type() != TypeConstant) && type_id_rhs &&
-		    compiler.is_array(compiler.get<SPIRType>(type_id_rhs)))
-			return SPVFuncImplArrayCopy;
 
+		const SPIRType *type = nullptr;
+		if (compiler.ids[id_rhs].get_type() != TypeNone)
+		{
+			// Could be a constant, or similar.
+			type = &compiler.expression_type(id_rhs);
+		}
+		else
+		{
+			// Or ... an expression.
+			if (result_types[id_rhs] != 0)
+				type = &compiler.get<SPIRType>(result_types[id_rhs]);
+		}
+
+		if (type && compiler.is_array(*type))
+			return SPVFuncImplArrayCopy;
+		else
+			return SPVFuncImplNone;
 		break;
 	}
 
diff --git a/spirv_msl.hpp b/spirv_msl.hpp
index 6f66f3122e..8e6a1130b4 100644
--- a/spirv_msl.hpp
+++ b/spirv_msl.hpp
@@ -135,9 +135,9 @@ public:
 		SPVFuncImplFindSMsb,
 		SPVFuncImplFindUMsb,
 		SPVFuncImplArrayCopy,
-		SPVFuncImplInverse2x2,
-		SPVFuncImplInverse3x3,
 		SPVFuncImplInverse4x4,
+		SPVFuncImplInverse3x3,
+		SPVFuncImplInverse2x2,
 		SPVFuncImplRowMajor2x3,
 		SPVFuncImplRowMajor2x4,
 		SPVFuncImplRowMajor3x2,
@@ -187,7 +187,7 @@ protected:
 	void emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id) override;
 	void emit_fixup() override;
 	void emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index,
-	                        const std::string &qualifier = "") override;
+	                        const std::string &qualifier = "", uint32_t base_offset = 0) override;
 	std::string type_to_glsl(const SPIRType &type, uint32_t id = 0) override;
 	std::string image_type_glsl(const SPIRType &type, uint32_t id = 0) override;
 	std::string builtin_to_glsl(spv::BuiltIn builtin, spv::StorageClass storage) override;
@@ -208,9 +208,10 @@ protected:
 	std::string to_qualifiers_glsl(uint32_t id) override;
 	void replace_illegal_names() override;
 	void declare_undefined_values() override;
+	void declare_constant_arrays();
 	bool is_non_native_row_major_matrix(uint32_t id) override;
 	bool member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index) override;
-	std::string convert_row_major_matrix(std::string exp_str, const SPIRType &exp_type) override;
+	std::string convert_row_major_matrix(std::string exp_str, const SPIRType &exp_type, bool is_packed) override;
 
 	void preprocess_op_codes();
 	void localize_global_variables();
@@ -225,6 +226,7 @@ protected:
 	                                            std::unordered_set<uint32_t> &processed_func_ids);
 	uint32_t add_interface_block(spv::StorageClass storage);
 	void mark_location_as_used_by_shader(uint32_t location, spv::StorageClass storage);
+	uint32_t ensure_correct_builtin_type(uint32_t type_id, spv::BuiltIn builtin);
 
 	void emit_custom_functions();
 	void emit_resources();
@@ -250,7 +252,7 @@ protected:
 	uint32_t get_ordered_member_location(uint32_t type_id, uint32_t index);
 	size_t get_declared_struct_member_alignment(const SPIRType &struct_type, uint32_t index) const;
 	std::string to_component_argument(uint32_t id);
-	bool should_move_to_input_buffer(SPIRType &type, bool is_builtin, spv::StorageClass storage);
+	bool should_move_to_input_buffer(uint32_t type_id, bool is_builtin, spv::StorageClass storage);
 	void move_to_input_buffer(SPIRVariable &var);
 	void move_member_to_input_buffer(const SPIRType &type, uint32_t index);
 	std::string add_input_buffer_block_member(uint32_t mbr_type_id, std::string mbr_name, uint32_t mbr_locn);
@@ -264,7 +266,11 @@ protected:
 	                         bool op1_is_pointer = false, uint32_t op2 = 0);
 	const char *get_memory_order(uint32_t spv_mem_sem);
 	void add_pragma_line(const std::string &line);
+	void add_typedef_line(const std::string &line);
 	void emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uint32_t id_mem_sem);
+	void emit_array_copy(const std::string &lhs, uint32_t rhs_id) override;
+	void build_implicit_builtins();
+	uint32_t builtin_frag_coord_id = 0;
 
 	Options options;
 	std::set<SPVFuncImpl> spv_function_implementations;
@@ -272,6 +278,7 @@ protected:
 	std::map<uint32_t, uint32_t> non_stage_in_input_var_ids;
 	std::unordered_map<MSLStructMemberKey, uint32_t> struct_member_padding;
 	std::set<std::string> pragma_lines;
+	std::set<std::string> typedef_lines;
 	std::vector<MSLResourceBinding *> resource_bindings;
 	MSLResourceBinding next_metal_resource_index;
 	uint32_t stage_in_var_id = 0;