commit 9ae607553558a37047b4ce50b5aca91908ad9c4e
Author: Brad Parker
Date:   Tue Dec 20 13:17:07 2016 -0500

    Squashed 'deps/SPIRV-Cross/' content from commit 9ccd1ae

    git-subtree-dir: deps/SPIRV-Cross
    git-subtree-split: 9ccd1aea4201ff5d79c09808567348d0d3ab0452

diff --git a/.clang-format b/.clang-format
new file mode 100755
index 0000000000..443f90b774
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,167 @@
+# The style used for all options not specifically set in the configuration.
+BasedOnStyle: LLVM
+
+# The extra indent or outdent of access modifiers, e.g. public:.
+AccessModifierOffset: -4
+
+# If true, aligns escaped newlines as far left as possible. Otherwise puts them into the right-most column.
+AlignEscapedNewlinesLeft: true
+
+# If true, aligns trailing comments.
+AlignTrailingComments: false
+
+# Allow putting all parameters of a function declaration onto the next line even if BinPackParameters is false.
+AllowAllParametersOfDeclarationOnNextLine: false
+
+# Allows contracting simple braced statements to a single line.
+AllowShortBlocksOnASingleLine: false
+
+# If true, short case labels will be contracted to a single line.
+AllowShortCaseLabelsOnASingleLine: false
+
+# Dependent on the value, int f() { return 0; } can be put on a single line. Possible values: None, Inline, All.
+AllowShortFunctionsOnASingleLine: None
+
+# If true, if (a) return; can be put on a single line.
+AllowShortIfStatementsOnASingleLine: false
+
+# If true, while (true) continue; can be put on a single line.
+AllowShortLoopsOnASingleLine: false
+
+# If true, always break after function definition return types.
+AlwaysBreakAfterDefinitionReturnType: false
+
+# If true, always break before multiline string literals.
+AlwaysBreakBeforeMultilineStrings: false
+
+# If true, always break after the template<...> of a template declaration.
+AlwaysBreakTemplateDeclarations: true
+
+# If false, a function call's arguments will either be all on the same line or will have one line each.
+BinPackArguments: true
+
+# If false, a function declaration's or function definition's parameters will either all be on the same line
+# or will have one line each.
+BinPackParameters: true
+
+# The way to wrap binary operators. Possible values: None, NonAssignment, All.
+BreakBeforeBinaryOperators: None
+
+# The brace breaking style to use. Possible values: Attach, Linux, Stroustrup, Allman, GNU.
+BreakBeforeBraces: Allman
+
+# If true, ternary operators will be placed after line breaks.
+BreakBeforeTernaryOperators: false
+
+# Always break constructor initializers before commas and align the commas with the colon.
+BreakConstructorInitializersBeforeComma: true
+
+# The column limit. A column limit of 0 means that there is no column limit.
+ColumnLimit: 120
+
+# A regular expression that describes comments with special meaning, which should not be split into lines or otherwise changed.
+CommentPragmas: '^ *'
+
+# If the constructor initializers don't fit on a line, put each initializer on its own line.
+ConstructorInitializerAllOnOneLineOrOnePerLine: false
+
+# The number of characters to use for indentation of constructor initializer lists.
+ConstructorInitializerIndentWidth: 4
+
+# Indent width for line continuations.
+ContinuationIndentWidth: 4
+
+# If true, format braced lists as best suited for C++11 braced lists.
+Cpp11BracedListStyle: false
+
+# Disables formatting at all.
+DisableFormat: false
+
+# A vector of macros that should be interpreted as foreach loops instead of as function calls.
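+# For illustration only (hypothetical macro names), the syntax would be:
+#   ForEachMacros: [ FOREACH, BOOST_FOREACH ]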
+#ForEachMacros: ''
+
+# Indent case labels one level from the switch statement.
+# When false, use the same indentation level as for the switch statement.
+# Switch statement body is always indented one level more than case labels.
+IndentCaseLabels: false
+
+# The number of columns to use for indentation.
+IndentWidth: 4
+
+# Indent if a function definition or declaration is wrapped after the type.
+IndentWrappedFunctionNames: false
+
+# If true, empty lines at the start of blocks are kept.
+KeepEmptyLinesAtTheStartOfBlocks: true
+
+# Language, this format style is targeted at. Possible values: None, Cpp, Java, JavaScript, Proto.
+Language: Cpp
+
+# The maximum number of consecutive empty lines to keep.
+MaxEmptyLinesToKeep: 1
+
+# The indentation used for namespaces. Possible values: None, Inner, All.
+NamespaceIndentation: None
+
+# The penalty for breaking a function call after "call(".
+PenaltyBreakBeforeFirstCallParameter: 19
+
+# The penalty for each line break introduced inside a comment.
+PenaltyBreakComment: 300
+
+# The penalty for breaking before the first <<.
+PenaltyBreakFirstLessLess: 120
+
+# The penalty for each line break introduced inside a string literal.
+PenaltyBreakString: 1000
+
+# The penalty for each character outside of the column limit.
+PenaltyExcessCharacter: 1000000
+
+# Penalty for putting the return type of a function onto its own line.
+PenaltyReturnTypeOnItsOwnLine: 1000000000
+
+# Pointer and reference alignment style. Possible values: Left, Right, Middle.
+PointerAlignment: Right
+
+# If true, a space may be inserted after C style casts.
+SpaceAfterCStyleCast: false
+
+# If false, spaces will be removed before assignment operators.
+SpaceBeforeAssignmentOperators: true
+
+# Defines in which cases to put a space before opening parentheses. Possible values: Never, ControlStatements, Always.
+SpaceBeforeParens: ControlStatements
+
+# If true, spaces may be inserted into '()'.
+SpaceInEmptyParentheses: false
+
+# The number of spaces before trailing line comments (// - comments).
+SpacesBeforeTrailingComments: 1
+
+# If true, spaces will be inserted after '<' and before '>' in template argument lists.
+SpacesInAngles: false
+
+# If true, spaces may be inserted into C style casts.
+SpacesInCStyleCastParentheses: false
+
+# If true, spaces are inserted inside container literals (e.g. ObjC and Javascript array and dict literals).
+SpacesInContainerLiterals: false
+
+# If true, spaces will be inserted after '(' and before ')'.
+SpacesInParentheses: false
+
+# If true, spaces will be inserted after '[' and before ']'.
+SpacesInSquareBrackets: false
+
+# Format compatible with this standard, e.g. use A<A<X> > instead of A<A<X>> for LS_Cpp03. Possible values: Cpp03, Cpp11, Auto.
+Standard: Cpp11
+
+# The number of columns used for tab stops.
+TabWidth: 4
+
+# The way to use tab characters in the resulting file. Possible values: Never, ForIndentation, Always.
+UseTab: ForIndentation
+
+# Do not reflow comments
+ReflowComments: false
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000000..bdd308add4
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,17 @@
+*.o
+*.d
+*.txt
+/test
+/spirv-cross
+*.spv
+/obj
+/msvc/x64
+/msvc/Debug
+/msvc/Release
+*.suo
+*.sdf
+*.opensdf
+*.shader
+*.a
+
+!CMakeLists.txt
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000000..593d472b8e
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,26 @@
+language: cpp
+os:
+  - linux
+  - osx
+
+# Use Ubuntu 14.04 LTS (Trusty) as the Linux testing environment.
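+# ("dist: trusty" selects the Ubuntu 14.04 image; "sudo: required" requests the
+# full-VM infrastructure instead of the container-based one.)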
+sudo: required +dist: trusty + +# We check out glslang at a specific revision to avoid test output mismatches +env: + - GLSLANG_REV=b56f4ac72c57f5c50f14ddb0bf1f78eaaef21c2b + +before_script: + - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install python3; fi + - git clone https://github.com/KhronosGroup/glslang.git glslang + - git clone https://github.com/KhronosGroup/SPIRV-Tools SPIRV-Tools + - git clone https://github.com/KhronosGroup/SPIRV-Headers.git SPIRV-Tools/external/spirv-headers + +script: + - git -C glslang checkout $GLSLANG_REV + - cd glslang && cmake . && make -j2 && cd .. + - cd SPIRV-Tools && cmake . && make -j2 && cd .. + - make -j2 + - PATH=./glslang/StandAlone:./SPIRV-Tools/tools:$PATH + - ./test_shaders.py shaders diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000000..ad852f8e40 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,98 @@ +# Copyright 2016 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +cmake_minimum_required(VERSION 2.8) +project(SPIRV-Cross) +enable_testing() + +option(SPIRV_CROSS_EXCEPTIONS_TO_ASSERTIONS "Instead of throwing exceptions assert" OFF) + +if(${CMAKE_GENERATOR} MATCHES "Makefile") + if(${CMAKE_CURRENT_SOURCE_DIR} STREQUAL ${CMAKE_CURRENT_BINARY_DIR}) + message(FATAL_ERROR "Build out of tree to avoid overwriting Makefile") + endif() +endif() + +add_library(spirv-cross-core STATIC + ${CMAKE_CURRENT_SOURCE_DIR}/GLSL.std.450.h + ${CMAKE_CURRENT_SOURCE_DIR}/spirv_common.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/spirv.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/spirv_cross.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/spirv_cross.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/spirv_cfg.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/spirv_cfg.cpp) + +add_library(spirv-cross-glsl STATIC + ${CMAKE_CURRENT_SOURCE_DIR}/spirv_glsl.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/spirv_glsl.hpp) + +add_library(spirv-cross-cpp STATIC + ${CMAKE_CURRENT_SOURCE_DIR}/spirv_cpp.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/spirv_cpp.cpp) + +add_library(spirv-cross-msl STATIC + ${CMAKE_CURRENT_SOURCE_DIR}/spirv_msl.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/spirv_msl.cpp) + +add_executable(spirv-cross main.cpp) +target_link_libraries(spirv-cross spirv-cross-glsl spirv-cross-cpp spirv-cross-msl spirv-cross-core) +target_link_libraries(spirv-cross-glsl spirv-cross-core) +target_link_libraries(spirv-cross-msl spirv-cross-glsl) +target_link_libraries(spirv-cross-cpp spirv-cross-glsl) +target_include_directories(spirv-cross-core PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) + +set(spirv-compiler-options "") +set(spirv-compiler-defines "") + +if(SPIRV_CROSS_EXCEPTIONS_TO_ASSERTIONS) + set(spirv-compiler-defines ${spirv-compiler-defines} SPIRV_CROSS_EXCEPTIONS_TO_ASSERTIONS) +endif() + +# To specify special debug or optimization options, use +# -DCMAKE_CXX_COMPILE_FLAGS +# However, we require the C++11 dialect. 
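+# For example, a hypothetical debug invocation could look like:
+#   cmake .. -DCMAKE_CXX_FLAGS="-O0 -g"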
+if (NOT "${MSVC}") + set(spirv-compiler-options ${spirv-compiler-options} -std=c++11 -Wall -Wextra -Werror -Wshadow) + set(spirv-compiler-defines ${spirv-compiler-defines} __STDC_LIMIT_MACROS) + + if(SPIRV_CROSS_EXCEPTIONS_TO_ASSERTIONS) + set(spirv-compiler-options ${spirv-compiler-options} -fno-exceptions) + endif() +endif() + +target_compile_options(spirv-cross-core PRIVATE ${spirv-compiler-options}) +target_compile_options(spirv-cross-glsl PRIVATE ${spirv-compiler-options}) +target_compile_options(spirv-cross-msl PRIVATE ${spirv-compiler-options}) +target_compile_options(spirv-cross-cpp PRIVATE ${spirv-compiler-options}) +target_compile_options(spirv-cross PRIVATE ${spirv-compiler-options}) +target_compile_definitions(spirv-cross-core PRIVATE ${spirv-compiler-defines}) +target_compile_definitions(spirv-cross-glsl PRIVATE ${spirv-compiler-defines}) +target_compile_definitions(spirv-cross-msl PRIVATE ${spirv-compiler-defines}) +target_compile_definitions(spirv-cross-cpp PRIVATE ${spirv-compiler-defines}) +target_compile_definitions(spirv-cross PRIVATE ${spirv-compiler-defines}) + +# Set up tests, using only the simplest modes of the test_shaders +# script. You have to invoke the script manually to: +# - Update the reference files +# - Get cycle counts from malisc +# - Keep failing outputs +find_package(PythonInterp) +if(${PYTHONINTERP_FOUND} AND ${PYTHON_VERSION_MAJOR} GREATER 2) + add_test(NAME spirv-cross-test + COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/test_shaders.py + ${CMAKE_CURRENT_SOURCE_DIR}/shaders) +else() + message(WARNING "Testing disabled. Could not find python3. If you have python3 installed try running " + "cmake with -DPYTHON_EXECUTABLE:FILEPATH=/path/to/python3 to help it find the executable") +endif() diff --git a/GLSL.std.450.h b/GLSL.std.450.h new file mode 100644 index 0000000000..54cc00e9a8 --- /dev/null +++ b/GLSL.std.450.h @@ -0,0 +1,131 @@ +/* +** Copyright (c) 2014-2016 The Khronos Group Inc. +** +** Permission is hereby granted, free of charge, to any person obtaining a copy +** of this software and/or associated documentation files (the "Materials"), +** to deal in the Materials without restriction, including without limitation +** the rights to use, copy, modify, merge, publish, distribute, sublicense, +** and/or sell copies of the Materials, and to permit persons to whom the +** Materials are furnished to do so, subject to the following conditions: +** +** The above copyright notice and this permission notice shall be included in +** all copies or substantial portions of the Materials. +** +** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS +** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND +** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS +** IN THE MATERIALS. 
+*/ + +#ifndef GLSLstd450_H +#define GLSLstd450_H + +static const int GLSLstd450Version = 100; +static const int GLSLstd450Revision = 3; + +enum GLSLstd450 { + GLSLstd450Bad = 0, // Don't use + + GLSLstd450Round = 1, + GLSLstd450RoundEven = 2, + GLSLstd450Trunc = 3, + GLSLstd450FAbs = 4, + GLSLstd450SAbs = 5, + GLSLstd450FSign = 6, + GLSLstd450SSign = 7, + GLSLstd450Floor = 8, + GLSLstd450Ceil = 9, + GLSLstd450Fract = 10, + + GLSLstd450Radians = 11, + GLSLstd450Degrees = 12, + GLSLstd450Sin = 13, + GLSLstd450Cos = 14, + GLSLstd450Tan = 15, + GLSLstd450Asin = 16, + GLSLstd450Acos = 17, + GLSLstd450Atan = 18, + GLSLstd450Sinh = 19, + GLSLstd450Cosh = 20, + GLSLstd450Tanh = 21, + GLSLstd450Asinh = 22, + GLSLstd450Acosh = 23, + GLSLstd450Atanh = 24, + GLSLstd450Atan2 = 25, + + GLSLstd450Pow = 26, + GLSLstd450Exp = 27, + GLSLstd450Log = 28, + GLSLstd450Exp2 = 29, + GLSLstd450Log2 = 30, + GLSLstd450Sqrt = 31, + GLSLstd450InverseSqrt = 32, + + GLSLstd450Determinant = 33, + GLSLstd450MatrixInverse = 34, + + GLSLstd450Modf = 35, // second operand needs an OpVariable to write to + GLSLstd450ModfStruct = 36, // no OpVariable operand + GLSLstd450FMin = 37, + GLSLstd450UMin = 38, + GLSLstd450SMin = 39, + GLSLstd450FMax = 40, + GLSLstd450UMax = 41, + GLSLstd450SMax = 42, + GLSLstd450FClamp = 43, + GLSLstd450UClamp = 44, + GLSLstd450SClamp = 45, + GLSLstd450FMix = 46, + GLSLstd450IMix = 47, // Reserved + GLSLstd450Step = 48, + GLSLstd450SmoothStep = 49, + + GLSLstd450Fma = 50, + GLSLstd450Frexp = 51, // second operand needs an OpVariable to write to + GLSLstd450FrexpStruct = 52, // no OpVariable operand + GLSLstd450Ldexp = 53, + + GLSLstd450PackSnorm4x8 = 54, + GLSLstd450PackUnorm4x8 = 55, + GLSLstd450PackSnorm2x16 = 56, + GLSLstd450PackUnorm2x16 = 57, + GLSLstd450PackHalf2x16 = 58, + GLSLstd450PackDouble2x32 = 59, + GLSLstd450UnpackSnorm2x16 = 60, + GLSLstd450UnpackUnorm2x16 = 61, + GLSLstd450UnpackHalf2x16 = 62, + GLSLstd450UnpackSnorm4x8 = 63, + GLSLstd450UnpackUnorm4x8 = 64, + GLSLstd450UnpackDouble2x32 = 65, + + GLSLstd450Length = 66, + GLSLstd450Distance = 67, + GLSLstd450Cross = 68, + GLSLstd450Normalize = 69, + GLSLstd450FaceForward = 70, + GLSLstd450Reflect = 71, + GLSLstd450Refract = 72, + + GLSLstd450FindILsb = 73, + GLSLstd450FindSMsb = 74, + GLSLstd450FindUMsb = 75, + + GLSLstd450InterpolateAtCentroid = 76, + GLSLstd450InterpolateAtSample = 77, + GLSLstd450InterpolateAtOffset = 78, + + GLSLstd450NMin = 79, + GLSLstd450NMax = 80, + GLSLstd450NClamp = 81, + + GLSLstd450Count +}; + +#endif // #ifndef GLSLstd450_H diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000000..d645695673 --- /dev/null +++ b/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000000..0564b650ad
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,41 @@
+TARGET := spirv-cross
+
+SOURCES := $(wildcard spirv_*.cpp)
+CLI_SOURCES := main.cpp
+
+OBJECTS := $(SOURCES:.cpp=.o)
+CLI_OBJECTS := $(CLI_SOURCES:.cpp=.o)
+
+STATIC_LIB := lib$(TARGET).a
+
+DEPS := $(OBJECTS:.o=.d) $(CLI_OBJECTS:.o=.d)
+
+CXXFLAGS += -std=c++11 -Wall -Wextra -Wshadow -D__STDC_LIMIT_MACROS
+
+ifeq ($(DEBUG), 1)
+	CXXFLAGS += -O0 -g
+else
+	CXXFLAGS += -O2 -DNDEBUG
+endif
+
+ifeq ($(SPIRV_CROSS_EXCEPTIONS_TO_ASSERTIONS), 1)
+	CXXFLAGS += -DSPIRV_CROSS_EXCEPTIONS_TO_ASSERTIONS -fno-exceptions
+endif
+
+all: $(TARGET)
+
+-include $(DEPS)
+
+$(TARGET): $(CLI_OBJECTS) $(STATIC_LIB)
+	$(CXX) -o $@ $(CLI_OBJECTS) $(STATIC_LIB) $(LDFLAGS)
+
+$(STATIC_LIB): $(OBJECTS)
+	$(AR) rcs $@ $(OBJECTS)
+
+%.o: %.cpp
+	$(CXX) -c -o $@ $< $(CXXFLAGS) -MMD
+
+clean:
+	rm -f $(TARGET) $(OBJECTS) $(CLI_OBJECTS) $(STATIC_LIB) $(DEPS)
+
+.PHONY: clean
diff --git a/README.md b/README.md
new file mode 100644
index 0000000000..415f9523d0
--- /dev/null
+++ b/README.md
@@ -0,0 +1,217 @@
+# SPIRV-Cross
+
+SPIRV-Cross is a tool designed for parsing and converting SPIR-V to other shader languages.
+
+[![Build Status](https://travis-ci.org/KhronosGroup/SPIRV-Cross.svg?branch=master)](https://travis-ci.org/KhronosGroup/SPIRV-Cross)
+
+## Features
+
+  - Convert SPIR-V to readable, usable and efficient GLSL
+  - Convert SPIR-V to readable, usable and efficient Metal Shading Language (MSL) [EXPERIMENTAL]
+  - Convert SPIR-V to debuggable C++ [EXPERIMENTAL]
+  - Reflection API to simplify the creation of Vulkan pipeline layouts
+  - Reflection API to modify and tweak OpDecorations
+  - Supports "all" of vertex, fragment, tessellation, geometry and compute shaders.
+
+SPIRV-Cross tries hard to emit readable and clean output from the SPIR-V.
+The goal is to emit GLSL or MSL that looks like it was written by a human and not awkward IR/assembly-like code.
+
+NOTE: Individual features are expected to be mostly complete, but it is possible that certain obscure GLSL features are not yet supported.
+However, most missing features are expected to be "trivial" improvements at this stage.
+
+## Building
+
+SPIRV-Cross has been tested on Linux, OSX and Windows.
+
+The make and CMake build flavors offer the option to treat exceptions as assertions. To disable exceptions for make just append SPIRV_CROSS_EXCEPTIONS_TO_ASSERTIONS=1 to the command line. For CMake append -DSPIRV_CROSS_EXCEPTIONS_TO_ASSERTIONS=ON. By default exceptions are enabled.
+
+### Linux and macOS
+
+Just run `make` on the command line. A recent GCC (4.8+) or Clang (3.x+) compiler is required as SPIRV-Cross uses C++11 extensively.
+
+### Windows
+
+MinGW-w64 based compilation works with `make`, and an MSVC 2013 solution is also included.
+
+## Usage
+
+### Using the C++ API
+
+To perform reflection and convert to other shader languages you can use the SPIRV-Cross API.
+For example:
+
+```
+#include "spirv_glsl.hpp"
+#include <vector>
+#include <utility>
+
+extern std::vector<uint32_t> load_spirv_file();
+
+int main()
+{
+	// Read SPIR-V from disk or similar.
+	std::vector<uint32_t> spirv_binary = load_spirv_file();
+
+	spirv_cross::CompilerGLSL glsl(std::move(spirv_binary));
+
+	// The SPIR-V is now parsed, and we can perform reflection on it.
+	spirv_cross::ShaderResources resources = glsl.get_shader_resources();
+
+	// Get all sampled images in the shader.
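+	// (sampled_images is only one of the lists on ShaderResources; uniform_buffers,
+	// storage_buffers, stage_inputs and friends can be walked the same way.)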
+	for (auto &resource : resources.sampled_images)
+	{
+		unsigned set = glsl.get_decoration(resource.id, spv::DecorationDescriptorSet);
+		unsigned binding = glsl.get_decoration(resource.id, spv::DecorationBinding);
+		printf("Image %s at set = %u, binding = %u\n", resource.name.c_str(), set, binding);
+
+		// Modify the decoration to prepare it for GLSL.
+		glsl.unset_decoration(resource.id, spv::DecorationDescriptorSet);
+
+		// Some arbitrary remapping if we want.
+		glsl.set_decoration(resource.id, spv::DecorationBinding, set * 16 + binding);
+	}
+
+	// Set some options.
+	spirv_cross::CompilerGLSL::Options options;
+	options.version = 310;
+	options.es = true;
+	glsl.set_options(options);
+
+	// Compile to GLSL, ready to give to GL driver.
+	std::string source = glsl.compile();
+}
+```
+
+#### Integrating SPIRV-Cross in a custom build system
+
+To add SPIRV-Cross to your own codebase, just copy the source and header files from the root directory
+and build the relevant .cpp files you need. Make sure to build with C++11 support, e.g. `-std=c++11` in GCC and Clang.
+Alternatively, the Makefile generates a libspirv-cross.a static library during build that can be linked in.
+
+### Creating a SPIR-V file from GLSL with glslang
+
+```
+glslangValidator -H -V -o test.spv test.frag
+```
+
+### Converting a SPIR-V file to GLSL ES
+
+```
+glslangValidator -H -V -o test.spv shaders/comp/basic.comp
+./spirv-cross --version 310 --es test.spv
+```
+
+#### Converting to desktop GLSL
+
+```
+glslangValidator -H -V -o test.spv shaders/comp/basic.comp
+./spirv-cross --version 330 test.spv --output test.comp
+```
+
+#### Disable prettifying optimizations
+
+```
+glslangValidator -H -V -o test.spv shaders/comp/basic.comp
+./spirv-cross --version 310 --es test.spv --output test.comp --force-temporary
+```
+
+### Using shaders generated from the C++ backend
+
+Please see `samples/cpp` where some GLSL shaders are compiled to SPIR-V, decompiled to C++ and run with test data.
+Reading through the samples should explain how to use the C++ interface.
+A simple Makefile is included to build all shaders in the directory.
+
+### Using SPIRV-Cross to output GLSL shaders from glslang HLSL
+
+#### Entry point
+
+When using SPIR-V shaders compiled from HLSL, there are some extra things you need to take care of.
+First make sure that the entry point is used correctly.
+If you forget to set the entry point correctly in glslangValidator (-e MyFancyEntryPoint),
+you will likely encounter this error message:
+
+```
+Cannot end a function before ending the current block.
+Likely cause: If this SPIR-V was created from glslang HLSL, make sure the entry point is valid.
+```
+
+#### Separate image samplers
+
+Another thing you need to remember is that samplers and textures in HLSL are separable, and not directly compatible with GLSL. If you need to use this with desktop GL/GLES, you need to call `Compiler::build_combined_image_samplers` first before calling `Compiler::compile`, or you will get an exception.
+
+```
+// From main.cpp
+// Builds a mapping for all combinations of images and samplers.
+compiler->build_combined_image_samplers();
+
+// Give the remapped combined samplers new names.
+// Here you can also set up decorations if you want (binding = #N).
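+// (Each remap entry exposes combined_id, image_id and sampler_id; a binding
+// decoration would be applied to remap.combined_id here.)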
+for (auto &remap : compiler->get_combined_image_samplers())
+{
+	compiler->set_name(remap.combined_id, join("SPIRV_Cross_Combined", compiler->get_name(remap.image_id),
+	                   compiler->get_name(remap.sampler_id)));
+}
+```
+
+If your target is Vulkan GLSL, `--vulkan-semantics` will emit separate image samplers as you'd expect.
+The command line client does this automatically, but if you're calling the library, you'll need to do this yourself.
+
+## Contributing
+
+Contributions to SPIRV-Cross are welcome. See Testing and Licensing sections for details.
+
+### Testing
+
+SPIRV-Cross maintains a test suite of shaders with reference output of how the output looks after going through a roundtrip through
+glslangValidator then back through SPIRV-Cross again. The reference files are stored inside the repository in order to be able to track regressions.
+
+All pull requests should ensure that test output does not change unexpectedly. This can be tested with `./test_shaders.py shaders`.
+However, when improving SPIRV-Cross there are of course legitimate cases where reference output should change.
+In these cases, run `./test_shaders.py shaders --update` to update the reference files and include these changes as part of the pull request.
+Always make sure you are running an up-to-date glslangValidator as well as SPIRV-Tools when updating reference files.
+
+In short, the master branch should always be able to run `./test_shaders.py shaders` without failure.
+
+When adding support for new features to SPIRV-Cross, a new shader and reference file should be added which covers usage of the new shader features in question.
+
+### Licensing
+
+Contributors of new files should add a copyright header at the top of every new source code file with their copyright
+along with the Apache 2.0 licensing stub.
+
+### Formatting
+
+SPIRV-Cross uses `clang-format` to automatically format code.
+Please use `clang-format` with the style sheet found in `.clang-format` to automatically format code before submitting a pull request.
+
+To make things easy, the `format_all.sh` script can be used to format all
+source files in the library. In this directory, run the following from the
+command line:
+
+    ./format_all.sh
+
+## ABI concerns
+
+### SPIR-V headers
+
+The current repository uses the latest SPIR-V and GLSL.std.450 headers.
+SPIR-V files created from older headers could have ABI issues.
+
+## Regression testing
+
+In shaders/, a collection of shaders is maintained for the purposes of regression testing.
+The current reference output is contained in reference/.
+`./test_shaders.py shaders` can be run to perform regression testing.
+
+See `./test_shaders.py --help` for more.
+
+### Updating regression tests
+
+When legitimate changes are found, use the `--update` flag to update regression files.
+Otherwise, `./test_shaders.py` will fail with an error code.
+
+### Mali Offline Compiler cycle counts
+
+To obtain a CSV of static shader cycle counts before and after going through spirv-cross, add
+the `--malisc` flag to `./test_shaders.py`. This requires the Mali Offline Compiler to be installed in PATH.
+
diff --git a/format_all.sh b/format_all.sh
new file mode 100755
index 0000000000..05efeb3eae
--- /dev/null
+++ b/format_all.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+
+for file in spirv_*.{cpp,hpp} include/spirv_cross/*.{hpp,h} samples/cpp/*.cpp main.cpp
+do
+	echo "Formatting file: $file ..."
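+	# -style=file picks up the .clang-format in the repository root; -i rewrites in place.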
+	clang-format -style=file -i $file
+done
diff --git a/include/spirv_cross/barrier.hpp b/include/spirv_cross/barrier.hpp
new file mode 100644
index 0000000000..db7cd87e83
--- /dev/null
+++ b/include/spirv_cross/barrier.hpp
@@ -0,0 +1,79 @@
+/*
+ * Copyright 2015-2016 ARM Limited
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SPIRV_CROSS_BARRIER_HPP
+#define SPIRV_CROSS_BARRIER_HPP
+
+#include <atomic>
+#include <thread>
+
+namespace spirv_cross
+{
+class Barrier
+{
+public:
+	Barrier()
+	{
+		count.store(0);
+		iteration.store(0);
+	}
+
+	void set_release_divisor(unsigned divisor)
+	{
+		this->divisor = divisor;
+	}
+
+	static inline void memoryBarrier()
+	{
+		std::atomic_thread_fence(std::memory_order_seq_cst);
+	}
+
+	void reset_counter()
+	{
+		count.store(0);
+		iteration.store(0);
+	}
+
+	void wait()
+	{
+		unsigned target_iteration = iteration.load(std::memory_order_relaxed) + 1;
+		// Overflows cleanly.
+		unsigned target_count = divisor * target_iteration;
+
+		// Barriers don't enforce memory ordering.
+		// Be as relaxed about the barrier as we possibly can!
+		unsigned c = count.fetch_add(1u, std::memory_order_relaxed);
+
+		if (c + 1 == target_count)
+		{
+			iteration.store(target_iteration, std::memory_order_relaxed);
+		}
+		else
+		{
+			// If we have more threads than the CPU, don't hog the CPU for very long periods of time.
+			while (iteration.load(std::memory_order_relaxed) != target_iteration)
+				std::this_thread::yield();
+		}
+	}
+
+private:
+	unsigned divisor = 1;
+	std::atomic<unsigned> count;
+	std::atomic<unsigned> iteration;
+};
+}
+
+#endif
diff --git a/include/spirv_cross/external_interface.h b/include/spirv_cross/external_interface.h
new file mode 100644
index 0000000000..3e7b1f8ba2
--- /dev/null
+++ b/include/spirv_cross/external_interface.h
@@ -0,0 +1,126 @@
+/*
+ * Copyright 2015-2016 ARM Limited
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SPIRV_CROSS_EXTERNAL_INTERFACE_H
+#define SPIRV_CROSS_EXTERNAL_INTERFACE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stddef.h>
+
+typedef struct spirv_cross_shader spirv_cross_shader_t;
+
+struct spirv_cross_interface
+{
+	spirv_cross_shader_t *(*construct)(void);
+	void (*destruct)(spirv_cross_shader_t *thiz);
+	void (*invoke)(spirv_cross_shader_t *thiz);
+};
+
+void spirv_cross_set_stage_input(spirv_cross_shader_t *thiz, unsigned location, void *data, size_t size);
+
+void spirv_cross_set_stage_output(spirv_cross_shader_t *thiz, unsigned location, void *data, size_t size);
+
+void spirv_cross_set_push_constant(spirv_cross_shader_t *thiz, void *data, size_t size);
+
+void spirv_cross_set_uniform_constant(spirv_cross_shader_t *thiz, unsigned location, void *data, size_t size);
+
+void spirv_cross_set_resource(spirv_cross_shader_t *thiz, unsigned set, unsigned binding, void **data, size_t size);
+
+const struct spirv_cross_interface *spirv_cross_get_interface(void);
+
+typedef enum spirv_cross_builtin {
+	SPIRV_CROSS_BUILTIN_POSITION = 0,
+	SPIRV_CROSS_BUILTIN_FRAG_COORD = 1,
+	SPIRV_CROSS_BUILTIN_WORK_GROUP_ID = 2,
+	SPIRV_CROSS_BUILTIN_NUM_WORK_GROUPS = 3,
+	SPIRV_CROSS_NUM_BUILTINS
+} spirv_cross_builtin;
+
+void spirv_cross_set_builtin(spirv_cross_shader_t *thiz, spirv_cross_builtin builtin, void *data, size_t size);
+
+#define SPIRV_CROSS_NUM_DESCRIPTOR_SETS 4
+#define SPIRV_CROSS_NUM_DESCRIPTOR_BINDINGS 16
+#define SPIRV_CROSS_NUM_STAGE_INPUTS 16
+#define SPIRV_CROSS_NUM_STAGE_OUTPUTS 16
+#define SPIRV_CROSS_NUM_UNIFORM_CONSTANTS 32
+
+enum spirv_cross_format
+{
+	SPIRV_CROSS_FORMAT_R8_UNORM = 0,
+	SPIRV_CROSS_FORMAT_R8G8_UNORM = 1,
+	SPIRV_CROSS_FORMAT_R8G8B8_UNORM = 2,
+	SPIRV_CROSS_FORMAT_R8G8B8A8_UNORM = 3,
+
+	SPIRV_CROSS_NUM_FORMATS
+};
+
+enum spirv_cross_wrap
+{
+	SPIRV_CROSS_WRAP_CLAMP_TO_EDGE = 0,
+	SPIRV_CROSS_WRAP_REPEAT = 1,
+
+	SPIRV_CROSS_NUM_WRAP
+};
+
+enum spirv_cross_filter
+{
+	SPIRV_CROSS_FILTER_NEAREST = 0,
+	SPIRV_CROSS_FILTER_LINEAR = 1,
+
+	SPIRV_CROSS_NUM_FILTER
+};
+
+enum spirv_cross_mipfilter
+{
+	SPIRV_CROSS_MIPFILTER_BASE = 0,
+	SPIRV_CROSS_MIPFILTER_NEAREST = 1,
+	SPIRV_CROSS_MIPFILTER_LINEAR = 2,
+
+	SPIRV_CROSS_NUM_MIPFILTER
+};
+
+struct spirv_cross_miplevel
+{
+	const void *data;
+	unsigned width, height;
+	size_t stride;
+};
+
+struct spirv_cross_sampler_info
+{
+	const struct spirv_cross_miplevel *mipmaps;
+	unsigned num_mipmaps;
+
+	enum spirv_cross_format format;
+	enum spirv_cross_wrap wrap_s;
+	enum spirv_cross_wrap wrap_t;
+	enum spirv_cross_filter min_filter;
+	enum spirv_cross_filter mag_filter;
+	enum spirv_cross_mipfilter mip_filter;
+};
+
+typedef struct spirv_cross_sampler_2d spirv_cross_sampler_2d_t;
+spirv_cross_sampler_2d_t *spirv_cross_create_sampler_2d(const struct spirv_cross_sampler_info *info);
+void spirv_cross_destroy_sampler_2d(spirv_cross_sampler_2d_t *samp);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/include/spirv_cross/image.hpp b/include/spirv_cross/image.hpp
new file mode 100644
index 0000000000..927c426ea7
--- /dev/null
+++ b/include/spirv_cross/image.hpp
@@ -0,0 +1,62 @@
+/*
+ * Copyright 2015-2016 ARM Limited
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SPIRV_CROSS_IMAGE_HPP
+#define SPIRV_CROSS_IMAGE_HPP
+
+#ifndef GLM_SWIZZLE
+#define GLM_SWIZZLE
+#endif
+
+#ifndef GLM_FORCE_RADIANS
+#define GLM_FORCE_RADIANS
+#endif
+
+#include <glm/glm.hpp>
+
+namespace spirv_cross
+{
+template <typename T>
+struct image2DBase
+{
+	virtual ~image2DBase() = default;
+	inline virtual T load(glm::ivec2 coord) const
+	{
+		return T(0, 0, 0, 1);
+	}
+	inline virtual void store(glm::ivec2 coord, const T &v)
+	{
+	}
+};
+
+typedef image2DBase<glm::vec4> image2D;
+typedef image2DBase<glm::ivec4> iimage2D;
+typedef image2DBase<glm::uvec4> uimage2D;
+
+template <typename T>
+inline T imageLoad(const image2DBase<T> &image, glm::ivec2 coord)
+{
+	return image.load(coord);
+}
+
+template <typename T>
+void imageStore(image2DBase<T> &image, glm::ivec2 coord, const T &value)
+{
+	image.store(coord, value);
+}
+}
+
+#endif
diff --git a/include/spirv_cross/internal_interface.hpp b/include/spirv_cross/internal_interface.hpp
new file mode 100644
index 0000000000..6d1d636a51
--- /dev/null
+++ b/include/spirv_cross/internal_interface.hpp
@@ -0,0 +1,603 @@
+/*
+ * Copyright 2015-2016 ARM Limited
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SPIRV_CROSS_INTERNAL_INTERFACE_HPP
+#define SPIRV_CROSS_INTERNAL_INTERFACE_HPP
+
+// This file must only be included by the shader generated by spirv-cross!
+
+#ifndef GLM_SWIZZLE
+#define GLM_SWIZZLE
+#endif
+
+#ifndef GLM_FORCE_RADIANS
+#define GLM_FORCE_RADIANS
+#endif
+
+#include <glm/glm.hpp>
+
+#include "barrier.hpp"
+#include "external_interface.h"
+#include "image.hpp"
+#include "sampler.hpp"
+#include "thread_group.hpp"
+#include <assert.h>
+#include <stdint.h>
+
+namespace internal
+{
+// Adaptor helpers to adapt GLSL access chain syntax to C++.
+// Don't bother with arrays of arrays on uniforms ...
+// Would likely need horribly complex variadic template munging.
+
+template <typename T>
+struct Interface
+{
+	enum
+	{
+		ArraySize = 1,
+		Size = sizeof(T)
+	};
+
+	Interface()
+	    : ptr(0)
+	{
+	}
+	T &get()
+	{
+		assert(ptr);
+		return *ptr;
+	}
+
+	T *ptr;
+};
+
+// For array types, return a pointer instead.
+template <typename T, unsigned U>
+struct Interface<T[U]>
+{
+	enum
+	{
+		ArraySize = U,
+		Size = U * sizeof(T)
+	};
+
+	Interface()
+	    : ptr(0)
+	{
+	}
+	T *get()
+	{
+		assert(ptr);
+		return ptr;
+	}
+
+	T *ptr;
+};
+
+// For case when array size is 1, avoid double dereference.
+template <typename T>
+struct PointerInterface
+{
+	enum
+	{
+		ArraySize = 1,
+		Size = sizeof(T *)
+	};
+	enum
+	{
+		PreDereference = true
+	};
+
+	PointerInterface()
+	    : ptr(0)
+	{
+	}
+
+	T &get()
+	{
+		assert(ptr);
+		return *ptr;
+	}
+
+	T *ptr;
+};
+
+// Automatically converts a pointer down to reference to match GLSL syntax.
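+// (For illustration: with a DereferenceAdaptor<float> wrapping a float ** table,
+// "adaptor[2]" yields a float & just like "arr[2]" would in GLSL.)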
+template <typename T>
+struct DereferenceAdaptor
+{
+	DereferenceAdaptor(T **ptr)
+	    : ptr(ptr)
+	{
+	}
+	T &operator[](unsigned index) const
+	{
+		return *(ptr[index]);
+	}
+	T **ptr;
+};
+
+// We can't have a linear array of T* since T* can be an abstract type in case of samplers.
+// We also need a list of pointers since we can have run-time length SSBOs.
+template <typename T, unsigned U>
+struct PointerInterface<T[U]>
+{
+	enum
+	{
+		ArraySize = U,
+		Size = sizeof(T *) * U
+	};
+	enum
+	{
+		PreDereference = false
+	};
+	PointerInterface()
+	    : ptr(0)
+	{
+	}
+
+	DereferenceAdaptor<T> get()
+	{
+		assert(ptr);
+		return DereferenceAdaptor<T>(ptr);
+	}
+
+	T **ptr;
+};
+
+// Resources can be more abstract and be unsized,
+// so we need to have an array of pointers for those cases.
+template <typename T>
+struct Resource : PointerInterface<T>
+{
+};
+
+// POD with no unknown sizes, so we can express these as flat arrays.
+template <typename T>
+struct UniformConstant : Interface<T>
+{
+};
+template <typename T>
+struct StageInput : Interface<T>
+{
+};
+template <typename T>
+struct StageOutput : Interface<T>
+{
+};
+template <typename T>
+struct PushConstant : Interface<T>
+{
+};
+}
+
+struct spirv_cross_shader
+{
+	struct PPSize
+	{
+		PPSize()
+		    : ptr(0)
+		    , size(0)
+		{
+		}
+		void **ptr;
+		size_t size;
+	};
+
+	struct PPSizeResource
+	{
+		PPSizeResource()
+		    : ptr(0)
+		    , size(0)
+		    , pre_dereference(false)
+		{
+		}
+		void **ptr;
+		size_t size;
+		bool pre_dereference;
+	};
+
+	PPSizeResource resources[SPIRV_CROSS_NUM_DESCRIPTOR_SETS][SPIRV_CROSS_NUM_DESCRIPTOR_BINDINGS];
+	PPSize stage_inputs[SPIRV_CROSS_NUM_STAGE_INPUTS];
+	PPSize stage_outputs[SPIRV_CROSS_NUM_STAGE_OUTPUTS];
+	PPSize uniform_constants[SPIRV_CROSS_NUM_UNIFORM_CONSTANTS];
+	PPSize push_constant;
+	PPSize builtins[SPIRV_CROSS_NUM_BUILTINS];
+
+	template <typename U>
+	void register_builtin(spirv_cross_builtin builtin, const U &value)
+	{
+		assert(!builtins[builtin].ptr);
+
+		builtins[builtin].ptr = (void **)&value.ptr;
+		builtins[builtin].size = sizeof(*value.ptr) * U::ArraySize;
+	}
+
+	void set_builtin(spirv_cross_builtin builtin, void *data, size_t size)
+	{
+		assert(builtins[builtin].ptr);
+		assert(size >= builtins[builtin].size);
+
+		*builtins[builtin].ptr = data;
+	}
+
+	template <typename T>
+	void register_resource(const internal::Resource<T> &value, unsigned set, unsigned binding)
+	{
+		assert(set < SPIRV_CROSS_NUM_DESCRIPTOR_SETS);
+		assert(binding < SPIRV_CROSS_NUM_DESCRIPTOR_BINDINGS);
+		assert(!resources[set][binding].ptr);
+
+		resources[set][binding].ptr = (void **)&value.ptr;
+		resources[set][binding].size = internal::Resource<T>::Size;
+		resources[set][binding].pre_dereference = internal::Resource<T>::PreDereference;
+	}
+
+	template <typename T>
+	void register_stage_input(const internal::StageInput<T> &value, unsigned location)
+	{
+		assert(location < SPIRV_CROSS_NUM_STAGE_INPUTS);
+		assert(!stage_inputs[location].ptr);
+
+		stage_inputs[location].ptr = (void **)&value.ptr;
+		stage_inputs[location].size = internal::StageInput<T>::Size;
+	}
+
+	template <typename T>
+	void register_stage_output(const internal::StageOutput<T> &value, unsigned location)
+	{
+		assert(location < SPIRV_CROSS_NUM_STAGE_OUTPUTS);
+		assert(!stage_outputs[location].ptr);
+
+		stage_outputs[location].ptr = (void **)&value.ptr;
+		stage_outputs[location].size = internal::StageOutput<T>::Size;
+	}
+
+	template <typename T>
+	void register_uniform_constant(const internal::UniformConstant<T> &value, unsigned location)
+	{
+		assert(location < SPIRV_CROSS_NUM_UNIFORM_CONSTANTS);
+		assert(!uniform_constants[location].ptr);
+
+		uniform_constants[location].ptr = (void **)&value.ptr;
+		uniform_constants[location].size = internal::UniformConstant<T>::Size;
+	}
+
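+	// (A generated shader is expected to call the register_* helpers above once per
+	// interface variable from its constructor; the host later points them at real
+	// memory through the spirv_cross_set_* entry points in external_interface.h.)
+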
+	template <typename T>
+	void register_push_constant(const internal::PushConstant<T> &value)
+	{
+		assert(!push_constant.ptr);
+
+		push_constant.ptr = (void **)&value.ptr;
+		push_constant.size = internal::PushConstant<T>::Size;
+	}
+
+	void set_stage_input(unsigned location, void *data, size_t size)
+	{
+		assert(location < SPIRV_CROSS_NUM_STAGE_INPUTS);
+		assert(stage_inputs[location].ptr);
+		assert(size >= stage_inputs[location].size);
+
+		*stage_inputs[location].ptr = data;
+	}
+
+	void set_stage_output(unsigned location, void *data, size_t size)
+	{
+		assert(location < SPIRV_CROSS_NUM_STAGE_OUTPUTS);
+		assert(stage_outputs[location].ptr);
+		assert(size >= stage_outputs[location].size);
+
+		*stage_outputs[location].ptr = data;
+	}
+
+	void set_uniform_constant(unsigned location, void *data, size_t size)
+	{
+		assert(location < SPIRV_CROSS_NUM_UNIFORM_CONSTANTS);
+		assert(uniform_constants[location].ptr);
+		assert(size >= uniform_constants[location].size);
+
+		*uniform_constants[location].ptr = data;
+	}
+
+	void set_push_constant(void *data, size_t size)
+	{
+		assert(push_constant.ptr);
+		assert(size >= push_constant.size);
+
+		*push_constant.ptr = data;
+	}
+
+	void set_resource(unsigned set, unsigned binding, void **data, size_t size)
+	{
+		assert(set < SPIRV_CROSS_NUM_DESCRIPTOR_SETS);
+		assert(binding < SPIRV_CROSS_NUM_DESCRIPTOR_BINDINGS);
+		assert(resources[set][binding].ptr);
+		assert(size >= resources[set][binding].size);
+
+		// We're using the regular PointerInterface, dereference ahead of time.
+		if (resources[set][binding].pre_dereference)
+			*resources[set][binding].ptr = *data;
+		else
+			*resources[set][binding].ptr = data;
+	}
+};
+
+namespace spirv_cross
+{
+template <typename T>
+struct BaseShader : spirv_cross_shader
+{
+	void invoke()
+	{
+		static_cast<T *>(this)->main();
+	}
+};
+
+struct FragmentResources
+{
+	internal::StageOutput<glm::vec4> gl_FragCoord;
+	void init(spirv_cross_shader &s)
+	{
+		s.register_builtin(SPIRV_CROSS_BUILTIN_FRAG_COORD, gl_FragCoord);
+	}
+#define gl_FragCoord __res->gl_FragCoord.get()
+};
+
+template <typename T, typename Res>
+struct FragmentShader : BaseShader<FragmentShader<T, Res>>
+{
+	inline void main()
+	{
+		impl.main();
+	}
+
+	FragmentShader()
+	{
+		resources.init(*this);
+		impl.__res = &resources;
+	}
+
+	T impl;
+	Res resources;
+};
+
+struct VertexResources
+{
+	internal::StageOutput<glm::vec4> gl_Position;
+	void init(spirv_cross_shader &s)
+	{
+		s.register_builtin(SPIRV_CROSS_BUILTIN_POSITION, gl_Position);
+	}
+#define gl_Position __res->gl_Position.get()
+};
+
+template <typename T, typename Res>
+struct VertexShader : BaseShader<VertexShader<T, Res>>
+{
+	inline void main()
+	{
+		impl.main();
+	}
+
+	VertexShader()
+	{
+		resources.init(*this);
+		impl.__res = &resources;
+	}
+
+	T impl;
+	Res resources;
+};
+
+struct TessEvaluationResources
+{
+	inline void init(spirv_cross_shader &)
+	{
+	}
+};
+
+template <typename T, typename Res>
+struct TessEvaluationShader : BaseShader<TessEvaluationShader<T, Res>>
+{
+	inline void main()
+	{
+		impl.main();
+	}
+
+	TessEvaluationShader()
+	{
+		resources.init(*this);
+		impl.__res = &resources;
+	}
+
+	T impl;
+	Res resources;
+};
+
+struct TessControlResources
+{
+	inline void init(spirv_cross_shader &)
+	{
+	}
+};
+
+template <typename T, typename Res>
+struct TessControlShader : BaseShader<TessControlShader<T, Res>>
+{
+	inline void main()
+	{
+		impl.main();
+	}
+
+	TessControlShader()
+	{
+		resources.init(*this);
+		impl.__res = &resources;
+	}
+
+	T impl;
+	Res resources;
+};
+
+struct GeometryResources
+{
+	inline void init(spirv_cross_shader &)
+	{
+	}
+};
+
+template <typename T, typename Res>
+struct GeometryShader : BaseShader<GeometryShader<T, Res>>
+{
+	inline void main()
+	{
+		impl.main();
+	}
+
+	GeometryShader()
+	{
+		resources.init(*this);
+		impl.__res = &resources;
+	}
+
+	T impl;
+	Res resources;
+};
+
+struct ComputeResources
+{
+	internal::StageInput<glm::uvec3> gl_WorkGroupID__;
+	internal::StageInput<glm::uvec3> gl_NumWorkGroups__;
+	void init(spirv_cross_shader &s)
+	{
+		s.register_builtin(SPIRV_CROSS_BUILTIN_WORK_GROUP_ID, gl_WorkGroupID__);
+		s.register_builtin(SPIRV_CROSS_BUILTIN_NUM_WORK_GROUPS, gl_NumWorkGroups__);
+	}
+#define gl_WorkGroupID __res->gl_WorkGroupID__.get()
+#define gl_NumWorkGroups __res->gl_NumWorkGroups__.get()
+
+	Barrier barrier__;
+#define barrier() __res->barrier__.wait()
+};
+
+struct ComputePrivateResources
+{
+	uint32_t gl_LocalInvocationIndex__;
+#define gl_LocalInvocationIndex __priv_res.gl_LocalInvocationIndex__
+	glm::uvec3 gl_LocalInvocationID__;
+#define gl_LocalInvocationID __priv_res.gl_LocalInvocationID__
+	glm::uvec3 gl_GlobalInvocationID__;
+#define gl_GlobalInvocationID __priv_res.gl_GlobalInvocationID__
+};
+
+template <typename T, typename Res, unsigned WorkGroupX, unsigned WorkGroupY, unsigned WorkGroupZ>
+struct ComputeShader : BaseShader<ComputeShader<T, Res, WorkGroupX, WorkGroupY, WorkGroupZ>>
+{
+	inline void main()
+	{
+		resources.barrier__.reset_counter();
+
+		for (unsigned z = 0; z < WorkGroupZ; z++)
+			for (unsigned y = 0; y < WorkGroupY; y++)
+				for (unsigned x = 0; x < WorkGroupX; x++)
+					impl[z][y][x].__priv_res.gl_GlobalInvocationID__ =
+					    glm::uvec3(WorkGroupX, WorkGroupY, WorkGroupZ) * resources.gl_WorkGroupID__.get() +
+					    glm::uvec3(x, y, z);
+
+		group.run();
+		group.wait();
+	}
+
+	ComputeShader()
+	    : group(&impl[0][0][0])
+	{
+		resources.init(*this);
+		resources.barrier__.set_release_divisor(WorkGroupX * WorkGroupY * WorkGroupZ);
+
+		unsigned i = 0;
+		for (unsigned z = 0; z < WorkGroupZ; z++)
+		{
+			for (unsigned y = 0; y < WorkGroupY; y++)
+			{
+				for (unsigned x = 0; x < WorkGroupX; x++)
+				{
+					impl[z][y][x].__priv_res.gl_LocalInvocationID__ = glm::uvec3(x, y, z);
+					impl[z][y][x].__priv_res.gl_LocalInvocationIndex__ = i++;
+					impl[z][y][x].__res = &resources;
+				}
+			}
+		}
+	}
+
+	T impl[WorkGroupZ][WorkGroupY][WorkGroupX];
+	ThreadGroup<T, WorkGroupX * WorkGroupY * WorkGroupZ> group;
+	Res resources;
+};
+
+inline void memoryBarrierShared()
+{
+	Barrier::memoryBarrier();
+}
+inline void memoryBarrier()
+{
+	Barrier::memoryBarrier();
+}
+// TODO: Rest of the barriers.
+
+// Atomics
+template <typename T>
+inline T atomicAdd(T &v, T a)
+{
+	static_assert(sizeof(std::atomic<T>) == sizeof(T), "Cannot cast properly to std::atomic<T>.");
+
+	// We need explicit memory barriers in GLSL to enforce any ordering.
+	// FIXME: Can we really cast this? There is no other way I think ...
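+	// (Caveat: as the FIXME notes, this cast assumes std::atomic<T> is
+	// layout-compatible with plain T, which the C++ standard does not guarantee.)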
+	return std::atomic_fetch_add_explicit(reinterpret_cast<std::atomic<T> *>(&v), a, std::memory_order_relaxed);
+}
+}
+
+void spirv_cross_set_stage_input(spirv_cross_shader_t *shader, unsigned location, void *data, size_t size)
+{
+	shader->set_stage_input(location, data, size);
+}
+
+void spirv_cross_set_stage_output(spirv_cross_shader_t *shader, unsigned location, void *data, size_t size)
+{
+	shader->set_stage_output(location, data, size);
+}
+
+void spirv_cross_set_uniform_constant(spirv_cross_shader_t *shader, unsigned location, void *data, size_t size)
+{
+	shader->set_uniform_constant(location, data, size);
+}
+
+void spirv_cross_set_resource(spirv_cross_shader_t *shader, unsigned set, unsigned binding, void **data, size_t size)
+{
+	shader->set_resource(set, binding, data, size);
+}
+
+void spirv_cross_set_push_constant(spirv_cross_shader_t *shader, void *data, size_t size)
+{
+	shader->set_push_constant(data, size);
+}
+
+void spirv_cross_set_builtin(spirv_cross_shader_t *shader, spirv_cross_builtin builtin, void *data, size_t size)
+{
+	shader->set_builtin(builtin, data, size);
+}
+
+#endif
diff --git a/include/spirv_cross/sampler.hpp b/include/spirv_cross/sampler.hpp
new file mode 100644
index 0000000000..e38e569aab
--- /dev/null
+++ b/include/spirv_cross/sampler.hpp
@@ -0,0 +1,105 @@
+/*
+ * Copyright 2015-2016 ARM Limited
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SPIRV_CROSS_SAMPLER_HPP
+#define SPIRV_CROSS_SAMPLER_HPP
+
+#include <vector>
+
+namespace spirv_cross
+{
+struct spirv_cross_sampler_2d
+{
+	inline virtual ~spirv_cross_sampler_2d()
+	{
+	}
+};
+
+template <typename T>
+struct sampler2DBase : spirv_cross_sampler_2d
+{
+	sampler2DBase(const spirv_cross_sampler_info *info)
+	{
+		mips.insert(mips.end(), info->mipmaps, info->mipmaps + info->num_mipmaps);
+		format = info->format;
+		wrap_s = info->wrap_s;
+		wrap_t = info->wrap_t;
+		min_filter = info->min_filter;
+		mag_filter = info->mag_filter;
+		mip_filter = info->mip_filter;
+	}
+
+	inline virtual T sample(glm::vec2 uv, float bias)
+	{
+		return sampleLod(uv, bias);
+	}
+
+	inline virtual T sampleLod(glm::vec2 uv, float lod)
+	{
+		if (mag_filter == SPIRV_CROSS_FILTER_NEAREST)
+		{
+			uv.x = wrap(uv.x, wrap_s, mips[0].width);
+			uv.y = wrap(uv.y, wrap_t, mips[0].height);
+			glm::vec2 uv_full = uv * glm::vec2(mips[0].width, mips[0].height);
+
+			int x = int(uv_full.x);
+			int y = int(uv_full.y);
+			return sample(x, y, 0);
+		}
+		else
+		{
+			return T(0, 0, 0, 1);
+		}
+	}
+
+	inline float wrap(float v, spirv_cross_wrap wrap, unsigned size)
+	{
+		switch (wrap)
+		{
+		case SPIRV_CROSS_WRAP_REPEAT:
+			return v - glm::floor(v);
+		case SPIRV_CROSS_WRAP_CLAMP_TO_EDGE:
+		{
+			float half = 0.5f / size;
+			return glm::clamp(v, half, 1.0f - half);
+		}
+
+		default:
+			return 0.0f;
+		}
+	}
+
+	std::vector<spirv_cross_miplevel> mips;
+	spirv_cross_format format;
+	spirv_cross_wrap wrap_s;
+	spirv_cross_wrap wrap_t;
+	spirv_cross_filter min_filter;
+	spirv_cross_filter mag_filter;
+	spirv_cross_mipfilter mip_filter;
+};
+
+typedef sampler2DBase<glm::vec4> sampler2D;
+typedef sampler2DBase<glm::ivec4> isampler2D;
+typedef sampler2DBase<glm::uvec4> usampler2D;
+
+template <typename T>
+inline T texture(const sampler2DBase<T> &samp, const glm::vec2 &uv, float bias = 0.0f)
+{
+	return samp.sample(uv, bias);
+}
+}
+
+#endif
diff --git a/include/spirv_cross/thread_group.hpp b/include/spirv_cross/thread_group.hpp
new file mode 100644
index 0000000000..7aab9595fc
--- /dev/null
+++ b/include/spirv_cross/thread_group.hpp
@@ -0,0 +1,113 @@
+/*
+ * Copyright 2015-2016 ARM Limited
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SPIRV_CROSS_THREAD_GROUP_HPP
+#define SPIRV_CROSS_THREAD_GROUP_HPP
+
+#include <condition_variable>
+#include <mutex>
+#include <thread>
+
+namespace spirv_cross
+{
+template <typename T, unsigned Size>
+class ThreadGroup
+{
+public:
+	ThreadGroup(T *impl)
+	{
+		for (unsigned i = 0; i < Size; i++)
+			workers[i].start(&impl[i]);
+	}
+
+	void run()
+	{
+		for (auto &worker : workers)
+			worker.run();
+	}
+
+	void wait()
+	{
+		for (auto &worker : workers)
+			worker.wait();
+	}
+
+private:
+	struct Thread
+	{
+		enum State
+		{
+			Idle,
+			Running,
+			Dying
+		};
+		State state = Idle;
+
+		void start(T *impl)
+		{
+			worker = std::thread([impl, this] {
+				for (;;)
+				{
+					{
+						std::unique_lock<std::mutex> l{ lock };
+						cond.wait(l, [this] { return state != Idle; });
+						if (state == Dying)
+							break;
+					}
+
+					impl->main();
+
+					std::lock_guard<std::mutex> l{ lock };
+					state = Idle;
+					cond.notify_one();
+				}
+			});
+		}
+
+		void wait()
+		{
+			std::unique_lock<std::mutex> l{ lock };
+			cond.wait(l, [this] { return state == Idle; });
+		}
+
+		void run()
+		{
+			std::lock_guard<std::mutex> l{ lock };
+			state = Running;
+			cond.notify_one();
+		}
+
+		~Thread()
+		{
+			if (worker.joinable())
+			{
+				{
+					std::lock_guard<std::mutex> l{ lock };
+					state = Dying;
+					cond.notify_one();
+				}
+				worker.join();
+			}
+		}
+		std::thread worker;
+		std::condition_variable cond;
+		std::mutex lock;
+	};
+	Thread workers[Size];
+};
+}
+
+#endif
diff --git a/jni/Android.mk b/jni/Android.mk
new file mode 100644
index 0000000000..ca5014d63a
--- /dev/null
+++ b/jni/Android.mk
@@ -0,0 +1,12 @@
+LOCAL_PATH := $(call my-dir)
+
+include $(CLEAR_VARS)
+
+LOCAL_CFLAGS += -std=c++11 -Wall -Wextra
+LOCAL_MODULE := spirv-cross
+LOCAL_SRC_FILES := ../spirv_cfg.cpp ../spirv_cross.cpp ../spirv_glsl.cpp ../spirv_msl.cpp ../spirv_cpp.cpp
+LOCAL_CPP_FEATURES := exceptions
+LOCAL_ARM_MODE := arm
+LOCAL_CFLAGS += -D__STDC_LIMIT_MACROS
+
+include $(BUILD_STATIC_LIBRARY)
diff --git a/jni/Application.mk b/jni/Application.mk
new file mode 100644
index 0000000000..9a2e77f2d1
--- /dev/null
+++ b/jni/Application.mk
@@ -0,0 +1,2 @@
+APP_STL := c++_static
+APP_ABI := armeabi-v7a
diff --git a/main.cpp b/main.cpp
new file mode 100644
index 0000000000..840b0f2641
--- /dev/null
+++ b/main.cpp
@@ -0,0 +1,712 @@
+/*
+ * Copyright 2015-2016 ARM Limited
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "spirv_cpp.hpp"
+#include "spirv_msl.hpp"
+#include <algorithm>
+#include <cstdio>
+#include <cstring>
+#include <functional>
+#include <limits>
+#include <memory>
+#include <stdexcept>
+#include <unordered_map>
+#include <unordered_set>
+
+using namespace spv;
+using namespace spirv_cross;
+using namespace std;
+
+#ifdef SPIRV_CROSS_EXCEPTIONS_TO_ASSERTIONS
+#define THROW(x)                   \
+	do                             \
+	{                              \
+		fprintf(stderr, "%s.", x); \
+		exit(1);                   \
+	} while (0)
+#else
+#define THROW(x) throw runtime_error(x)
+#endif
+
+struct CLIParser;
+struct CLICallbacks
+{
+	void add(const char *cli, const function<void(CLIParser &)> &func)
+	{
+		callbacks[cli] = func;
+	}
+	unordered_map<string, function<void(CLIParser &)>> callbacks;
+	function<void()> error_handler;
+	function<void(const char *)> default_handler;
+};
+
+struct CLIParser
+{
+	CLIParser(CLICallbacks cbs_, int argc_, char *argv_[])
+	    : cbs(move(cbs_))
+	    , argc(argc_)
+	    , argv(argv_)
+	{
+	}
+
+	bool parse()
+	{
+#ifndef SPIRV_CROSS_EXCEPTIONS_TO_ASSERTIONS
+		try
+#endif
+		{
+			while (argc && !ended_state)
+			{
+				const char *next = *argv++;
+				argc--;
+
+				if (*next != '-' && cbs.default_handler)
+				{
+					cbs.default_handler(next);
+				}
+				else
+				{
+					auto itr = cbs.callbacks.find(next);
+					if (itr == ::end(cbs.callbacks))
+					{
+						THROW("Invalid argument");
+					}
+
+					itr->second(*this);
+				}
+			}
+
+			return true;
+		}
+#ifndef SPIRV_CROSS_EXCEPTIONS_TO_ASSERTIONS
+		catch (...)
+		{
+			if (cbs.error_handler)
+			{
+				cbs.error_handler();
+			}
+			return false;
+		}
+#endif
+	}
+
+	void end()
+	{
+		ended_state = true;
+	}
+
+	uint32_t next_uint()
+	{
+		if (!argc)
+		{
+			THROW("Tried to parse uint, but nothing left in arguments");
+		}
+
+		uint32_t val = stoul(*argv);
+		if (val > numeric_limits<uint32_t>::max())
+		{
+			THROW("next_uint() out of range");
+		}
+
+		argc--;
+		argv++;
+
+		return val;
+	}
+
+	double next_double()
+	{
+		if (!argc)
+		{
+			THROW("Tried to parse double, but nothing left in arguments");
+		}
+
+		double val = stod(*argv);
+
+		argc--;
+		argv++;
+
+		return val;
+	}
+
+	const char *next_string()
+	{
+		if (!argc)
+		{
+			THROW("Tried to parse string, but nothing left in arguments");
+		}
+
+		const char *ret = *argv;
+		argc--;
+		argv++;
+		return ret;
+	}
+
+	CLICallbacks cbs;
+	int argc;
+	char **argv;
+	bool ended_state = false;
+};
+
+static vector<uint32_t> read_spirv_file(const char *path)
+{
+	FILE *file = fopen(path, "rb");
+	if (!file)
+	{
+		fprintf(stderr, "Failed to open SPIRV file: %s\n", path);
+		return {};
+	}
+
+	fseek(file, 0, SEEK_END);
+	long len = ftell(file) / sizeof(uint32_t);
+	rewind(file);
+
+	vector<uint32_t> spirv(len);
+	if (fread(spirv.data(), sizeof(uint32_t), len, file) != size_t(len))
+		spirv.clear();
+
+	fclose(file);
+	return spirv;
+}
+
+static bool write_string_to_file(const char *path, const char *string)
+{
+	FILE *file = fopen(path, "w");
+	if (!file)
+	{
+		fprintf(stderr, "Failed to write file: %s\n", path);
+		return false;
+	}
+
+	fprintf(file, "%s", string);
+	fclose(file);
+	return true;
+}
+
+static void print_resources(const Compiler &compiler, const char *tag, const vector<Resource> &resources)
+{
+	fprintf(stderr, "%s\n", tag);
+	fprintf(stderr, "=============\n\n");
+	for (auto &res : resources)
+	{
+		auto &type = compiler.get_type(res.type_id);
+		auto mask = compiler.get_decoration_mask(res.id);
+
+		// If we don't have a name, use the fallback for the type instead of the variable
+		// for SSBOs and UBOs since those are the only meaningful names to use externally.
+		// Push constant blocks are still accessed by name and not block name, even though they are technically Blocks.
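+		// (Illustration: for a block declared as "buffer SSBO { ... };" with no
+		// instance name, fallback_id below resolves to res.base_type_id, so the
+		// type's fallback name is printed rather than an empty variable name.)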
+		bool is_push_constant = compiler.get_storage_class(res.id) == StorageClassPushConstant;
+		bool is_block = (compiler.get_decoration_mask(type.self) &
+		                 ((1ull << DecorationBlock) | (1ull << DecorationBufferBlock))) != 0;
+		bool is_sized_block = is_block && (compiler.get_storage_class(res.id) == StorageClassUniform ||
+		                                   compiler.get_storage_class(res.id) == StorageClassUniformConstant);
+		uint32_t fallback_id = !is_push_constant && is_block ? res.base_type_id : res.id;
+
+		uint32_t block_size = 0;
+		if (is_sized_block)
+			block_size = compiler.get_declared_struct_size(compiler.get_type(res.base_type_id));
+
+		string array;
+		for (auto arr : type.array)
+			array = join("[", arr ? convert_to_string(arr) : "", "]") + array;
+
+		fprintf(stderr, " ID %03u : %s%s", res.id,
+		        !res.name.empty() ? res.name.c_str() : compiler.get_fallback_name(fallback_id).c_str(), array.c_str());
+
+		if (mask & (1ull << DecorationLocation))
+			fprintf(stderr, " (Location : %u)", compiler.get_decoration(res.id, DecorationLocation));
+		if (mask & (1ull << DecorationDescriptorSet))
+			fprintf(stderr, " (Set : %u)", compiler.get_decoration(res.id, DecorationDescriptorSet));
+		if (mask & (1ull << DecorationBinding))
+			fprintf(stderr, " (Binding : %u)", compiler.get_decoration(res.id, DecorationBinding));
+		if (mask & (1ull << DecorationInputAttachmentIndex))
+			fprintf(stderr, " (Attachment : %u)", compiler.get_decoration(res.id, DecorationInputAttachmentIndex));
+		if (is_sized_block)
+			fprintf(stderr, " (BlockSize : %u bytes)", block_size);
+		fprintf(stderr, "\n");
+	}
+	fprintf(stderr, "=============\n\n");
+}
+
+static const char *execution_model_to_str(spv::ExecutionModel model)
+{
+	switch (model)
+	{
+	case spv::ExecutionModelVertex:
+		return "vertex";
+	case spv::ExecutionModelTessellationControl:
+		return "tessellation control";
+	case ExecutionModelTessellationEvaluation:
+		return "tessellation evaluation";
+	case ExecutionModelGeometry:
+		return "geometry";
+	case ExecutionModelFragment:
+		return "fragment";
+	case ExecutionModelGLCompute:
+		return "compute";
+	default:
+		return "???";
+	}
+}
+
+static void print_resources(const Compiler &compiler, const ShaderResources &res)
+{
+	uint64_t modes = compiler.get_execution_mode_mask();
+
+	fprintf(stderr, "Entry points:\n");
+	auto entry_points = compiler.get_entry_points();
+	for (auto &e : entry_points)
+		fprintf(stderr, " %s (%s)\n", e.c_str(), execution_model_to_str(compiler.get_entry_point(e).model));
+	fprintf(stderr, "\n");
+
+	fprintf(stderr, "Execution modes:\n");
+	for (unsigned i = 0; i < 64; i++)
+	{
+		if (!(modes & (1ull << i)))
+			continue;
+
+		auto mode = static_cast<ExecutionMode>(i);
+		uint32_t arg0 = compiler.get_execution_mode_argument(mode, 0);
+		uint32_t arg1 = compiler.get_execution_mode_argument(mode, 1);
+		uint32_t arg2 = compiler.get_execution_mode_argument(mode, 2);
+
+		switch (static_cast<ExecutionMode>(i))
+		{
+		case ExecutionModeInvocations:
+			fprintf(stderr, " Invocations: %u\n", arg0);
+			break;
+
+		case ExecutionModeLocalSize:
+			fprintf(stderr, " LocalSize: (%u, %u, %u)\n", arg0, arg1, arg2);
+			break;
+
+		case ExecutionModeOutputVertices:
+			fprintf(stderr, " OutputVertices: %u\n", arg0);
+			break;
+
+#define CHECK_MODE(m)                 \
+	case ExecutionMode##m:            \
+		fprintf(stderr, " %s\n", #m); \
+		break
+			CHECK_MODE(SpacingEqual);
+			CHECK_MODE(SpacingFractionalEven);
+			CHECK_MODE(SpacingFractionalOdd);
+			CHECK_MODE(VertexOrderCw);
+			CHECK_MODE(VertexOrderCcw);
+			CHECK_MODE(PixelCenterInteger);
+			CHECK_MODE(OriginUpperLeft);
+			CHECK_MODE(OriginLowerLeft);
+			CHECK_MODE(EarlyFragmentTests);
+			CHECK_MODE(PointMode);
+			CHECK_MODE(Xfb);
+			CHECK_MODE(DepthReplacing);
+			CHECK_MODE(DepthGreater);
+			CHECK_MODE(DepthLess);
+			CHECK_MODE(DepthUnchanged);
+			CHECK_MODE(LocalSizeHint);
+			CHECK_MODE(InputPoints);
+			CHECK_MODE(InputLines);
+			CHECK_MODE(InputLinesAdjacency);
+			CHECK_MODE(Triangles);
+			CHECK_MODE(InputTrianglesAdjacency);
+			CHECK_MODE(Quads);
+			CHECK_MODE(Isolines);
+			CHECK_MODE(OutputPoints);
+			CHECK_MODE(OutputLineStrip);
+			CHECK_MODE(OutputTriangleStrip);
+			CHECK_MODE(VecTypeHint);
+			CHECK_MODE(ContractionOff);
+
+		default:
+			break;
+		}
+	}
+	fprintf(stderr, "\n");
+
+	print_resources(compiler, "subpass inputs", res.subpass_inputs);
+	print_resources(compiler, "inputs", res.stage_inputs);
+	print_resources(compiler, "outputs", res.stage_outputs);
+	print_resources(compiler, "textures", res.sampled_images);
+	print_resources(compiler, "separate images", res.separate_images);
+	print_resources(compiler, "separate samplers", res.separate_samplers);
+	print_resources(compiler, "images", res.storage_images);
+	print_resources(compiler, "ssbos", res.storage_buffers);
+	print_resources(compiler, "ubos", res.uniform_buffers);
+	print_resources(compiler, "push", res.push_constant_buffers);
+	print_resources(compiler, "counters", res.atomic_counters);
+}
+
+static void print_push_constant_resources(const Compiler &compiler, const vector<Resource> &res)
+{
+	for (auto &block : res)
+	{
+		auto ranges = compiler.get_active_buffer_ranges(block.id);
+		fprintf(stderr, "Active members in buffer: %s\n",
+		        !block.name.empty() ? block.name.c_str() : compiler.get_fallback_name(block.id).c_str());
+
+		fprintf(stderr, "==================\n\n");
+		for (auto &range : ranges)
+		{
+			const auto &name = compiler.get_member_name(block.base_type_id, range.index);
+
+			fprintf(stderr, "Member #%3u (%s): Offset: %4u, Range: %4u\n", range.index,
+			        !name.empty() ? name.c_str() : compiler.get_fallback_member_name(range.index).c_str(),
+			        unsigned(range.offset), unsigned(range.range));
+		}
+		fprintf(stderr, "==================\n\n");
+	}
+}
+
+static void print_spec_constants(const Compiler &compiler)
+{
+	auto spec_constants = compiler.get_specialization_constants();
+	fprintf(stderr, "Specialization constants\n");
+	fprintf(stderr, "==================\n\n");
+	for (auto &c : spec_constants)
+		fprintf(stderr, "ID: %u, Spec ID: %u\n", c.id, c.constant_id);
+	fprintf(stderr, "==================\n\n");
+}
+
+struct PLSArg
+{
+	PlsFormat format;
+	string name;
+};
+
+struct Remap
+{
+	string src_name;
+	string dst_name;
+	unsigned components;
+};
+
+struct VariableTypeRemap
+{
+	string variable_name;
+	string new_variable_type;
+};
+
+struct CLIArguments
+{
+	const char *input = nullptr;
+	const char *output = nullptr;
+	const char *cpp_interface_name = nullptr;
+	uint32_t version = 0;
+	bool es = false;
+	bool set_version = false;
+	bool set_es = false;
+	bool dump_resources = false;
+	bool force_temporary = false;
+	bool flatten_ubo = false;
+	bool fixup = false;
+	vector<PLSArg> pls_in;
+	vector<PLSArg> pls_out;
+	vector<Remap> remaps;
+	vector<string> extensions;
+	vector<VariableTypeRemap> variable_type_remaps;
+	string entry;
+
+	uint32_t iterations = 1;
+	bool cpp = false;
+	bool metal = false;
+	bool vulkan_semantics = false;
+	bool remove_unused = false;
+	bool cfg_analysis = true;
+};
+
+static void print_help()
+{
+	fprintf(stderr, "Usage: spirv-cross [--output <output path>] [SPIR-V file] [--es] [--no-es] [--no-cfg-analysis] "
+	                "[--version <GLSL version>] [--dump-resources] [--help] [--force-temporary] [--cpp] [--cpp-interface-name <name>] "
+	                "[--metal] [--vulkan-semantics] [--flatten-ubo] [--fixup-clipspace] [--iterations iter] [--pls-in "
+	                "format input-name] [--pls-out format output-name] [--remap source_name target_name components] "
+	                "[--extension ext] [--entry name] [--remove-unused-variables] "
+	                "[--remap-variable-type <variable_name> <new_variable_type>]\n");
+}
+
+static bool remap_generic(Compiler &compiler, const vector<Resource> &resources, const Remap &remap)
+{
+	auto itr =
+	    find_if(begin(resources), end(resources), [&remap](const Resource &res) { return res.name == remap.src_name; });
+
+	if (itr != end(resources))
+	{
+		compiler.set_remapped_variable_state(itr->id, true);
+		compiler.set_name(itr->id, remap.dst_name);
+		compiler.set_subpass_input_remapped_components(itr->id, remap.components);
+		return true;
+	}
+	else
+		return false;
+}
+
+static vector<PlsRemap> remap_pls(const vector<PLSArg> &pls_variables, const vector<Resource> &resources,
+                                  const vector<Resource> *secondary_resources)
+{
+	vector<PlsRemap> ret;
+
+	for (auto &pls : pls_variables)
+	{
+		bool found = false;
+		for (auto &res : resources)
+		{
+			if (res.name == pls.name)
+			{
+				ret.push_back({ res.id, pls.format });
+				found = true;
+				break;
+			}
+		}
+
+		if (!found && secondary_resources)
+		{
+			for (auto &res : *secondary_resources)
+			{
+				if (res.name == pls.name)
+				{
+					ret.push_back({ res.id, pls.format });
+					found = true;
+					break;
+				}
+			}
+		}
+
+		if (!found)
+			fprintf(stderr, "Did not find stage input/output/target with name \"%s\".\n", pls.name.c_str());
+	}
+
+	return ret;
+}
+
+static PlsFormat pls_format(const char *str)
+{
+	if (!strcmp(str, "r11f_g11f_b10f"))
+		return PlsR11FG11FB10F;
+	else if (!strcmp(str, "r32f"))
+		return PlsR32F;
+	else if (!strcmp(str, "rg16f"))
+		return PlsRG16F;
+	else if (!strcmp(str, "rg16"))
+		return PlsRG16;
+	else if (!strcmp(str, "rgb10_a2"))
+		return PlsRGB10A2;
+	else if (!strcmp(str, "rgba8"))
+		return PlsRGBA8;
+	else if (!strcmp(str, "rgba8i"))
+		return PlsRGBA8I;
+	else if (!strcmp(str, "rgba8ui"))
"rgba8ui")) + return PlsRGBA8UI; + else if (!strcmp(str, "rg16i")) + return PlsRG16I; + else if (!strcmp(str, "rgb10_a2ui")) + return PlsRGB10A2UI; + else if (!strcmp(str, "rg16ui")) + return PlsRG16UI; + else if (!strcmp(str, "r32ui")) + return PlsR32UI; + else + return PlsNone; +} + +int main(int argc, char *argv[]) +{ + CLIArguments args; + CLICallbacks cbs; + + cbs.add("--help", [](CLIParser &parser) { + print_help(); + parser.end(); + }); + cbs.add("--output", [&args](CLIParser &parser) { args.output = parser.next_string(); }); + cbs.add("--es", [&args](CLIParser &) { + args.es = true; + args.set_es = true; + }); + cbs.add("--no-es", [&args](CLIParser &) { + args.es = false; + args.set_es = true; + }); + cbs.add("--version", [&args](CLIParser &parser) { + args.version = parser.next_uint(); + args.set_version = true; + }); + cbs.add("--no-cfg-analysis", [&args](CLIParser &) { args.cfg_analysis = false; }); + cbs.add("--dump-resources", [&args](CLIParser &) { args.dump_resources = true; }); + cbs.add("--force-temporary", [&args](CLIParser &) { args.force_temporary = true; }); + cbs.add("--flatten-ubo", [&args](CLIParser &) { args.flatten_ubo = true; }); + cbs.add("--fixup-clipspace", [&args](CLIParser &) { args.fixup = true; }); + cbs.add("--iterations", [&args](CLIParser &parser) { args.iterations = parser.next_uint(); }); + cbs.add("--cpp", [&args](CLIParser &) { args.cpp = true; }); + cbs.add("--cpp-interface-name", [&args](CLIParser &parser) { args.cpp_interface_name = parser.next_string(); }); + cbs.add("--metal", [&args](CLIParser &) { args.metal = true; }); + cbs.add("--vulkan-semantics", [&args](CLIParser &) { args.vulkan_semantics = true; }); + cbs.add("--extension", [&args](CLIParser &parser) { args.extensions.push_back(parser.next_string()); }); + cbs.add("--entry", [&args](CLIParser &parser) { args.entry = parser.next_string(); }); + cbs.add("--remap", [&args](CLIParser &parser) { + string src = parser.next_string(); + string dst = parser.next_string(); + uint32_t components = parser.next_uint(); + args.remaps.push_back({ move(src), move(dst), components }); + }); + + cbs.add("--remap-variable-type", [&args](CLIParser &parser) { + string var_name = parser.next_string(); + string new_type = parser.next_string(); + args.variable_type_remaps.push_back({ move(var_name), move(new_type) }); + }); + + cbs.add("--pls-in", [&args](CLIParser &parser) { + auto fmt = pls_format(parser.next_string()); + auto name = parser.next_string(); + args.pls_in.push_back({ move(fmt), move(name) }); + }); + cbs.add("--pls-out", [&args](CLIParser &parser) { + auto fmt = pls_format(parser.next_string()); + auto name = parser.next_string(); + args.pls_out.push_back({ move(fmt), move(name) }); + }); + + cbs.add("--remove-unused-variables", [&args](CLIParser &) { args.remove_unused = true; }); + + cbs.default_handler = [&args](const char *value) { args.input = value; }; + cbs.error_handler = [] { print_help(); }; + + CLIParser parser{ move(cbs), argc - 1, argv + 1 }; + if (!parser.parse()) + { + return EXIT_FAILURE; + } + else if (parser.ended_state) + { + return EXIT_SUCCESS; + } + + if (!args.input) + { + fprintf(stderr, "Didn't specify input file.\n"); + print_help(); + return EXIT_FAILURE; + } + + unique_ptr compiler; + + bool combined_image_samplers = false; + + if (args.cpp) + { + compiler = unique_ptr(new CompilerCPP(read_spirv_file(args.input))); + if (args.cpp_interface_name) + static_cast(compiler.get())->set_interface_name(args.cpp_interface_name); + } + else if (args.metal) + compiler = 
+	else
+	{
+		combined_image_samplers = !args.vulkan_semantics;
+		compiler = unique_ptr<CompilerGLSL>(new CompilerGLSL(read_spirv_file(args.input)));
+	}
+
+	if (!args.variable_type_remaps.empty())
+	{
+		auto remap_cb = [&](const SPIRType &, const string &name, string &out) -> void {
+			for (const VariableTypeRemap &remap : args.variable_type_remaps)
+				if (name == remap.variable_name)
+					out = remap.new_variable_type;
+		};
+
+		compiler->set_variable_type_remap_callback(move(remap_cb));
+	}
+
+	if (!args.entry.empty())
+		compiler->set_entry_point(args.entry);
+
+	if (!args.set_version && !compiler->get_options().version)
+	{
+		fprintf(stderr, "Didn't specify GLSL version and SPIR-V did not specify language.\n");
+		print_help();
+		return EXIT_FAILURE;
+	}
+
+	CompilerGLSL::Options opts = compiler->get_options();
+	if (args.set_version)
+		opts.version = args.version;
+	if (args.set_es)
+		opts.es = args.es;
+	opts.force_temporary = args.force_temporary;
+	opts.vulkan_semantics = args.vulkan_semantics;
+	opts.vertex.fixup_clipspace = args.fixup;
+	opts.cfg_analysis = args.cfg_analysis;
+	compiler->set_options(opts);
+
+	ShaderResources res;
+	if (args.remove_unused)
+	{
+		auto active = compiler->get_active_interface_variables();
+		res = compiler->get_shader_resources(active);
+		compiler->set_enabled_interface_variables(move(active));
+	}
+	else
+		res = compiler->get_shader_resources();
+
+	if (args.flatten_ubo)
+		for (auto &ubo : res.uniform_buffers)
+			compiler->flatten_interface_block(ubo.id);
+
+	auto pls_inputs = remap_pls(args.pls_in, res.stage_inputs, &res.subpass_inputs);
+	auto pls_outputs = remap_pls(args.pls_out, res.stage_outputs, nullptr);
+	compiler->remap_pixel_local_storage(move(pls_inputs), move(pls_outputs));
+
+	for (auto &ext : args.extensions)
+		compiler->require_extension(ext);
+
+	for (auto &remap : args.remaps)
+	{
+		if (remap_generic(*compiler, res.stage_inputs, remap))
+			continue;
+		if (remap_generic(*compiler, res.stage_outputs, remap))
+			continue;
+		if (remap_generic(*compiler, res.subpass_inputs, remap))
+			continue;
+	}
+
+	if (args.dump_resources)
+	{
+		print_resources(*compiler, res);
+		print_push_constant_resources(*compiler, res.push_constant_buffers);
+		print_spec_constants(*compiler);
+	}
+
+	if (combined_image_samplers)
+	{
+		compiler->build_combined_image_samplers();
+		// Give the remapped combined samplers new names.
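+		// (Illustration, assuming a hypothetical image "uTexture" sampled with
+		// "uSampler": the join() below yields the name
+		// "SPIRV_Cross_CombineduTextureuSampler". The name is cosmetic and only
+		// needs to be unique.)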
+ for (auto &remap : compiler->get_combined_image_samplers()) + { + compiler->set_name(remap.combined_id, join("SPIRV_Cross_Combined", compiler->get_name(remap.image_id), + compiler->get_name(remap.sampler_id))); + } + } + + string glsl; + for (uint32_t i = 0; i < args.iterations; i++) + glsl = compiler->compile(); + + if (args.output) + write_string_to_file(args.output, glsl.c_str()); + else + printf("%s", glsl.c_str()); +} diff --git a/msvc/SPIRV-Cross.sln b/msvc/SPIRV-Cross.sln new file mode 100644 index 0000000000..c265ec3347 --- /dev/null +++ b/msvc/SPIRV-Cross.sln @@ -0,0 +1,28 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Express 2013 for Windows Desktop +VisualStudioVersion = 12.0.31101.0 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "SPIRV-Cross", "SPIRV-Cross.vcxproj", "{977E3701-1A21-4425-B7E5-6BDF5EA062CD}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Win32 = Debug|Win32 + Debug|x64 = Debug|x64 + Release|Win32 = Release|Win32 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {977E3701-1A21-4425-B7E5-6BDF5EA062CD}.Debug|Win32.ActiveCfg = Debug|Win32 + {977E3701-1A21-4425-B7E5-6BDF5EA062CD}.Debug|Win32.Build.0 = Debug|Win32 + {977E3701-1A21-4425-B7E5-6BDF5EA062CD}.Debug|x64.ActiveCfg = Debug|x64 + {977E3701-1A21-4425-B7E5-6BDF5EA062CD}.Debug|x64.Build.0 = Debug|x64 + {977E3701-1A21-4425-B7E5-6BDF5EA062CD}.Release|Win32.ActiveCfg = Release|Win32 + {977E3701-1A21-4425-B7E5-6BDF5EA062CD}.Release|Win32.Build.0 = Release|Win32 + {977E3701-1A21-4425-B7E5-6BDF5EA062CD}.Release|x64.ActiveCfg = Release|x64 + {977E3701-1A21-4425-B7E5-6BDF5EA062CD}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/msvc/SPIRV-Cross.vcxproj b/msvc/SPIRV-Cross.vcxproj new file mode 100644 index 0000000000..6d029efc86 --- /dev/null +++ b/msvc/SPIRV-Cross.vcxproj @@ -0,0 +1,146 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Release + Win32 + + + Release + x64 + + + + {977E3701-1A21-4425-B7E5-6BDF5EA062CD} + SPIRV-Cross + + + + Application + true + v120 + MultiByte + + + Application + true + v120 + MultiByte + + + Application + false + v120 + true + MultiByte + + + Application + false + v120 + true + MultiByte + + + + + + + + + + + + + + + + + + + + + Level3 + Disabled + true + _CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + MultiThreadedDebugDLL + + + true + + + + + Level3 + Disabled + true + _CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + MultiThreadedDebugDLL + + + true + + + + + Level3 + MaxSpeed + true + true + true + _CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + MultiThreadedDLL + + + true + true + true + + + + + Level3 + MaxSpeed + true + true + true + _CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + MultiThreadedDLL + + + true + true + true + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/msvc/SPIRV-Cross.vcxproj.filters b/msvc/SPIRV-Cross.vcxproj.filters new file mode 100644 index 0000000000..a0afa7e525 --- /dev/null +++ b/msvc/SPIRV-Cross.vcxproj.filters @@ -0,0 +1,63 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + 
rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + diff --git a/reference/shaders/asm/comp/bitcast_iadd.asm.comp b/reference/shaders/asm/comp/bitcast_iadd.asm.comp new file mode 100644 index 0000000000..2d27240a90 --- /dev/null +++ b/reference/shaders/asm/comp/bitcast_iadd.asm.comp @@ -0,0 +1,27 @@ +#version 310 es +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) restrict buffer _3 +{ + ivec4 _0; + uvec4 _1; +} _5; + +layout(binding = 1, std430) restrict buffer _4 +{ + uvec4 _0; + ivec4 _1; +} _6; + +void main() +{ + _6._0 = _5._1 + uvec4(_5._0); + _6._0 = uvec4(_5._0) + _5._1; + _6._0 = _5._1 + _5._1; + _6._0 = uvec4(_5._0 + _5._0); + _6._1 = ivec4(_5._1 + _5._1); + _6._1 = _5._0 + _5._0; + _6._1 = ivec4(_5._1) + _5._0; + _6._1 = _5._0 + ivec4(_5._1); +} + diff --git a/reference/shaders/asm/comp/bitcast_iequal.asm.comp b/reference/shaders/asm/comp/bitcast_iequal.asm.comp new file mode 100644 index 0000000000..2663a70ab4 --- /dev/null +++ b/reference/shaders/asm/comp/bitcast_iequal.asm.comp @@ -0,0 +1,31 @@ +#version 310 es +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer _3 +{ + ivec4 _0; + uvec4 _1; +} _5; + +layout(binding = 1, std430) buffer _4 +{ + uvec4 _0; + ivec4 _1; +} _6; + +void main() +{ + bvec4 _34 = equal(ivec4(_5._1), _5._0); + bvec4 _35 = equal(_5._0, ivec4(_5._1)); + bvec4 _36 = equal(_5._1, _5._1); + bvec4 _37 = equal(_5._0, _5._0); + _6._0 = mix(uvec4(0u), uvec4(1u), _34); + _6._0 = mix(uvec4(0u), uvec4(1u), _35); + _6._0 = mix(uvec4(0u), uvec4(1u), _36); + _6._0 = mix(uvec4(0u), uvec4(1u), _37); + _6._1 = mix(ivec4(0), ivec4(1), _34); + _6._1 = mix(ivec4(0), ivec4(1), _35); + _6._1 = mix(ivec4(0), ivec4(1), _36); + _6._1 = mix(ivec4(0), ivec4(1), _37); +} + diff --git a/reference/shaders/asm/comp/bitcast_sar.asm.comp b/reference/shaders/asm/comp/bitcast_sar.asm.comp new file mode 100644 index 0000000000..7699302167 --- /dev/null +++ b/reference/shaders/asm/comp/bitcast_sar.asm.comp @@ -0,0 +1,27 @@ +#version 310 es +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer _3 +{ + ivec4 _0; + uvec4 _1; +} _5; + +layout(binding = 1, std430) buffer _4 +{ + uvec4 _0; + ivec4 _1; +} _6; + +void main() +{ + _6._0 = uvec4(ivec4(_5._1) >> _5._0); + _6._0 = uvec4(_5._0 >> ivec4(_5._1)); + _6._0 = uvec4(ivec4(_5._1) >> ivec4(_5._1)); + _6._0 = uvec4(_5._0 >> _5._0); + _6._1 = ivec4(_5._1) >> ivec4(_5._1); + _6._1 = _5._0 >> _5._0; + _6._1 = ivec4(_5._1) >> _5._0; + _6._1 = _5._0 >> ivec4(_5._1); +} + diff --git a/reference/shaders/asm/comp/bitcast_sdiv.asm.comp b/reference/shaders/asm/comp/bitcast_sdiv.asm.comp new file mode 100644 index 0000000000..afa5e41d1a --- /dev/null +++ b/reference/shaders/asm/comp/bitcast_sdiv.asm.comp @@ -0,0 +1,27 @@ +#version 310 es +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer _3 +{ + ivec4 _0; + uvec4 _1; +} _5; + +layout(binding = 1, std430) buffer _4 +{ + uvec4 _0; + ivec4 _1; +} _6; + +void main() +{ + _6._0 = uvec4(ivec4(_5._1) / _5._0); + _6._0 = uvec4(_5._0 / ivec4(_5._1)); + _6._0 = uvec4(ivec4(_5._1) / ivec4(_5._1)); + _6._0 = 
uvec4(_5._0 / _5._0); + _6._1 = ivec4(_5._1) / ivec4(_5._1); + _6._1 = _5._0 / _5._0; + _6._1 = ivec4(_5._1) / _5._0; + _6._1 = _5._0 / ivec4(_5._1); +} + diff --git a/reference/shaders/asm/comp/bitcast_slr.asm.comp b/reference/shaders/asm/comp/bitcast_slr.asm.comp new file mode 100644 index 0000000000..6de9201737 --- /dev/null +++ b/reference/shaders/asm/comp/bitcast_slr.asm.comp @@ -0,0 +1,27 @@ +#version 310 es +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer _3 +{ + ivec4 _0; + uvec4 _1; +} _5; + +layout(binding = 1, std430) buffer _4 +{ + uvec4 _0; + ivec4 _1; +} _6; + +void main() +{ + _6._0 = _5._1 >> uvec4(_5._0); + _6._0 = uvec4(_5._0) >> _5._1; + _6._0 = _5._1 >> _5._1; + _6._0 = uvec4(_5._0) >> uvec4(_5._0); + _6._1 = ivec4(_5._1 >> _5._1); + _6._1 = ivec4(uvec4(_5._0) >> uvec4(_5._0)); + _6._1 = ivec4(_5._1 >> uvec4(_5._0)); + _6._1 = ivec4(uvec4(_5._0) >> _5._1); +} + diff --git a/reference/shaders/asm/comp/bitcast_udiv.asm.comp b/reference/shaders/asm/comp/bitcast_udiv.asm.comp new file mode 100644 index 0000000000..aa9d68fe85 --- /dev/null +++ b/reference/shaders/asm/comp/bitcast_udiv.asm.comp @@ -0,0 +1,27 @@ +#version 310 es +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer _3 +{ + ivec4 _0; + uvec4 _1; +} _5; + +layout(binding = 1, std430) buffer _4 +{ + uvec4 _0; + ivec4 _1; +} _6; + +void main() +{ + _6._0 = _5._1 / uvec4(_5._0); + _6._0 = uvec4(_5._0) / _5._1; + _6._0 = _5._1 / _5._1; + _6._0 = uvec4(_5._0) / uvec4(_5._0); + _6._1 = ivec4(_5._1 / _5._1); + _6._1 = ivec4(uvec4(_5._0) / uvec4(_5._0)); + _6._1 = ivec4(_5._1 / uvec4(_5._0)); + _6._1 = ivec4(uvec4(_5._0) / _5._1); +} + diff --git a/reference/shaders/asm/comp/multiple-entry.asm.comp b/reference/shaders/asm/comp/multiple-entry.asm.comp new file mode 100644 index 0000000000..65a528cd4b --- /dev/null +++ b/reference/shaders/asm/comp/multiple-entry.asm.comp @@ -0,0 +1,27 @@ +#version 310 es +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) restrict buffer _6 +{ + ivec4 _0; + uvec4 _1; +} _8; + +layout(binding = 1, std430) restrict buffer _7 +{ + uvec4 _0; + ivec4 _1; +} _9; + +void main() +{ + _9._0 = _8._1 + uvec4(_8._0); + _9._0 = uvec4(_8._0) + _8._1; + _9._0 = _8._1 + _8._1; + _9._0 = uvec4(_8._0 + _8._0); + _9._1 = ivec4(_8._1 + _8._1); + _9._1 = _8._0 + _8._0; + _9._1 = ivec4(_8._1) + _8._0; + _9._1 = _8._0 + ivec4(_8._1); +} + diff --git a/reference/shaders/asm/comp/name-alias.asm.invalid.comp b/reference/shaders/asm/comp/name-alias.asm.invalid.comp new file mode 100644 index 0000000000..4928e7c4bd --- /dev/null +++ b/reference/shaders/asm/comp/name-alias.asm.invalid.comp @@ -0,0 +1,37 @@ +#version 310 es +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +struct alias +{ + vec3 alias[100]; +}; + +struct alias_1 +{ + vec4 alias; + vec2 alias_1[10]; + alias alias_2[2]; +}; + +struct alias_2 +{ + vec4 alias; + alias_1 alias_1; +}; + +layout(binding = 0, std430) buffer _10 +{ + alias_2 alias; +} alias_3; + +layout(binding = 1, std140) buffer _15 +{ + alias_2 alias; +} alias_4; + +void main() +{ + alias_2 alias_5 = alias_3.alias; + alias_4.alias = alias_5; +} + diff --git a/reference/shaders/asm/comp/quantize.asm.comp b/reference/shaders/asm/comp/quantize.asm.comp new file mode 100644 index 0000000000..c089213800 --- /dev/null +++ b/reference/shaders/asm/comp/quantize.asm.comp @@ -0,0 +1,19 @@ +#version 310 es +layout(local_size_x = 1, 
local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer SSBO0 +{ + float scalar; + vec2 vec2_val; + vec3 vec3_val; + vec4 vec4_val; +} _4; + +void main() +{ + _4.scalar = unpackHalf2x16(packHalf2x16(vec2(_4.scalar))).x; + _4.vec2_val = unpackHalf2x16(packHalf2x16(_4.vec2_val)); + _4.vec3_val = vec3(unpackHalf2x16(packHalf2x16(_4.vec3_val.xy)), unpackHalf2x16(packHalf2x16(_4.vec3_val.zz)).x); + _4.vec4_val = vec4(unpackHalf2x16(packHalf2x16(_4.vec4_val.xy)), unpackHalf2x16(packHalf2x16(_4.vec4_val.zw))); +} + diff --git a/reference/shaders/asm/frag/invalidation.asm.frag b/reference/shaders/asm/frag/invalidation.asm.frag new file mode 100644 index 0000000000..1cdc9761f9 --- /dev/null +++ b/reference/shaders/asm/frag/invalidation.asm.frag @@ -0,0 +1,15 @@ +#version 450 + +in float v0; +in float v1; +out float FragColor; + +void main() +{ + float a = v0; + float b = v1; + float _17 = a; + a = v1; + FragColor = (_17 + b) * b; +} + diff --git a/reference/shaders/comp/atomic.comp b/reference/shaders/comp/atomic.comp new file mode 100644 index 0000000000..89b1351c0c --- /dev/null +++ b/reference/shaders/comp/atomic.comp @@ -0,0 +1,49 @@ +#version 310 es +#extension GL_OES_shader_image_atomic : require +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 2, std430) buffer SSBO +{ + uint u32; + int i32; +} ssbo; + +layout(binding = 0, r32ui) uniform highp uimage2D uImage; +layout(binding = 1, r32i) uniform highp iimage2D iImage; + +void main() +{ + uint _19 = imageAtomicAdd(uImage, ivec2(1, 5), 1u); + uint _27 = imageAtomicAdd(uImage, ivec2(1, 5), 1u); + imageStore(iImage, ivec2(1, 6), ivec4(int(_27))); + uint _32 = imageAtomicOr(uImage, ivec2(1, 5), 1u); + uint _34 = imageAtomicXor(uImage, ivec2(1, 5), 1u); + uint _36 = imageAtomicAnd(uImage, ivec2(1, 5), 1u); + uint _38 = imageAtomicMin(uImage, ivec2(1, 5), 1u); + uint _40 = imageAtomicMax(uImage, ivec2(1, 5), 1u); + uint _44 = imageAtomicCompSwap(uImage, ivec2(1, 5), 10u, 2u); + int _47 = imageAtomicAdd(iImage, ivec2(1, 6), 1); + int _49 = imageAtomicOr(iImage, ivec2(1, 6), 1); + int _51 = imageAtomicXor(iImage, ivec2(1, 6), 1); + int _53 = imageAtomicAnd(iImage, ivec2(1, 6), 1); + int _55 = imageAtomicMin(iImage, ivec2(1, 6), 1); + int _57 = imageAtomicMax(iImage, ivec2(1, 6), 1); + int _61 = imageAtomicCompSwap(iImage, ivec2(1, 5), 10, 2); + uint _68 = atomicAdd(ssbo.u32, 1u); + uint _70 = atomicOr(ssbo.u32, 1u); + uint _72 = atomicXor(ssbo.u32, 1u); + uint _74 = atomicAnd(ssbo.u32, 1u); + uint _76 = atomicMin(ssbo.u32, 1u); + uint _78 = atomicMax(ssbo.u32, 1u); + uint _80 = atomicExchange(ssbo.u32, 1u); + uint _82 = atomicCompSwap(ssbo.u32, 10u, 2u); + int _85 = atomicAdd(ssbo.i32, 1); + int _87 = atomicOr(ssbo.i32, 1); + int _89 = atomicXor(ssbo.i32, 1); + int _91 = atomicAnd(ssbo.i32, 1); + int _93 = atomicMin(ssbo.i32, 1); + int _95 = atomicMax(ssbo.i32, 1); + int _97 = atomicExchange(ssbo.i32, 1); + int _99 = atomicCompSwap(ssbo.i32, 10, 2); +} + diff --git a/reference/shaders/comp/bake_gradient.comp b/reference/shaders/comp/bake_gradient.comp new file mode 100644 index 0000000000..41facc8c3c --- /dev/null +++ b/reference/shaders/comp/bake_gradient.comp @@ -0,0 +1,39 @@ +#version 310 es +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +layout(binding = 4, std140) uniform UBO +{ + vec4 uInvSize; + vec4 uScale; +} _46; + +layout(binding = 0) uniform mediump sampler2D uHeight; +layout(binding = 1) uniform mediump sampler2D uDisplacement; +layout(binding = 2, rgba16f) 
uniform mediump writeonly image2D iHeightDisplacement; +layout(binding = 3, rgba16f) uniform mediump writeonly image2D iGradJacobian; + +mediump float jacobian(mediump vec2 dDdx, mediump vec2 dDdy) +{ + return ((1.0 + dDdx.x) * (1.0 + dDdy.y)) - (dDdx.y * dDdy.x); +} + +void main() +{ + vec4 uv = (vec2(gl_GlobalInvocationID.xy) * _46.uInvSize.xy).xyxy + (_46.uInvSize * 0.5); + float h = textureLod(uHeight, uv.xy, 0.0).x; + float x0 = textureLodOffset(uHeight, uv.xy, 0.0, ivec2(-1, 0)).x; + float x1 = textureLodOffset(uHeight, uv.xy, 0.0, ivec2(1, 0)).x; + float y0 = textureLodOffset(uHeight, uv.xy, 0.0, ivec2(0, -1)).x; + float y1 = textureLodOffset(uHeight, uv.xy, 0.0, ivec2(0, 1)).x; + vec2 grad = (_46.uScale.xy * 0.5) * vec2(x1 - x0, y1 - y0); + vec2 displacement = textureLod(uDisplacement, uv.zw, 0.0).xy * 1.2000000476837158203125; + vec2 dDdx = (textureLodOffset(uDisplacement, uv.zw, 0.0, ivec2(1, 0)).xy - textureLodOffset(uDisplacement, uv.zw, 0.0, ivec2(-1, 0)).xy) * 0.60000002384185791015625; + vec2 dDdy = (textureLodOffset(uDisplacement, uv.zw, 0.0, ivec2(0, 1)).xy - textureLodOffset(uDisplacement, uv.zw, 0.0, ivec2(0, -1)).xy) * 0.60000002384185791015625; + vec2 param = dDdx * _46.uScale.z; + vec2 param_1 = dDdy * _46.uScale.z; + float j = jacobian(param, param_1); + displacement = vec2(0.0); + imageStore(iHeightDisplacement, ivec2(gl_GlobalInvocationID.xy), vec4(h, displacement, 0.0)); + imageStore(iGradJacobian, ivec2(gl_GlobalInvocationID.xy), vec4(grad, j, 0.0)); +} + diff --git a/reference/shaders/comp/basic.comp b/reference/shaders/comp/basic.comp new file mode 100644 index 0000000000..ca2503bd3b --- /dev/null +++ b/reference/shaders/comp/basic.comp @@ -0,0 +1,29 @@ +#version 310 es +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer SSBO +{ + vec4 in_data[]; +} _23; + +layout(binding = 1, std430) buffer SSBO2 +{ + vec4 out_data[]; +} _45; + +layout(binding = 2, std430) buffer SSBO3 +{ + uint counter; +} _48; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + vec4 idata = _23.in_data[ident]; + if (dot(idata, vec4(1.0, 5.0, 6.0, 2.0)) > 8.19999980926513671875) + { + uint _52 = atomicAdd(_48.counter, 1u); + _45.out_data[_52] = idata; + } +} + diff --git a/reference/shaders/comp/casts.comp b/reference/shaders/comp/casts.comp new file mode 100644 index 0000000000..973668676a --- /dev/null +++ b/reference/shaders/comp/casts.comp @@ -0,0 +1,19 @@ +#version 310 es +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 1, std430) buffer SSBO1 +{ + ivec4 outputs[]; +} _21; + +layout(binding = 0, std430) buffer SSBO0 +{ + ivec4 inputs[]; +} _27; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + _21.outputs[ident] = mix(ivec4(0), ivec4(1), notEqual((_27.inputs[ident] & ivec4(3)), ivec4(uvec4(0u)))); +} + diff --git a/reference/shaders/comp/cfg.comp b/reference/shaders/comp/cfg.comp new file mode 100644 index 0000000000..707968e7e5 --- /dev/null +++ b/reference/shaders/comp/cfg.comp @@ -0,0 +1,81 @@ +#version 310 es +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer SSBO +{ + float data; +} _11; + +void test() +{ + float m; + if (_11.data != 0.0) + { + float tmp = 10.0; + _11.data = tmp; + } + else + { + float tmp_1 = 15.0; + _11.data = tmp_1; + } + if (_11.data != 0.0) + { + float e; + if (_11.data != 5.0) + { + if (_11.data != 6.0) + { + e = 10.0; + } + } + else + { + e = 20.0; + } + } + switch (int(_11.data)) + { + case 0: + { + 
float tmp_2 = 20.0; + _11.data = tmp_2; + break; + } + case 1: + { + float tmp_3 = 30.0; + _11.data = tmp_3; + break; + } + } + float f; + switch (int(_11.data)) + { + case 0: + { + f = 30.0; + break; + } + case 1: + { + f = 40.0; + break; + } + } + float h; + for (int i = 0; i < 20; i++, h += 10.0) + { + } + _11.data = h; + do + { + } while (m != 20.0); + _11.data = m; +} + +void main() +{ + test(); +} + diff --git a/reference/shaders/comp/composite-construct.comp b/reference/shaders/comp/composite-construct.comp new file mode 100644 index 0000000000..91bb5348f5 --- /dev/null +++ b/reference/shaders/comp/composite-construct.comp @@ -0,0 +1,38 @@ +#version 310 es +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +struct Composite +{ + vec4 a[2]; + vec4 b[2]; +}; + +layout(binding = 0, std430) buffer SSBO0 +{ + vec4 as[]; +} _41; + +layout(binding = 1, std430) buffer SSBO1 +{ + vec4 bs[]; +} _55; + +vec4 summe(vec4 values[3][2]) +{ + return ((values[0][0] + values[2][1]) + values[0][1]) + values[1][0]; +} + +void main() +{ + vec4 values[2] = vec4[](_41.as[gl_GlobalInvocationID.x], _55.bs[gl_GlobalInvocationID.x]); + vec4 const_values[2] = vec4[](vec4(10.0), vec4(30.0)); + vec4 copy_values[2] = const_values; + vec4 copy_values2[2] = values; + vec4 param[3][2] = vec4[][](values, copy_values, copy_values2); + _41.as[gl_GlobalInvocationID.x] = summe(param); + Composite c = Composite(values, copy_values); + float arrayofarray[2][3] = float[][](float[](1.0, 1.0, 1.0), float[](2.0, 2.0, 2.0)); + float b = 10.0; + float values_scalar[4] = float[](b, b, b, b); +} + diff --git a/reference/shaders/comp/culling.comp b/reference/shaders/comp/culling.comp new file mode 100644 index 0000000000..cd284b96c6 --- /dev/null +++ b/reference/shaders/comp/culling.comp @@ -0,0 +1,29 @@ +#version 310 es +layout(local_size_x = 4, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer SSBO +{ + float in_data[]; +} _22; + +layout(binding = 1, std430) buffer SSBO2 +{ + float out_data[]; +} _38; + +layout(binding = 2, std430) buffer SSBO3 +{ + uint count; +} _41; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + float idata = _22.in_data[ident]; + if (idata > 12.0) + { + uint _45 = atomicAdd(_41.count, 1u); + _38.out_data[_45] = idata; + } +} + diff --git a/reference/shaders/comp/defer-parens.comp b/reference/shaders/comp/defer-parens.comp new file mode 100644 index 0000000000..cf98529316 --- /dev/null +++ b/reference/shaders/comp/defer-parens.comp @@ -0,0 +1,21 @@ +#version 310 es +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer SSBO +{ + vec4 data; + int index; +} _13; + +void main() +{ + vec4 d = _13.data; + _13.data = vec4(d.x, d.yz + vec2(10.0), d.w); + _13.data = (d + d) + d; + _13.data = (d.yz + vec2(10.0)).xxyy; + float t = (d.yz + vec2(10.0)).y; + _13.data = vec4(t); + t = (d.zw + vec2(10.0))[_13.index]; + _13.data = vec4(t); +} + diff --git a/reference/shaders/comp/dowhile.comp b/reference/shaders/comp/dowhile.comp new file mode 100644 index 0000000000..16ba4001b8 --- /dev/null +++ b/reference/shaders/comp/dowhile.comp @@ -0,0 +1,29 @@ +#version 310 es +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer SSBO +{ + mat4 mvp; + vec4 in_data[]; +} _28; + +layout(binding = 1, std430) buffer SSBO2 +{ + vec4 out_data[]; +} _52; + +int i; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + i = 0; + vec4 idat = _28.in_data[ident]; + do + { + idat = _28.mvp 
* idat; + i++; + } while (i < 16); + _52.out_data[ident] = idat; +} + diff --git a/reference/shaders/comp/generate_height.comp b/reference/shaders/comp/generate_height.comp new file mode 100644 index 0000000000..a2128dd8a1 --- /dev/null +++ b/reference/shaders/comp/generate_height.comp @@ -0,0 +1,96 @@ +#version 310 es +layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer Distribution +{ + vec2 distribution[]; +} _190; + +layout(binding = 2, std140) uniform UBO +{ + vec4 uModTime; +} _218; + +layout(binding = 1, std430) buffer HeightmapFFT +{ + uint heights[]; +} _276; + +uvec2 workaround_mix(uvec2 a, uvec2 b, bvec2 sel) +{ + uint _137; + if (sel.x) + { + _137 = b.x; + } + else + { + _137 = a.x; + } + uint _147 = _137; + uint _148; + if (sel.y) + { + _148 = b.y; + } + else + { + _148 = a.y; + } + return uvec2(_147, _148); +} + +vec2 alias(vec2 i, vec2 N) +{ + return mix(i, i - N, greaterThan(i, N * 0.5)); +} + +vec2 cmul(vec2 a, vec2 b) +{ + vec2 r3 = a.yx; + vec2 r1 = b.xx; + vec2 R0 = a * r1; + vec2 r2 = b.yy; + vec2 R1 = r2 * r3; + return R0 + vec2(-R1.x, R1.y); +} + +uint pack2(vec2 v) +{ + return packHalf2x16(v); +} + +void generate_heightmap() +{ + uvec2 N = uvec2(64u, 1u) * gl_NumWorkGroups.xy; + uvec2 i = gl_GlobalInvocationID.xy; + uvec2 param = N - i; + uvec2 param_1 = uvec2(0u); + bvec2 param_2 = equal(i, uvec2(0u)); + uvec2 wi = workaround_mix(param, param_1, param_2); + vec2 a = _190.distribution[(i.y * N.x) + i.x]; + vec2 b = _190.distribution[(wi.y * N.x) + wi.x]; + vec2 param_3 = vec2(i); + vec2 param_4 = vec2(N); + vec2 k = _218.uModTime.xy * alias(param_3, param_4); + float k_len = length(k); + float w = sqrt(9.81000041961669921875 * k_len) * _218.uModTime.z; + float cw = cos(w); + float sw = sin(w); + vec2 param_5 = a; + vec2 param_6 = vec2(cw, sw); + a = cmul(param_5, param_6); + vec2 param_7 = b; + vec2 param_8 = vec2(cw, sw); + b = cmul(param_7, param_8); + b = vec2(b.x, -b.y); + vec2 res = a + b; + vec2 param_9 = res; + _276.heights[(i.y * N.x) + i.x] = pack2(param_9); +} + +void main() +{ + generate_heightmap(); +} + diff --git a/reference/shaders/comp/image.comp b/reference/shaders/comp/image.comp new file mode 100644 index 0000000000..7479905003 --- /dev/null +++ b/reference/shaders/comp/image.comp @@ -0,0 +1,12 @@ +#version 310 es +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, rgba8) uniform mediump readonly image2D uImageIn; +layout(binding = 1, rgba8) uniform mediump writeonly image2D uImageOut; + +void main() +{ + vec4 v = imageLoad(uImageIn, ivec2(gl_GlobalInvocationID.xy) + imageSize(uImageIn)); + imageStore(uImageOut, ivec2(gl_GlobalInvocationID.xy), v); +} + diff --git a/reference/shaders/comp/inout-struct.invalid.comp b/reference/shaders/comp/inout-struct.invalid.comp new file mode 100644 index 0000000000..1aaa48f2ae --- /dev/null +++ b/reference/shaders/comp/inout-struct.invalid.comp @@ -0,0 +1,65 @@ +#version 310 es +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +struct Foo +{ + vec4 a; + vec4 b; + vec4 c; + vec4 d; +}; + +layout(binding = 1, std430) buffer SSBO2 +{ + vec4 data[]; +} indata; + +layout(binding = 0, std430) buffer SSBO +{ + vec4 data[]; +} outdata; + +layout(binding = 2, std430) buffer SSBO3 +{ + Foo foos[]; +} foobar; + +void baz(out Foo foo) +{ + uint ident = gl_GlobalInvocationID.x; + foo.a = indata.data[(4u * ident) + 0u]; + foo.b = indata.data[(4u * ident) + 1u]; + foo.c = indata.data[(4u * ident) + 2u]; + foo.d = 
indata.data[(4u * ident) + 3u]; +} + +void meow(inout Foo foo) +{ + foo.a += vec4(10.0); + foo.b += vec4(20.0); + foo.c += vec4(30.0); + foo.d += vec4(40.0); +} + +vec4 bar(Foo foo) +{ + return ((foo.a + foo.b) + foo.c) + foo.d; +} + +void main() +{ + Foo param; + baz(param); + Foo foo = param; + Foo param_1 = foo; + meow(param_1); + foo = param_1; + Foo param_2 = foo; + Foo param_3; + param_3.a = foobar.foos[gl_GlobalInvocationID.x].a; + param_3.b = foobar.foos[gl_GlobalInvocationID.x].b; + param_3.c = foobar.foos[gl_GlobalInvocationID.x].c; + param_3.d = foobar.foos[gl_GlobalInvocationID.x].d; + outdata.data[gl_GlobalInvocationID.x] = bar(param_2) + bar(param_3); +} + diff --git a/reference/shaders/comp/insert.comp b/reference/shaders/comp/insert.comp new file mode 100644 index 0000000000..6c10020c3c --- /dev/null +++ b/reference/shaders/comp/insert.comp @@ -0,0 +1,19 @@ +#version 310 es +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer SSBO +{ + vec4 out_data[]; +} _27; + +void main() +{ + vec4 v; + v.x = 10.0; + v.y = 30.0; + v.z = 70.0; + v.w = 90.0; + _27.out_data[gl_GlobalInvocationID.x] = v; + _27.out_data[gl_GlobalInvocationID.x].y = 20.0; +} + diff --git a/reference/shaders/comp/loop.comp b/reference/shaders/comp/loop.comp new file mode 100644 index 0000000000..9853acaa35 --- /dev/null +++ b/reference/shaders/comp/loop.comp @@ -0,0 +1,105 @@ +#version 310 es +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer SSBO +{ + mat4 mvp; + vec4 in_data[]; +} _24; + +layout(binding = 1, std430) buffer SSBO2 +{ + vec4 out_data[]; +} _177; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + vec4 idat = _24.in_data[ident]; + int k = 0; + uint i = 0u; + if (idat.y == 20.0) + { + do + { + k *= 2; + i++; + } while (i < ident); + } + switch (k) + { + case 10: + { + for (;;) + { + i++; + if (i > 10u) + { + break; + } + continue; + } + break; + } + default: + { + for (;;) + { + i += 2u; + if (i > 20u) + { + break; + } + continue; + } + break; + } + } + while (k < 10) + { + idat *= 2.0; + k++; + } + for (uint i_1 = 0u; i_1 < 16u; i_1++, k++) + { + for (uint j = 0u; j < 30u; j++) + { + idat = _24.mvp * idat; + } + } + k = 0; + for (;;) + { + k++; + if (k > 10) + { + k += 2; + } + else + { + k += 3; + continue; + } + k += 10; + continue; + } + k = 0; + do + { + k++; + } while (k > 10); + int l = 0; + for (;;) + { + if (l == 5) + { + l++; + continue; + } + idat += vec4(1.0); + l++; + continue; + } + _177.out_data[ident] = idat; +} + diff --git a/reference/shaders/comp/mat3.comp b/reference/shaders/comp/mat3.comp new file mode 100644 index 0000000000..dc302396a5 --- /dev/null +++ b/reference/shaders/comp/mat3.comp @@ -0,0 +1,14 @@ +#version 310 es +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 1, std430) buffer SSBO2 +{ + mat3 out_data[]; +} _22; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + _22.out_data[ident] = mat3(vec3(10.0), vec3(20.0), vec3(40.0)); +} + diff --git a/reference/shaders/comp/mod.comp b/reference/shaders/comp/mod.comp new file mode 100644 index 0000000000..dfb9cf4c70 --- /dev/null +++ b/reference/shaders/comp/mod.comp @@ -0,0 +1,24 @@ +#version 310 es +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer SSBO +{ + vec4 in_data[]; +} _23; + +layout(binding = 1, std430) buffer SSBO2 +{ + vec4 out_data[]; +} _33; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + 
vec4 v = mod(_23.in_data[ident], _33.out_data[ident]); + _33.out_data[ident] = v; + uvec4 vu = floatBitsToUint(_23.in_data[ident]) % floatBitsToUint(_33.out_data[ident]); + _33.out_data[ident] = uintBitsToFloat(vu); + ivec4 vi = floatBitsToInt(_23.in_data[ident]) % floatBitsToInt(_33.out_data[ident]); + _33.out_data[ident] = intBitsToFloat(vi); +} + diff --git a/reference/shaders/comp/modf.comp b/reference/shaders/comp/modf.comp new file mode 100644 index 0000000000..721d812f10 --- /dev/null +++ b/reference/shaders/comp/modf.comp @@ -0,0 +1,22 @@ +#version 310 es +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer SSBO +{ + vec4 in_data[]; +} _23; + +layout(binding = 1, std430) buffer SSBO2 +{ + vec4 out_data[]; +} _35; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + vec4 i; + vec4 _31 = modf(_23.in_data[ident], i); + vec4 v = _31; + _35.out_data[ident] = v; +} + diff --git a/reference/shaders/comp/return.comp b/reference/shaders/comp/return.comp new file mode 100644 index 0000000000..20d61d25dc --- /dev/null +++ b/reference/shaders/comp/return.comp @@ -0,0 +1,34 @@ +#version 310 es +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 1, std430) buffer SSBO2 +{ + vec4 out_data[]; +} _27; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + if (ident == 2u) + { + _27.out_data[ident] = vec4(20.0); + } + else + { + if (ident == 4u) + { + _27.out_data[ident] = vec4(10.0); + return; + } + } + for (int i = 0; i < 20; i++) + { + if (i == 10) + { + break; + } + return; + } + _27.out_data[ident] = vec4(10.0); +} + diff --git a/reference/shaders/comp/shared.comp b/reference/shaders/comp/shared.comp new file mode 100644 index 0000000000..e2ff604563 --- /dev/null +++ b/reference/shaders/comp/shared.comp @@ -0,0 +1,25 @@ +#version 310 es +layout(local_size_x = 4, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer SSBO +{ + float in_data[]; +} _22; + +layout(binding = 1, std430) buffer SSBO2 +{ + float out_data[]; +} _44; + +shared float sShared[4]; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + float idata = _22.in_data[ident]; + sShared[gl_LocalInvocationIndex] = idata; + memoryBarrierShared(); + barrier(); + _44.out_data[ident] = sShared[(4u - gl_LocalInvocationIndex) - 1u]; +} + diff --git a/reference/shaders/comp/ssbo-array.comp b/reference/shaders/comp/ssbo-array.comp new file mode 100644 index 0000000000..e773bd093c --- /dev/null +++ b/reference/shaders/comp/ssbo-array.comp @@ -0,0 +1,14 @@ +#version 310 es +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer SSBO +{ + vec4 data[]; +} ssbos[2]; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + ssbos[1].data[ident] = ssbos[0].data[ident]; +} + diff --git a/reference/shaders/comp/struct-layout.comp b/reference/shaders/comp/struct-layout.comp new file mode 100644 index 0000000000..1cbf5dfb97 --- /dev/null +++ b/reference/shaders/comp/struct-layout.comp @@ -0,0 +1,24 @@ +#version 310 es +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +struct Foo +{ + mat4 m; +}; + +layout(binding = 1, std430) buffer SSBO2 +{ + Foo out_data[]; +} _23; + +layout(binding = 0, std430) buffer SSBO +{ + Foo in_data[]; +} _30; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + _23.out_data[ident].m = _30.in_data[ident].m * _30.in_data[ident].m; +} + diff --git a/reference/shaders/comp/struct-packing.comp 
b/reference/shaders/comp/struct-packing.comp new file mode 100644 index 0000000000..a6ea8d581e --- /dev/null +++ b/reference/shaders/comp/struct-packing.comp @@ -0,0 +1,96 @@ +#version 310 es +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +struct S0 +{ + vec2 a[1]; + float b; +}; + +struct S1 +{ + vec3 a; + float b; +}; + +struct S2 +{ + vec3 a[1]; + float b; +}; + +struct S3 +{ + vec2 a; + float b; +}; + +struct S4 +{ + vec2 c; +}; + +struct Content +{ + S0 m0s[1]; + S1 m1s[1]; + S2 m2s[1]; + S0 m0; + S1 m1; + S2 m2; + S3 m3; + float m4; + S4 m3s[8]; +}; + +layout(binding = 1, std430) buffer SSBO1 +{ + Content content; + Content content1[2]; + Content content2; + mat2 m0; + mat2 m1; + mat2x3 m2[4]; + mat3x2 m3; + layout(row_major) mat2 m4; + layout(row_major) mat2 m5[9]; + layout(row_major) mat2x3 m6[4][2]; + layout(row_major) mat3x2 m7; + float array[]; +} ssbo_430; + +layout(binding = 0, std140) buffer SSBO0 +{ + Content content; + Content content1[2]; + Content content2; + float array[]; +} ssbo_140; + +void main() +{ + ssbo_430.content.m0s[0].a[0] = ssbo_140.content.m0s[0].a[0]; + ssbo_430.content.m0s[0].b = ssbo_140.content.m0s[0].b; + ssbo_430.content.m1s[0].a = ssbo_140.content.m1s[0].a; + ssbo_430.content.m1s[0].b = ssbo_140.content.m1s[0].b; + ssbo_430.content.m2s[0].a[0] = ssbo_140.content.m2s[0].a[0]; + ssbo_430.content.m2s[0].b = ssbo_140.content.m2s[0].b; + ssbo_430.content.m0.a[0] = ssbo_140.content.m0.a[0]; + ssbo_430.content.m0.b = ssbo_140.content.m0.b; + ssbo_430.content.m1.a = ssbo_140.content.m1.a; + ssbo_430.content.m1.b = ssbo_140.content.m1.b; + ssbo_430.content.m2.a[0] = ssbo_140.content.m2.a[0]; + ssbo_430.content.m2.b = ssbo_140.content.m2.b; + ssbo_430.content.m3.a = ssbo_140.content.m3.a; + ssbo_430.content.m3.b = ssbo_140.content.m3.b; + ssbo_430.content.m4 = ssbo_140.content.m4; + ssbo_430.content.m3s[0].c = ssbo_140.content.m3s[0].c; + ssbo_430.content.m3s[1].c = ssbo_140.content.m3s[1].c; + ssbo_430.content.m3s[2].c = ssbo_140.content.m3s[2].c; + ssbo_430.content.m3s[3].c = ssbo_140.content.m3s[3].c; + ssbo_430.content.m3s[4].c = ssbo_140.content.m3s[4].c; + ssbo_430.content.m3s[5].c = ssbo_140.content.m3s[5].c; + ssbo_430.content.m3s[6].c = ssbo_140.content.m3s[6].c; + ssbo_430.content.m3s[7].c = ssbo_140.content.m3s[7].c; +} + diff --git a/reference/shaders/comp/torture-loop.comp b/reference/shaders/comp/torture-loop.comp new file mode 100644 index 0000000000..ae18319061 --- /dev/null +++ b/reference/shaders/comp/torture-loop.comp @@ -0,0 +1,49 @@ +#version 310 es +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer SSBO +{ + mat4 mvp; + vec4 in_data[]; +} _24; + +layout(binding = 1, std430) buffer SSBO2 +{ + vec4 out_data[]; +} _89; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + vec4 idat = _24.in_data[ident]; + int k = 0; + for (;;) + { + int _39 = k; + int _40 = _39 + 1; + k = _40; + if (_40 < 10) + { + idat *= 2.0; + k++; + continue; + } + else + { + break; + } + } + for (uint i = 0u; i < 16u; i++, k++) + { + for (uint j = 0u; j < 30u; j++) + { + idat = _24.mvp * idat; + } + } + do + { + k++; + } while (k > 10); + _89.out_data[ident] = idat; +} + diff --git a/reference/shaders/comp/type-alias.comp b/reference/shaders/comp/type-alias.comp new file mode 100644 index 0000000000..51f3792e1a --- /dev/null +++ b/reference/shaders/comp/type-alias.comp @@ -0,0 +1,49 @@ +#version 310 es +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +struct S0 +{ + vec4 a; 
+}; + +struct S1 +{ + vec4 a; +}; + +layout(binding = 0, std430) buffer SSBO0 +{ + S0 s0s[]; +} _36; + +layout(binding = 1, std430) buffer SSBO1 +{ + S1 s1s[]; +} _55; + +layout(binding = 2, std430) buffer SSBO2 +{ + vec4 outputs[]; +} _66; + +vec4 overload(S0 s0) +{ + return s0.a; +} + +vec4 overload(S1 s1) +{ + return s1.a; +} + +void main() +{ + S0 s0; + s0.a = _36.s0s[gl_GlobalInvocationID.x].a; + S1 s1; + s1.a = _55.s1s[gl_GlobalInvocationID.x].a; + S0 param = s0; + S1 param_1 = s1; + _66.outputs[gl_GlobalInvocationID.x] = overload(param) + overload(param_1); +} + diff --git a/reference/shaders/comp/udiv.comp b/reference/shaders/comp/udiv.comp new file mode 100644 index 0000000000..0c1f926ad0 --- /dev/null +++ b/reference/shaders/comp/udiv.comp @@ -0,0 +1,18 @@ +#version 310 es +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer SSBO2 +{ + uint outputs[]; +} _10; + +layout(binding = 0, std430) buffer SSBO +{ + uint inputs[]; +} _23; + +void main() +{ + _10.outputs[gl_GlobalInvocationID.x] = _23.inputs[gl_GlobalInvocationID.x] / 29u; +} + diff --git a/reference/shaders/desktop-only/comp/fp64.desktop.comp b/reference/shaders/desktop-only/comp/fp64.desktop.comp new file mode 100644 index 0000000000..18869eda52 --- /dev/null +++ b/reference/shaders/desktop-only/comp/fp64.desktop.comp @@ -0,0 +1,84 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +struct M0 +{ + double v; + dvec2 b[2]; + dmat2x3 c; + dmat3x2 d; +}; + +layout(binding = 0, std430) buffer SSBO0 +{ + dvec4 a; + M0 m0; + dmat4 b; +} ssbo_0; + +layout(binding = 1, std430) buffer SSBO1 +{ + dmat4 a; + dvec4 b; + M0 m0; +} ssbo_1; + +layout(binding = 2, std430) buffer SSBO2 +{ + double a[4]; + dvec2 b[4]; +} ssbo_2; + +layout(binding = 3, std140) buffer SSBO3 +{ + double a[4]; + dvec2 b[4]; +} ssbo_3; + +void main() +{ + ssbo_0.a += dvec4(10.0lf, 20.0lf, 30.0lf, 40.0lf); + ssbo_0.a += dvec4(20.0lf); + dvec4 a = ssbo_0.a; + dmat4 amat = ssbo_0.b; + ssbo_0.a = abs(a); + ssbo_0.a = sign(a); + ssbo_0.a = floor(a); + ssbo_0.a = trunc(a); + ssbo_0.a = round(a); + ssbo_0.a = roundEven(a); + ssbo_0.a = ceil(a); + ssbo_0.a = fract(a); + ssbo_0.a = mod(a, dvec4(20.0lf)); + ssbo_0.a = mod(a, a); + ssbo_0.a = min(a, a); + ssbo_0.a = max(a, a); + ssbo_0.a = clamp(a, a, a); + ssbo_0.a = mix(a, a, a); + ssbo_0.a = step(a, a); + ssbo_0.a = smoothstep(a, a, a); + bvec4 b = isnan(a); + bvec4 c = isinf(a); + double f = packDouble2x32(uvec2(10u, 40u)); + uvec2 g = unpackDouble2x32(f); + double d = length(a); + d = distance(a, a); + d = dot(a, a); + dvec3 e = cross(a.xyz, a.yzw); + a = faceforward(a, a, a); + a = reflect(a, a); + a = refract(a, a, a.x); + dmat4 l = dmat4(amat[0] * amat[0], amat[1] * amat[1], amat[2] * amat[2], amat[3] * amat[3]); + l = outerProduct(a, a); + l = transpose(l); + double m = determinant(l); + l = inverse(l); + bvec4 k = lessThan(a, a); + k = lessThanEqual(a, a); + k = greaterThan(a, a); + k = greaterThanEqual(a, a); + ssbo_1.b.x += 1.0lf; + ssbo_2.b[0].x += 1.0lf; + ssbo_3.b[0].x += 1.0lf; +} + diff --git a/reference/shaders/desktop-only/comp/image-formats.desktop.noeliminate.comp b/reference/shaders/desktop-only/comp/image-formats.desktop.noeliminate.comp new file mode 100644 index 0000000000..7a0797578b --- /dev/null +++ b/reference/shaders/desktop-only/comp/image-formats.desktop.noeliminate.comp @@ -0,0 +1,47 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, rgba32f) uniform 
readonly writeonly image2D uImg00; +layout(binding = 1, rgba16f) uniform readonly writeonly image2D uImg01; +layout(binding = 2, rg32f) uniform readonly writeonly image2D uImg02; +layout(binding = 3, rg16f) uniform readonly writeonly image2D uImg03; +layout(binding = 4, r11f_g11f_b10f) uniform readonly writeonly image2D uImg04; +layout(binding = 5, r32f) uniform readonly writeonly image2D uImg05; +layout(binding = 6, r16f) uniform readonly writeonly image2D uImg06; +layout(binding = 7, rgba16) uniform readonly writeonly image2D uImg07; +layout(binding = 8, rgb10_a2) uniform readonly writeonly image2D uImg08; +layout(binding = 9, rgba8) uniform readonly writeonly image2D uImg09; +layout(binding = 10, rg16) uniform readonly writeonly image2D uImg10; +layout(binding = 11, rg8) uniform readonly writeonly image2D uImg11; +layout(binding = 12, r16) uniform readonly writeonly image2D uImg12; +layout(binding = 13, r8) uniform readonly writeonly image2D uImg13; +layout(binding = 14, rgba16_snorm) uniform readonly writeonly image2D uImg14; +layout(binding = 15, rgba8_snorm) uniform readonly writeonly image2D uImg15; +layout(binding = 16, rg16_snorm) uniform readonly writeonly image2D uImg16; +layout(binding = 17, rg8_snorm) uniform readonly writeonly image2D uImg17; +layout(binding = 18, r16_snorm) uniform readonly writeonly image2D uImg18; +layout(binding = 19, r8_snorm) uniform readonly writeonly image2D uImg19; +layout(binding = 20, rgba32i) uniform readonly writeonly iimage2D uImage20; +layout(binding = 21, rgba16i) uniform readonly writeonly iimage2D uImage21; +layout(binding = 22, rgba8i) uniform readonly writeonly iimage2D uImage22; +layout(binding = 23, rg32i) uniform readonly writeonly iimage2D uImage23; +layout(binding = 24, rg16i) uniform readonly writeonly iimage2D uImage24; +layout(binding = 25, rg8i) uniform readonly writeonly iimage2D uImage25; +layout(binding = 26, r32i) uniform readonly writeonly iimage2D uImage26; +layout(binding = 27, r16i) uniform readonly writeonly iimage2D uImage27; +layout(binding = 28, r8i) uniform readonly writeonly iimage2D uImage28; +layout(binding = 29, rgba32ui) uniform readonly writeonly uimage2D uImage29; +layout(binding = 30, rgba16ui) uniform readonly writeonly uimage2D uImage30; +layout(binding = 31, rgb10_a2ui) uniform readonly writeonly uimage2D uImage31; +layout(binding = 32, rgba8ui) uniform readonly writeonly uimage2D uImage32; +layout(binding = 33, rg32ui) uniform readonly writeonly uimage2D uImage33; +layout(binding = 34, rg16ui) uniform readonly writeonly uimage2D uImage34; +layout(binding = 35, rg8ui) uniform readonly writeonly uimage2D uImage35; +layout(binding = 36, r32ui) uniform readonly writeonly uimage2D uImage36; +layout(binding = 37, r16ui) uniform readonly writeonly uimage2D uImage37; +layout(binding = 38, r8ui) uniform readonly writeonly uimage2D uImage38; + +void main() +{ +} + diff --git a/reference/shaders/desktop-only/comp/int64.desktop.comp b/reference/shaders/desktop-only/comp/int64.desktop.comp new file mode 100644 index 0000000000..702456b303 --- /dev/null +++ b/reference/shaders/desktop-only/comp/int64.desktop.comp @@ -0,0 +1,52 @@ +#version 450 +#extension GL_ARB_gpu_shader_int64 : require +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +struct M0 +{ + int64_t v; + i64vec2 b[2]; + uint64_t c; + uint64_t d[5]; +}; + +layout(binding = 0, std430) buffer SSBO0 +{ + i64vec4 a; + M0 m0; +} ssbo_0; + +layout(binding = 1, std430) buffer SSBO1 +{ + u64vec4 b; + M0 m0; +} ssbo_1; + +layout(binding = 2, std430) 
buffer SSBO2 +{ + int64_t a[4]; + i64vec2 b[4]; +} ssbo_2; + +layout(binding = 3, std140) buffer SSBO3 +{ + int64_t a[4]; + i64vec2 b[4]; +} ssbo_3; + +void main() +{ + ssbo_0.a += i64vec4(10l, 20l, 30l, 40l); + ssbo_1.b += u64vec4(999999999999999999ul, 8888888888888888ul, 77777777777777777ul, 6666666666666666ul); + ssbo_0.a += i64vec4(20l); + ssbo_0.a = abs(ssbo_0.a + i64vec4(ssbo_1.b)); + ssbo_0.a += i64vec4(1l); + ssbo_1.b += u64vec4(i64vec4(1l)); + ssbo_0.a -= i64vec4(1l); + ssbo_1.b -= u64vec4(i64vec4(1l)); + ssbo_1.b = doubleBitsToUint64(int64BitsToDouble(ssbo_0.a)); + ssbo_0.a = doubleBitsToInt64(uint64BitsToDouble(ssbo_1.b)); + ssbo_2.a[0] += 1l; + ssbo_3.a[0] += 2l; +} + diff --git a/reference/shaders/desktop-only/frag/image-ms.desktop.frag b/reference/shaders/desktop-only/frag/image-ms.desktop.frag new file mode 100644 index 0000000000..24644be170 --- /dev/null +++ b/reference/shaders/desktop-only/frag/image-ms.desktop.frag @@ -0,0 +1,13 @@ +#version 450 + +layout(binding = 0, rgba8) uniform image2DMS uImage; +layout(binding = 1, rgba8) uniform image2DMSArray uImageArray; + +void main() +{ + vec4 a = imageLoad(uImage, ivec2(1, 2), 2); + vec4 b = imageLoad(uImageArray, ivec3(1, 2, 4), 3); + imageStore(uImage, ivec2(2, 3), 1, a); + imageStore(uImageArray, ivec3(2, 3, 7), 1, b); +} + diff --git a/reference/shaders/desktop-only/frag/in-block-qualifiers.frag b/reference/shaders/desktop-only/frag/in-block-qualifiers.frag new file mode 100644 index 0000000000..281053c820 --- /dev/null +++ b/reference/shaders/desktop-only/frag/in-block-qualifiers.frag @@ -0,0 +1,21 @@ +#version 450 + +layout(location = 0) out vec4 FragColor; +in VertexData +{ + layout(location = 0) flat float f; + layout(location = 1) centroid vec4 g; + layout(location = 2) flat int h; + layout(location = 3) float i; +} vin; + +layout(location = 4) in flat float f; +layout(location = 5) in centroid vec4 g; +layout(location = 6) in flat int h; +layout(location = 7) in sample float i; + +void main() +{ + FragColor = ((((((vec4(vin.f) + vin.g) + vec4(float(vin.h))) + vec4(vin.i)) + vec4(f)) + g) + vec4(float(h))) + vec4(i); +} + diff --git a/reference/shaders/desktop-only/frag/query-levels.desktop.frag b/reference/shaders/desktop-only/frag/query-levels.desktop.frag new file mode 100644 index 0000000000..4a80cbf81f --- /dev/null +++ b/reference/shaders/desktop-only/frag/query-levels.desktop.frag @@ -0,0 +1,11 @@ +#version 450 + +layout(binding = 0) uniform sampler2D uSampler; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vec4(float(textureQueryLevels(uSampler))); +} + diff --git a/reference/shaders/desktop-only/frag/query-lod.desktop.frag b/reference/shaders/desktop-only/frag/query-lod.desktop.frag new file mode 100644 index 0000000000..f43543b8c0 --- /dev/null +++ b/reference/shaders/desktop-only/frag/query-lod.desktop.frag @@ -0,0 +1,12 @@ +#version 450 + +layout(binding = 0) uniform sampler2D uSampler; + +layout(location = 0) out vec4 FragColor; +layout(location = 0) in vec2 vTexCoord; + +void main() +{ + FragColor = textureQueryLod(uSampler, vTexCoord).xyxy; +} + diff --git a/reference/shaders/desktop-only/frag/sampler-ms-query.desktop.frag b/reference/shaders/desktop-only/frag/sampler-ms-query.desktop.frag new file mode 100644 index 0000000000..4c30ed1529 --- /dev/null +++ b/reference/shaders/desktop-only/frag/sampler-ms-query.desktop.frag @@ -0,0 +1,14 @@ +#version 450 + +layout(binding = 0) uniform sampler2DMS uSampler; +layout(binding = 1) uniform sampler2DMSArray uSamplerArray; 
+layout(binding = 2, rgba8) uniform readonly writeonly image2DMS uImage; +layout(binding = 3, rgba8) uniform readonly writeonly image2DMSArray uImageArray; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vec4(float(((textureSamples(uSampler) + textureSamples(uSamplerArray)) + imageSamples(uImage)) + imageSamples(uImageArray))); +} + diff --git a/reference/shaders/desktop-only/vert/out-block-qualifiers.vert b/reference/shaders/desktop-only/vert/out-block-qualifiers.vert new file mode 100644 index 0000000000..4a3be5c028 --- /dev/null +++ b/reference/shaders/desktop-only/vert/out-block-qualifiers.vert @@ -0,0 +1,27 @@ +#version 450 + +out VertexData +{ + layout(location = 0) flat float f; + layout(location = 1) centroid vec4 g; + layout(location = 2) flat int h; + layout(location = 3) float i; +} vout; + +layout(location = 4) out flat float f; +layout(location = 5) out centroid vec4 g; +layout(location = 6) out flat int h; +layout(location = 7) out float i; + +void main() +{ + vout.f = 10.0; + vout.g = vec4(20.0); + vout.h = 20; + vout.i = 30.0; + f = 10.0; + g = vec4(20.0); + h = 20; + i = 30.0; +} + diff --git a/reference/shaders/frag/basic.frag b/reference/shaders/frag/basic.frag new file mode 100644 index 0000000000..f83b28156a --- /dev/null +++ b/reference/shaders/frag/basic.frag @@ -0,0 +1,15 @@ +#version 310 es +precision mediump float; +precision highp int; + +layout(binding = 0) uniform mediump sampler2D uTex; + +layout(location = 0) out vec4 FragColor; +in vec4 vColor; +in vec2 vTex; + +void main() +{ + FragColor = vColor * texture(uTex, vTex); +} + diff --git a/reference/shaders/frag/composite-extract-forced-temporary.frag b/reference/shaders/frag/composite-extract-forced-temporary.frag new file mode 100644 index 0000000000..e4384f559e --- /dev/null +++ b/reference/shaders/frag/composite-extract-forced-temporary.frag @@ -0,0 +1,15 @@ +#version 310 es +precision mediump float; +precision highp int; + +layout(binding = 0) uniform mediump sampler2D Texture; + +layout(location = 0) in vec2 vTexCoord; +layout(location = 0) out vec4 FragColor; + +void main() +{ + float f = texture(Texture, vTexCoord).x; + FragColor = vec4(f * f); +} + diff --git a/reference/shaders/frag/constant-array.frag b/reference/shaders/frag/constant-array.frag new file mode 100644 index 0000000000..ef9b794739 --- /dev/null +++ b/reference/shaders/frag/constant-array.frag @@ -0,0 +1,28 @@ +#version 310 es +precision mediump float; +precision highp int; + +struct Foobar +{ + float a; + float b; +}; + +layout(location = 0) out vec4 FragColor; +layout(location = 0) in mediump flat int index; + +vec4 resolve(Foobar f) +{ + return vec4(f.a + f.b); +} + +void main() +{ + highp vec4 indexable[3] = vec4[](vec4(1.0), vec4(2.0), vec4(3.0)); + highp vec4 indexable_1[2][2] = vec4[][](vec4[](vec4(1.0), vec4(2.0)), vec4[](vec4(8.0), vec4(10.0))); + Foobar param = Foobar(10.0, 20.0); + Foobar indexable_2[2] = Foobar[](Foobar(10.0, 40.0), Foobar(90.0, 70.0)); + Foobar param_1 = indexable_2[index]; + FragColor = ((indexable[index] + (indexable_1[index][index + 1])) + resolve(param)) + resolve(param_1); +} + diff --git a/reference/shaders/frag/flush_params.frag b/reference/shaders/frag/flush_params.frag new file mode 100644 index 0000000000..ee99395a1f --- /dev/null +++ b/reference/shaders/frag/flush_params.frag @@ -0,0 +1,30 @@ +#version 310 es +precision mediump float; +precision highp int; + +struct Structy +{ + vec4 c; +}; + +layout(location = 0) out vec4 FragColor; + +void foo2(out Structy f) +{ + 
f.c = vec4(10.0); +} + +Structy foo() +{ + Structy param; + foo2(param); + Structy f = param; + return f; +} + +void main() +{ + Structy s = foo(); + FragColor = s.c; +} + diff --git a/reference/shaders/frag/for-loop-init.frag b/reference/shaders/frag/for-loop-init.frag new file mode 100644 index 0000000000..7c22e5c785 --- /dev/null +++ b/reference/shaders/frag/for-loop-init.frag @@ -0,0 +1,52 @@ +#version 310 es +precision mediump float; +precision highp int; + +layout(location = 0) out mediump int FragColor; + +void main() +{ + FragColor = 16; + for (mediump int i = 0; i < 25; i++) + { + FragColor += 10; + } + for (mediump int i_1 = 1, j = 4; i_1 < 30; i_1++, j += 4) + { + FragColor += 11; + } + mediump int k = 0; + for (; k < 20; k++) + { + FragColor += 12; + } + k += 3; + FragColor += k; + mediump int l; + if (k == 40) + { + l = 0; + for (; l < 40; l++) + { + FragColor += 13; + } + return; + } + else + { + l = k; + FragColor += l; + } + mediump ivec2 i_2 = ivec2(0); + for (; i_2.x < 10; i_2.x += 4) + { + FragColor += i_2.y; + } + mediump int o = k; + for (mediump int m = k; m < 40; m++) + { + FragColor += m; + } + FragColor += o; +} + diff --git a/reference/shaders/frag/ground.frag b/reference/shaders/frag/ground.frag new file mode 100644 index 0000000000..4b0ea829bd --- /dev/null +++ b/reference/shaders/frag/ground.frag @@ -0,0 +1,62 @@ +#version 310 es +precision mediump float; +precision highp int; + +layout(binding = 4, std140) uniform GlobalPSData +{ + vec4 g_CamPos; + vec4 g_SunDir; + vec4 g_SunColor; + vec4 g_ResolutionParams; + vec4 g_TimeParams; + vec4 g_FogColor_Distance; +} _56; + +layout(binding = 2) uniform mediump sampler2D TexNormalmap; + +layout(location = 3) out vec4 LightingOut; +layout(location = 2) out vec4 NormalOut; +layout(location = 1) out vec4 SpecularOut; +layout(location = 0) out vec4 AlbedoOut; +layout(location = 0) in vec2 TexCoord; +layout(location = 1) in vec3 EyeVec; + +float saturate(float x) +{ + return clamp(x, 0.0, 1.0); +} + +void Resolve(vec3 Albedo, vec3 Normal, float Roughness, float Metallic) +{ + LightingOut = vec4(0.0); + NormalOut = vec4((Normal * 0.5) + vec3(0.5), 0.0); + SpecularOut = vec4(Roughness, Metallic, 0.0, 0.0); + AlbedoOut = vec4(Albedo, 1.0); +} + +void main() +{ + vec3 Normal = (texture(TexNormalmap, TexCoord).xyz * 2.0) - vec3(1.0); + Normal = normalize(Normal); + highp float param = length(EyeVec) / 1000.0; + vec2 scatter_uv; + scatter_uv.x = saturate(param); + vec3 nEye = normalize(EyeVec); + scatter_uv.y = 0.0; + vec3 Color = vec3(0.100000001490116119384765625, 0.300000011920928955078125, 0.100000001490116119384765625); + vec3 grass = vec3(0.100000001490116119384765625, 0.300000011920928955078125, 0.100000001490116119384765625); + vec3 dirt = vec3(0.100000001490116119384765625); + vec3 snow = vec3(0.800000011920928955078125); + float grass_snow = smoothstep(0.0, 0.1500000059604644775390625, (_56.g_CamPos.y + EyeVec.y) / 200.0); + vec3 base = mix(grass, snow, vec3(grass_snow)); + float edge = smoothstep(0.699999988079071044921875, 0.75, Normal.y); + Color = mix(dirt, base, vec3(edge)); + Color *= Color; + float Roughness = 1.0 - (edge * grass_snow); + highp vec3 param_1 = Color; + highp vec3 param_2 = Normal; + highp float param_3 = Roughness; + highp float param_4 = 0.0; + Resolve(param_1, param_2, param_3, param_4); +} + diff --git a/reference/shaders/frag/mix.frag b/reference/shaders/frag/mix.frag new file mode 100644 index 0000000000..68e82273ed --- /dev/null +++ b/reference/shaders/frag/mix.frag @@ -0,0 +1,38 @@ 
+#version 310 es +precision mediump float; +precision highp int; + +layout(location = 0) out vec4 FragColor; +layout(location = 0) in vec4 vIn0; +layout(location = 1) in vec4 vIn1; +layout(location = 2) in float vIn2; +layout(location = 3) in float vIn3; + +void main() +{ + bvec4 l = bvec4(false, true, false, false); + FragColor = mix(vIn0, vIn1, l); + bool f = true; + FragColor = vec4(mix(vIn2, vIn3, f)); + highp vec4 _35; + if (f) + { + _35 = vIn0; + } + else + { + _35 = vIn1; + } + FragColor = _35; + highp float _44; + if (f) + { + _44 = vIn2; + } + else + { + _44 = vIn3; + } + FragColor = vec4(_44); +} + diff --git a/reference/shaders/frag/pls.frag b/reference/shaders/frag/pls.frag new file mode 100644 index 0000000000..e0c8f270db --- /dev/null +++ b/reference/shaders/frag/pls.frag @@ -0,0 +1,21 @@ +#version 310 es +precision mediump float; +precision highp int; + +layout(location = 0) out vec4 PLSOut0; +layout(location = 0) in vec4 PLSIn0; +layout(location = 1) out vec4 PLSOut1; +layout(location = 1) in vec4 PLSIn1; +layout(location = 2) out vec4 PLSOut2; +in vec4 PLSIn2; +layout(location = 3) out vec4 PLSOut3; +in vec4 PLSIn3; + +void main() +{ + PLSOut0 = PLSIn0 * 2.0; + PLSOut1 = PLSIn1 * 6.0; + PLSOut2 = PLSIn2 * 7.0; + PLSOut3 = PLSIn3 * 4.0; +} + diff --git a/reference/shaders/frag/sampler-ms.frag b/reference/shaders/frag/sampler-ms.frag new file mode 100644 index 0000000000..dbab3fb819 --- /dev/null +++ b/reference/shaders/frag/sampler-ms.frag @@ -0,0 +1,14 @@ +#version 310 es +precision mediump float; +precision highp int; + +layout(binding = 0) uniform mediump sampler2DMS uSampler; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + ivec2 coord = ivec2(gl_FragCoord.xy); + FragColor = ((texelFetch(uSampler, coord, 0) + texelFetch(uSampler, coord, 1)) + texelFetch(uSampler, coord, 2)) + texelFetch(uSampler, coord, 3); +} + diff --git a/reference/shaders/frag/sampler.frag b/reference/shaders/frag/sampler.frag new file mode 100644 index 0000000000..406cec6d45 --- /dev/null +++ b/reference/shaders/frag/sampler.frag @@ -0,0 +1,21 @@ +#version 310 es +precision mediump float; +precision highp int; + +layout(binding = 0) uniform mediump sampler2D uTex; + +layout(location = 0) out vec4 FragColor; +in vec4 vColor; +in vec2 vTex; + +vec4 sample_texture(mediump sampler2D tex, vec2 uv) +{ + return texture(tex, uv); +} + +void main() +{ + highp vec2 param = vTex; + FragColor = vColor * sample_texture(uTex, param); +} + diff --git a/reference/shaders/frag/swizzle.frag b/reference/shaders/frag/swizzle.frag new file mode 100644 index 0000000000..e619be2f48 --- /dev/null +++ b/reference/shaders/frag/swizzle.frag @@ -0,0 +1,20 @@ +#version 310 es +precision mediump float; +precision highp int; + +layout(location = 0) uniform mediump sampler2D samp; + +layout(location = 0) out vec4 FragColor; +layout(location = 2) in vec2 vUV; +layout(location = 1) in vec3 vNormal; + +void main() +{ + FragColor = vec4(texture(samp, vUV).xyz, 1.0); + FragColor = vec4(texture(samp, vUV).xz, 1.0, 4.0); + FragColor = vec4(texture(samp, vUV).xx, texture(samp, vUV + vec2(0.100000001490116119384765625)).yy); + FragColor = vec4(vNormal, 1.0); + FragColor = vec4(vNormal + vec3(1.7999999523162841796875), 1.0); + FragColor = vec4(vUV, vUV + vec2(1.7999999523162841796875)); +} + diff --git a/reference/shaders/frag/ubo_layout.frag b/reference/shaders/frag/ubo_layout.frag new file mode 100644 index 0000000000..bc0b01c065 --- /dev/null +++ b/reference/shaders/frag/ubo_layout.frag @@ -0,0 +1,26 @@ +#version 310 es 
+precision mediump float; +precision highp int; + +struct Str +{ + mat4 foo; +}; + +layout(binding = 0, std140) uniform UBO1 +{ + layout(row_major) Str foo; +} ubo1; + +layout(binding = 1, std140) uniform UBO2 +{ + Str foo; +} ubo0; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = ubo1.foo.foo[0] + ubo0.foo.foo[0]; +} + diff --git a/reference/shaders/geom/basic.geom b/reference/shaders/geom/basic.geom new file mode 100644 index 0000000000..f2eea50a65 --- /dev/null +++ b/reference/shaders/geom/basic.geom @@ -0,0 +1,26 @@ +#version 310 es +#extension GL_EXT_geometry_shader : require +layout(invocations = 4, triangles) in; +layout(max_vertices = 3, triangle_strip) out; + +out vec3 vNormal; +in VertexData +{ + vec3 normal; +} vin[3]; + + +void main() +{ + gl_Position = gl_in[0].gl_Position; + vNormal = vin[0].normal + vec3(float(gl_InvocationID)); + EmitVertex(); + gl_Position = gl_in[1].gl_Position; + vNormal = vin[1].normal + vec3(4.0 * float(gl_InvocationID)); + EmitVertex(); + gl_Position = gl_in[2].gl_Position; + vNormal = vin[2].normal + vec3(2.0 * float(gl_InvocationID)); + EmitVertex(); + EndPrimitive(); +} + diff --git a/reference/shaders/geom/lines-adjacency.geom b/reference/shaders/geom/lines-adjacency.geom new file mode 100644 index 0000000000..0083b8b299 --- /dev/null +++ b/reference/shaders/geom/lines-adjacency.geom @@ -0,0 +1,26 @@ +#version 310 es +#extension GL_EXT_geometry_shader : require +layout(lines_adjacency) in; +layout(max_vertices = 3, line_strip) out; + +out vec3 vNormal; +in VertexData +{ + vec3 normal; +} vin[4]; + + +void main() +{ + gl_Position = gl_in[0].gl_Position; + vNormal = vin[0].normal; + EmitVertex(); + gl_Position = gl_in[1].gl_Position; + vNormal = vin[1].normal; + EmitVertex(); + gl_Position = gl_in[2].gl_Position; + vNormal = vin[2].normal; + EmitVertex(); + EndPrimitive(); +} + diff --git a/reference/shaders/geom/lines.geom b/reference/shaders/geom/lines.geom new file mode 100644 index 0000000000..6d20ec8253 --- /dev/null +++ b/reference/shaders/geom/lines.geom @@ -0,0 +1,23 @@ +#version 310 es +#extension GL_EXT_geometry_shader : require +layout(lines) in; +layout(max_vertices = 2, line_strip) out; + +out vec3 vNormal; +in VertexData +{ + vec3 normal; +} vin[2]; + + +void main() +{ + gl_Position = gl_in[0].gl_Position; + vNormal = vin[0].normal; + EmitVertex(); + gl_Position = gl_in[1].gl_Position; + vNormal = vin[1].normal; + EmitVertex(); + EndPrimitive(); +} + diff --git a/reference/shaders/geom/points.geom b/reference/shaders/geom/points.geom new file mode 100644 index 0000000000..76d2a2dfcd --- /dev/null +++ b/reference/shaders/geom/points.geom @@ -0,0 +1,26 @@ +#version 310 es +#extension GL_EXT_geometry_shader : require +layout(points) in; +layout(max_vertices = 3, points) out; + +out vec3 vNormal; +in VertexData +{ + vec3 normal; +} vin[1]; + + +void main() +{ + gl_Position = gl_in[0].gl_Position; + vNormal = vin[0].normal; + EmitVertex(); + gl_Position = gl_in[0].gl_Position; + vNormal = vin[0].normal; + EmitVertex(); + gl_Position = gl_in[0].gl_Position; + vNormal = vin[0].normal; + EmitVertex(); + EndPrimitive(); +} + diff --git a/reference/shaders/geom/single-invocation.geom b/reference/shaders/geom/single-invocation.geom new file mode 100644 index 0000000000..592f7996c7 --- /dev/null +++ b/reference/shaders/geom/single-invocation.geom @@ -0,0 +1,26 @@ +#version 310 es +#extension GL_EXT_geometry_shader : require +layout(triangles) in; +layout(max_vertices = 3, triangle_strip) out; + +out vec3 vNormal; +in 
VertexData +{ + vec3 normal; +} vin[3]; + + +void main() +{ + gl_Position = gl_in[0].gl_Position; + vNormal = vin[0].normal; + EmitVertex(); + gl_Position = gl_in[1].gl_Position; + vNormal = vin[1].normal; + EmitVertex(); + gl_Position = gl_in[2].gl_Position; + vNormal = vin[2].normal; + EmitVertex(); + EndPrimitive(); +} + diff --git a/reference/shaders/geom/triangles-adjacency.geom b/reference/shaders/geom/triangles-adjacency.geom new file mode 100644 index 0000000000..44d36548ab --- /dev/null +++ b/reference/shaders/geom/triangles-adjacency.geom @@ -0,0 +1,26 @@ +#version 310 es +#extension GL_EXT_geometry_shader : require +layout(triangles_adjacency) in; +layout(max_vertices = 3, triangle_strip) out; + +out vec3 vNormal; +in VertexData +{ + vec3 normal; +} vin[6]; + + +void main() +{ + gl_Position = gl_in[0].gl_Position; + vNormal = vin[0].normal; + EmitVertex(); + gl_Position = gl_in[1].gl_Position; + vNormal = vin[1].normal; + EmitVertex(); + gl_Position = gl_in[2].gl_Position; + vNormal = vin[2].normal; + EmitVertex(); + EndPrimitive(); +} + diff --git a/reference/shaders/geom/triangles.geom b/reference/shaders/geom/triangles.geom new file mode 100644 index 0000000000..592f7996c7 --- /dev/null +++ b/reference/shaders/geom/triangles.geom @@ -0,0 +1,26 @@ +#version 310 es +#extension GL_EXT_geometry_shader : require +layout(triangles) in; +layout(max_vertices = 3, triangle_strip) out; + +out vec3 vNormal; +in VertexData +{ + vec3 normal; +} vin[3]; + + +void main() +{ + gl_Position = gl_in[0].gl_Position; + vNormal = vin[0].normal; + EmitVertex(); + gl_Position = gl_in[1].gl_Position; + vNormal = vin[1].normal; + EmitVertex(); + gl_Position = gl_in[2].gl_Position; + vNormal = vin[2].normal; + EmitVertex(); + EndPrimitive(); +} + diff --git a/reference/shaders/tesc/basic.tesc b/reference/shaders/tesc/basic.tesc new file mode 100644 index 0000000000..daf9b33c50 --- /dev/null +++ b/reference/shaders/tesc/basic.tesc @@ -0,0 +1,17 @@ +#version 310 es +#extension GL_EXT_tessellation_shader : require +layout(vertices = 1) out; + +out patch vec3 vFoo; + +void main() +{ + gl_TessLevelInner[0] = 8.8999996185302734375; + gl_TessLevelInner[1] = 6.900000095367431640625; + gl_TessLevelOuter[0] = 8.8999996185302734375; + gl_TessLevelOuter[1] = 6.900000095367431640625; + gl_TessLevelOuter[2] = 3.900000095367431640625; + gl_TessLevelOuter[3] = 4.900000095367431640625; + vFoo = vec3(1.0); +} + diff --git a/reference/shaders/tesc/water_tess.tesc b/reference/shaders/tesc/water_tess.tesc new file mode 100644 index 0000000000..422a467e3d --- /dev/null +++ b/reference/shaders/tesc/water_tess.tesc @@ -0,0 +1,117 @@ +#version 310 es +#extension GL_EXT_tessellation_shader : require +layout(vertices = 1) out; + +layout(std140) uniform UBO +{ + vec4 uScale; + vec3 uCamPos; + vec2 uPatchSize; + vec2 uMaxTessLevel; + float uDistanceMod; + vec4 uFrustum[6]; +} _41; + +out patch vec2 vOutPatchPosBase; +out patch vec4 vPatchLods; +in vec2 vPatchPosBase[32]; + +bool frustum_cull(vec2 p0) +{ + vec2 min_xz = (p0 - vec2(10.0)) * _41.uScale.xy; + vec2 max_xz = ((p0 + _41.uPatchSize) + vec2(10.0)) * _41.uScale.xy; + vec3 bb_min = vec3(min_xz.x, -10.0, min_xz.y); + vec3 bb_max = vec3(max_xz.x, 10.0, max_xz.y); + vec3 center = (bb_min + bb_max) * 0.5; + float radius = 0.5 * length(bb_max - bb_min); + vec3 f0 = vec3(dot(_41.uFrustum[0], vec4(center, 1.0)), dot(_41.uFrustum[1], vec4(center, 1.0)), dot(_41.uFrustum[2], vec4(center, 1.0))); + vec3 f1 = vec3(dot(_41.uFrustum[3], vec4(center, 1.0)), dot(_41.uFrustum[4], 
vec4(center, 1.0)), dot(_41.uFrustum[5], vec4(center, 1.0))); + vec3 _199 = f0; + bool _205 = any(lessThanEqual(_199, vec3(-radius))); + bool _215; + if (!_205) + { + _215 = any(lessThanEqual(f1, vec3(-radius))); + } + else + { + _215 = _205; + } + return !_215; +} + +float lod_factor(vec2 pos_) +{ + vec2 pos = pos_ * _41.uScale.xy; + vec3 dist_to_cam = _41.uCamPos - vec3(pos.x, 0.0, pos.y); + float level = log2((length(dist_to_cam) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod); + return clamp(level, 0.0, _41.uMaxTessLevel.x); +} + +vec4 tess_level(vec4 lod) +{ + return exp2(-lod) * _41.uMaxTessLevel.y; +} + +float tess_level(float lod) +{ + return _41.uMaxTessLevel.y * exp2(-lod); +} + +void compute_tess_levels(vec2 p0) +{ + vOutPatchPosBase = p0; + vec2 param = p0 + (vec2(-0.5) * _41.uPatchSize); + float l00 = lod_factor(param); + vec2 param_1 = p0 + (vec2(0.5, -0.5) * _41.uPatchSize); + float l10 = lod_factor(param_1); + vec2 param_2 = p0 + (vec2(1.5, -0.5) * _41.uPatchSize); + float l20 = lod_factor(param_2); + vec2 param_3 = p0 + (vec2(-0.5, 0.5) * _41.uPatchSize); + float l01 = lod_factor(param_3); + vec2 param_4 = p0 + (vec2(0.5) * _41.uPatchSize); + float l11 = lod_factor(param_4); + vec2 param_5 = p0 + (vec2(1.5, 0.5) * _41.uPatchSize); + float l21 = lod_factor(param_5); + vec2 param_6 = p0 + (vec2(-0.5, 1.5) * _41.uPatchSize); + float l02 = lod_factor(param_6); + vec2 param_7 = p0 + (vec2(0.5, 1.5) * _41.uPatchSize); + float l12 = lod_factor(param_7); + vec2 param_8 = p0 + (vec2(1.5) * _41.uPatchSize); + float l22 = lod_factor(param_8); + vec4 lods = vec4(dot(vec4(l01, l11, l02, l12), vec4(0.25)), dot(vec4(l00, l10, l01, l11), vec4(0.25)), dot(vec4(l10, l20, l11, l21), vec4(0.25)), dot(vec4(l11, l21, l12, l22), vec4(0.25))); + vPatchLods = lods; + vec4 outer_lods = min(lods, lods.yzwx); + vec4 param_9 = outer_lods; + vec4 levels = tess_level(param_9); + gl_TessLevelOuter[0] = levels.x; + gl_TessLevelOuter[1] = levels.y; + gl_TessLevelOuter[2] = levels.z; + gl_TessLevelOuter[3] = levels.w; + float min_lod = min(min(lods.x, lods.y), min(lods.z, lods.w)); + float param_10 = min(min_lod, l11); + float inner = tess_level(param_10); + gl_TessLevelInner[0] = inner; + gl_TessLevelInner[1] = inner; +} + +void main() +{ + vec2 p0 = vPatchPosBase[0]; + vec2 param = p0; + if (!frustum_cull(param)) + { + gl_TessLevelOuter[0] = -1.0; + gl_TessLevelOuter[1] = -1.0; + gl_TessLevelOuter[2] = -1.0; + gl_TessLevelOuter[3] = -1.0; + gl_TessLevelInner[0] = -1.0; + gl_TessLevelInner[1] = -1.0; + } + else + { + vec2 param_1 = p0; + compute_tess_levels(param_1); + } +} + diff --git a/reference/shaders/tese/ccw.tese b/reference/shaders/tese/ccw.tese new file mode 100644 index 0000000000..a2a4508ac0 --- /dev/null +++ b/reference/shaders/tese/ccw.tese @@ -0,0 +1,9 @@ +#version 310 es +#extension GL_EXT_tessellation_shader : require +layout(triangles, ccw, fractional_even_spacing) in; + +void main() +{ + gl_Position = vec4(1.0); +} + diff --git a/reference/shaders/tese/cw.tese b/reference/shaders/tese/cw.tese new file mode 100644 index 0000000000..95781493d8 --- /dev/null +++ b/reference/shaders/tese/cw.tese @@ -0,0 +1,9 @@ +#version 310 es +#extension GL_EXT_tessellation_shader : require +layout(triangles, cw, fractional_even_spacing) in; + +void main() +{ + gl_Position = vec4(1.0); +} + diff --git a/reference/shaders/tese/equal.tese b/reference/shaders/tese/equal.tese new file mode 100644 index 0000000000..6d30518a30 --- /dev/null +++ b/reference/shaders/tese/equal.tese @@ -0,0 +1,9 @@ 
+#version 310 es +#extension GL_EXT_tessellation_shader : require +layout(triangles, cw, equal_spacing) in; + +void main() +{ + gl_Position = vec4(1.0); +} + diff --git a/reference/shaders/tese/fractional_even.tese b/reference/shaders/tese/fractional_even.tese new file mode 100644 index 0000000000..95781493d8 --- /dev/null +++ b/reference/shaders/tese/fractional_even.tese @@ -0,0 +1,9 @@ +#version 310 es +#extension GL_EXT_tessellation_shader : require +layout(triangles, cw, fractional_even_spacing) in; + +void main() +{ + gl_Position = vec4(1.0); +} + diff --git a/reference/shaders/tese/fractional_odd.tese b/reference/shaders/tese/fractional_odd.tese new file mode 100644 index 0000000000..608c19aba7 --- /dev/null +++ b/reference/shaders/tese/fractional_odd.tese @@ -0,0 +1,9 @@ +#version 310 es +#extension GL_EXT_tessellation_shader : require +layout(triangles, cw, fractional_odd_spacing) in; + +void main() +{ + gl_Position = vec4(1.0); +} + diff --git a/reference/shaders/tese/line.tese b/reference/shaders/tese/line.tese new file mode 100644 index 0000000000..8b6ad8da20 --- /dev/null +++ b/reference/shaders/tese/line.tese @@ -0,0 +1,9 @@ +#version 310 es +#extension GL_EXT_tessellation_shader : require +layout(isolines, point_mode, fractional_even_spacing) in; + +void main() +{ + gl_Position = vec4(1.0); +} + diff --git a/reference/shaders/tese/triangle.tese b/reference/shaders/tese/triangle.tese new file mode 100644 index 0000000000..95781493d8 --- /dev/null +++ b/reference/shaders/tese/triangle.tese @@ -0,0 +1,9 @@ +#version 310 es +#extension GL_EXT_tessellation_shader : require +layout(triangles, cw, fractional_even_spacing) in; + +void main() +{ + gl_Position = vec4(1.0); +} + diff --git a/reference/shaders/tese/water_tess.tese b/reference/shaders/tese/water_tess.tese new file mode 100644 index 0000000000..758c2b4e43 --- /dev/null +++ b/reference/shaders/tese/water_tess.tese @@ -0,0 +1,61 @@ +#version 310 es +#extension GL_EXT_tessellation_shader : require +layout(quads, cw, fractional_even_spacing) in; + +layout(binding = 1, std140) uniform UBO +{ + mat4 uMVP; + vec4 uScale; + vec2 uInvScale; + vec3 uCamPos; + vec2 uPatchSize; + vec2 uInvHeightmapSize; +} _31; + +layout(binding = 0) uniform mediump sampler2D uHeightmapDisplacement; + +in patch vec2 vOutPatchPosBase; +in patch vec4 vPatchLods; +out vec4 vGradNormalTex; +out vec3 vWorld; + +vec2 lerp_vertex(vec2 tess_coord) +{ + return vOutPatchPosBase + (tess_coord * _31.uPatchSize); +} + +mediump vec2 lod_factor(vec2 tess_coord) +{ + mediump vec2 x = mix(vPatchLods.yx, vPatchLods.zw, vec2(tess_coord.x)); + mediump float level = mix(x.x, x.y, tess_coord.y); + mediump float floor_level = floor(level); + mediump float fract_level = level - floor_level; + return vec2(floor_level, fract_level); +} + +mediump vec3 sample_height_displacement(vec2 uv, vec2 off, mediump vec2 lod) +{ + return mix(textureLod(uHeightmapDisplacement, uv + (off * 0.5), lod.x).xyz, textureLod(uHeightmapDisplacement, uv + (off * 1.0), lod.x + 1.0).xyz, vec3(lod.y)); +} + +void main() +{ + vec2 tess_coord = gl_TessCoord.xy; + vec2 param = tess_coord; + vec2 pos = lerp_vertex(param); + vec2 param_1 = tess_coord; + mediump vec2 lod = lod_factor(param_1); + vec2 tex = pos * _31.uInvHeightmapSize; + pos *= _31.uScale.xy; + mediump float delta_mod = exp2(lod.x); + vec2 off = _31.uInvHeightmapSize * delta_mod; + vGradNormalTex = vec4(tex + (_31.uInvHeightmapSize * 0.5), tex * _31.uScale.zw); + vec2 param_2 = tex; + vec2 param_3 = off; + vec2 param_4 = lod; + vec3 
height_displacement = sample_height_displacement(param_2, param_3, param_4); + pos += height_displacement.yz; + vWorld = vec3(pos.x, height_displacement.x, pos.y); + gl_Position = _31.uMVP * vec4(vWorld, 1.0); +} + diff --git a/reference/shaders/vert/basic.vert b/reference/shaders/vert/basic.vert new file mode 100644 index 0000000000..429b36d58a --- /dev/null +++ b/reference/shaders/vert/basic.vert @@ -0,0 +1,17 @@ +#version 310 es + +layout(std140) uniform UBO +{ + mat4 uMVP; +} _16; + +in vec4 aVertex; +out vec3 vNormal; +in vec3 aNormal; + +void main() +{ + gl_Position = _16.uMVP * aVertex; + vNormal = aNormal; +} + diff --git a/reference/shaders/vert/ground.vert b/reference/shaders/vert/ground.vert new file mode 100644 index 0000000000..44b19a99f7 --- /dev/null +++ b/reference/shaders/vert/ground.vert @@ -0,0 +1,110 @@ +#version 310 es + +struct PatchData +{ + vec4 Position; + vec4 LODs; +}; + +layout(binding = 0, std140) uniform GlobalVSData +{ + vec4 g_ViewProj_Row0; + vec4 g_ViewProj_Row1; + vec4 g_ViewProj_Row2; + vec4 g_ViewProj_Row3; + vec4 g_CamPos; + vec4 g_CamRight; + vec4 g_CamUp; + vec4 g_CamFront; + vec4 g_SunDir; + vec4 g_SunColor; + vec4 g_TimeParams; + vec4 g_ResolutionParams; + vec4 g_CamAxisRight; + vec4 g_FogColor_Distance; + vec4 g_ShadowVP_Row0; + vec4 g_ShadowVP_Row1; + vec4 g_ShadowVP_Row2; + vec4 g_ShadowVP_Row3; +} _58; + +layout(binding = 0, std140) uniform PerPatch +{ + PatchData Patches[256]; +} _284; + +layout(binding = 2, std140) uniform GlobalGround +{ + vec4 GroundScale; + vec4 GroundPosition; + vec4 InvGroundSize_PatchScale; +} _381; + +layout(binding = 1) uniform mediump sampler2D TexLOD; +layout(binding = 0) uniform mediump sampler2D TexHeightmap; + +layout(location = 1) in vec4 LODWeights; +uniform int SPIRV_Cross_BaseInstance; +layout(location = 0) in vec2 Position; +layout(location = 1) out vec3 EyeVec; +layout(location = 0) out vec2 TexCoord; + +vec2 warp_position() +{ + float vlod = dot(LODWeights, _284.Patches[(gl_InstanceID + SPIRV_Cross_BaseInstance)].LODs); + vlod = mix(vlod, _284.Patches[(gl_InstanceID + SPIRV_Cross_BaseInstance)].Position.w, all(equal(LODWeights, vec4(0.0)))); + float floor_lod = floor(vlod); + float fract_lod = vlod - floor_lod; + uint ufloor_lod = uint(floor_lod); + uvec2 uPosition = uvec2(Position); + uvec2 mask = (uvec2(1u) << uvec2(ufloor_lod, ufloor_lod + 1u)) - uvec2(1u); + uint _332; + if (uPosition.x < 32u) + { + _332 = mask.x; + } + else + { + _332 = 0u; + } + uint _342 = _332; + uint _343; + if (uPosition.y < 32u) + { + _343 = mask.y; + } + else + { + _343 = 0u; + } + uvec2 rounding = uvec2(_342, _343); + vec4 lower_upper_snapped = vec4((uPosition + rounding).xyxy & ~mask.xxyy); + return mix(lower_upper_snapped.xy, lower_upper_snapped.zw, vec2(fract_lod)); +} + +vec2 lod_factor(vec2 uv) +{ + float level = textureLod(TexLOD, uv, 0.0).x * 7.96875; + float floor_level = floor(level); + float fract_level = level - floor_level; + return vec2(floor_level, fract_level); +} + +void main() +{ + vec2 PatchPos = _284.Patches[(gl_InstanceID + SPIRV_Cross_BaseInstance)].Position.xz * _381.InvGroundSize_PatchScale.zw; + vec2 WarpedPos = warp_position(); + vec2 VertexPos = PatchPos + WarpedPos; + vec2 NormalizedPos = VertexPos * _381.InvGroundSize_PatchScale.xy; + vec2 param = NormalizedPos; + vec2 lod = lod_factor(param); + vec2 Offset = _381.InvGroundSize_PatchScale.xy * exp2(lod.x); + float Elevation = mix(textureLod(TexHeightmap, NormalizedPos + (Offset * 0.5), lod.x).x, textureLod(TexHeightmap, NormalizedPos + (Offset * 
1.0), lod.x + 1.0).x, lod.y); + vec3 WorldPos = vec3(NormalizedPos.x, Elevation, NormalizedPos.y); + WorldPos *= _381.GroundScale.xyz; + WorldPos += _381.GroundPosition.xyz; + EyeVec = WorldPos - _58.g_CamPos.xyz; + TexCoord = NormalizedPos + (_381.InvGroundSize_PatchScale.xy * 0.5); + gl_Position = (((_58.g_ViewProj_Row0 * WorldPos.x) + (_58.g_ViewProj_Row1 * WorldPos.y)) + (_58.g_ViewProj_Row2 * WorldPos.z)) + _58.g_ViewProj_Row3; +} + diff --git a/reference/shaders/vert/ocean.vert b/reference/shaders/vert/ocean.vert new file mode 100644 index 0000000000..c542fe25a4 --- /dev/null +++ b/reference/shaders/vert/ocean.vert @@ -0,0 +1,133 @@ +#version 310 es + +struct PatchData +{ + vec4 Position; + vec4 LODs; +}; + +layout(binding = 0, std140) uniform GlobalVSData +{ + vec4 g_ViewProj_Row0; + vec4 g_ViewProj_Row1; + vec4 g_ViewProj_Row2; + vec4 g_ViewProj_Row3; + vec4 g_CamPos; + vec4 g_CamRight; + vec4 g_CamUp; + vec4 g_CamFront; + vec4 g_SunDir; + vec4 g_SunColor; + vec4 g_TimeParams; + vec4 g_ResolutionParams; + vec4 g_CamAxisRight; + vec4 g_FogColor_Distance; + vec4 g_ShadowVP_Row0; + vec4 g_ShadowVP_Row1; + vec4 g_ShadowVP_Row2; + vec4 g_ShadowVP_Row3; +} _58; + +layout(binding = 0, std140) uniform Offsets +{ + PatchData Patches[256]; +} _284; + +layout(binding = 4, std140) uniform GlobalOcean +{ + vec4 OceanScale; + vec4 OceanPosition; + vec4 InvOceanSize_PatchScale; + vec4 NormalTexCoordScale; +} _405; + +layout(binding = 1) uniform mediump sampler2D TexLOD; +layout(binding = 0) uniform mediump sampler2D TexDisplacement; + +layout(location = 1) in vec4 LODWeights; +uniform int SPIRV_Cross_BaseInstance; +layout(location = 0) in vec4 Position; +layout(location = 0) out vec3 EyeVec; +layout(location = 1) out vec4 TexCoord; + +vec2 warp_position() +{ + float vlod = dot(LODWeights, _284.Patches[(gl_InstanceID + SPIRV_Cross_BaseInstance)].LODs); + vlod = mix(vlod, _284.Patches[(gl_InstanceID + SPIRV_Cross_BaseInstance)].Position.w, all(equal(LODWeights, vec4(0.0)))); + float floor_lod = floor(vlod); + float fract_lod = vlod - floor_lod; + uint ufloor_lod = uint(floor_lod); + uvec4 uPosition = uvec4(Position); + uvec2 mask = (uvec2(1u) << uvec2(ufloor_lod, ufloor_lod + 1u)) - uvec2(1u); + uint _333; + if (uPosition.x < 32u) + { + _333 = mask.x; + } + else + { + _333 = 0u; + } + uvec4 rounding; + rounding.x = _333; + uint _345; + if (uPosition.y < 32u) + { + _345 = mask.x; + } + else + { + _345 = 0u; + } + rounding.y = _345; + uint _356; + if (uPosition.x < 32u) + { + _356 = mask.y; + } + else + { + _356 = 0u; + } + rounding.z = _356; + uint _368; + if (uPosition.y < 32u) + { + _368 = mask.y; + } + else + { + _368 = 0u; + } + rounding.w = _368; + vec4 lower_upper_snapped = vec4((uPosition.xyxy + rounding) & ~mask.xxyy); + return mix(lower_upper_snapped.xy, lower_upper_snapped.zw, vec2(fract_lod)); +} + +vec2 lod_factor(vec2 uv) +{ + float level = textureLod(TexLOD, uv, 0.0).x * 7.96875; + float floor_level = floor(level); + float fract_level = level - floor_level; + return vec2(floor_level, fract_level); +} + +void main() +{ + vec2 PatchPos = _284.Patches[(gl_InstanceID + SPIRV_Cross_BaseInstance)].Position.xz * _405.InvOceanSize_PatchScale.zw; + vec2 WarpedPos = warp_position(); + vec2 VertexPos = PatchPos + WarpedPos; + vec2 NormalizedPos = VertexPos * _405.InvOceanSize_PatchScale.xy; + vec2 NormalizedTex = NormalizedPos * _405.NormalTexCoordScale.zw; + vec2 param = NormalizedPos; + vec2 lod = lod_factor(param); + vec2 Offset = (_405.InvOceanSize_PatchScale.xy * exp2(lod.x)) * 
_405.NormalTexCoordScale.zw; + vec3 Displacement = mix(textureLod(TexDisplacement, NormalizedTex + (Offset * 0.5), lod.x).yxz, textureLod(TexDisplacement, NormalizedTex + (Offset * 1.0), lod.x + 1.0).yxz, vec3(lod.y)); + vec3 WorldPos = vec3(NormalizedPos.x, 0.0, NormalizedPos.y) + Displacement; + WorldPos *= _405.OceanScale.xyz; + WorldPos += _405.OceanPosition.xyz; + EyeVec = WorldPos - _58.g_CamPos.xyz; + TexCoord = vec4(NormalizedTex, NormalizedTex * _405.NormalTexCoordScale.xy) + ((_405.InvOceanSize_PatchScale.xyxy * 0.5) * _405.NormalTexCoordScale.zwzw); + gl_Position = (((_58.g_ViewProj_Row0 * WorldPos.x) + (_58.g_ViewProj_Row1 * WorldPos.y)) + (_58.g_ViewProj_Row2 * WorldPos.z)) + _58.g_ViewProj_Row3; +} + diff --git a/reference/shaders/vert/texture_buffer.vert b/reference/shaders/vert/texture_buffer.vert new file mode 100644 index 0000000000..0a198e75e0 --- /dev/null +++ b/reference/shaders/vert/texture_buffer.vert @@ -0,0 +1,11 @@ +#version 310 es +#extension GL_OES_texture_buffer : require + +layout(binding = 4) uniform highp samplerBuffer uSamp; +layout(binding = 5, rgba32f) uniform highp readonly imageBuffer uSampo; + +void main() +{ + gl_Position = texelFetch(uSamp, 10) + imageLoad(uSampo, 100); +} + diff --git a/reference/shaders/vert/ubo.vert b/reference/shaders/vert/ubo.vert new file mode 100644 index 0000000000..335c1fe1b8 --- /dev/null +++ b/reference/shaders/vert/ubo.vert @@ -0,0 +1,17 @@ +#version 310 es + +layout(binding = 0, std140) uniform UBO +{ + mat4 mvp; +} _16; + +in vec4 aVertex; +out vec3 vNormal; +in vec3 aNormal; + +void main() +{ + gl_Position = _16.mvp * aVertex; + vNormal = aNormal; +} + diff --git a/reference/shaders/vulkan/frag/combined-texture-sampler.vk.frag b/reference/shaders/vulkan/frag/combined-texture-sampler.vk.frag new file mode 100644 index 0000000000..5b9c0ddadf --- /dev/null +++ b/reference/shaders/vulkan/frag/combined-texture-sampler.vk.frag @@ -0,0 +1,48 @@ +#version 310 es +precision mediump float; +precision highp int; + +uniform mediump sampler2D SPIRV_Cross_CombineduTexture0uSampler0; +uniform mediump sampler2D SPIRV_Cross_CombineduTexture1uSampler1; +uniform mediump sampler2D SPIRV_Cross_CombineduTexture1uSampler0; +uniform mediump sampler2D SPIRV_Cross_CombineduTexture0uSampler1; + +layout(location = 0) in vec2 vTex; +layout(location = 0) out vec4 FragColor; + +vec4 sample_dual(mediump sampler2D SPIRV_Cross_Combinedtexsamp) +{ + return texture(SPIRV_Cross_Combinedtexsamp, vTex); +} + +vec4 sample_duals() +{ + vec4 a = sample_dual(SPIRV_Cross_CombineduTexture0uSampler0); + vec4 b = sample_dual(SPIRV_Cross_CombineduTexture1uSampler1); + return a + b; +} + +vec4 sample_global_tex(mediump sampler2D SPIRV_Cross_CombineduTexture0samp, mediump sampler2D SPIRV_Cross_CombineduTexture1samp) +{ + vec4 a = texture(SPIRV_Cross_CombineduTexture0samp, vTex); + vec4 b = sample_dual(SPIRV_Cross_CombineduTexture1samp); + return a + b; +} + +vec4 sample_global_sampler(mediump sampler2D SPIRV_Cross_CombinedtexuSampler0, mediump sampler2D SPIRV_Cross_CombinedtexuSampler1) +{ + vec4 a = texture(SPIRV_Cross_CombinedtexuSampler0, vTex); + vec4 b = sample_dual(SPIRV_Cross_CombinedtexuSampler1); + return a + b; +} + +void main() +{ + vec4 c0 = sample_duals(); + vec4 c1 = sample_global_tex(SPIRV_Cross_CombineduTexture0uSampler0, SPIRV_Cross_CombineduTexture1uSampler0); + vec4 c2 = sample_global_tex(SPIRV_Cross_CombineduTexture0uSampler1, SPIRV_Cross_CombineduTexture1uSampler1); + vec4 c3 = sample_global_sampler(SPIRV_Cross_CombineduTexture0uSampler0, 
SPIRV_Cross_CombineduTexture0uSampler1); + vec4 c4 = sample_global_sampler(SPIRV_Cross_CombineduTexture1uSampler0, SPIRV_Cross_CombineduTexture1uSampler1); + FragColor = (((c0 + c1) + c2) + c3) + c4; +} + diff --git a/reference/shaders/vulkan/frag/combined-texture-sampler.vk.frag.vk b/reference/shaders/vulkan/frag/combined-texture-sampler.vk.frag.vk new file mode 100644 index 0000000000..ae8df4c925 --- /dev/null +++ b/reference/shaders/vulkan/frag/combined-texture-sampler.vk.frag.vk @@ -0,0 +1,48 @@ +#version 310 es +precision mediump float; +precision highp int; + +layout(set = 0, binding = 2) uniform mediump texture2D uTexture0; +layout(set = 0, binding = 3) uniform mediump texture2D uTexture1; +layout(set = 0, binding = 0) uniform mediump sampler uSampler0; +layout(set = 0, binding = 1) uniform mediump sampler uSampler1; + +layout(location = 0) in vec2 vTex; +layout(location = 0) out vec4 FragColor; + +vec4 sample_dual(mediump sampler samp, mediump texture2D tex) +{ + return texture(sampler2D(tex, samp), vTex); +} + +vec4 sample_duals() +{ + vec4 a = sample_dual(uSampler0, uTexture0); + vec4 b = sample_dual(uSampler1, uTexture1); + return a + b; +} + +vec4 sample_global_tex(mediump sampler samp) +{ + vec4 a = texture(sampler2D(uTexture0, samp), vTex); + vec4 b = sample_dual(samp, uTexture1); + return a + b; +} + +vec4 sample_global_sampler(mediump texture2D tex) +{ + vec4 a = texture(sampler2D(tex, uSampler0), vTex); + vec4 b = sample_dual(uSampler1, tex); + return a + b; +} + +void main() +{ + vec4 c0 = sample_duals(); + vec4 c1 = sample_global_tex(uSampler0); + vec4 c2 = sample_global_tex(uSampler1); + vec4 c3 = sample_global_sampler(uTexture0); + vec4 c4 = sample_global_sampler(uTexture1); + FragColor = (((c0 + c1) + c2) + c3) + c4; +} + diff --git a/reference/shaders/vulkan/frag/input-attachment-ms.vk.frag b/reference/shaders/vulkan/frag/input-attachment-ms.vk.frag new file mode 100644 index 0000000000..cb6745be0b --- /dev/null +++ b/reference/shaders/vulkan/frag/input-attachment-ms.vk.frag @@ -0,0 +1,14 @@ +#version 310 es +precision mediump float; +precision highp int; + +layout(binding = 0) uniform mediump sampler2DMS uSubpass0; +layout(binding = 1) uniform mediump sampler2DMS uSubpass1; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = texelFetch(uSubpass0, ivec2(gl_FragCoord.xy), 1) + texelFetch(uSubpass1, ivec2(gl_FragCoord.xy), 2); +} + diff --git a/reference/shaders/vulkan/frag/input-attachment-ms.vk.frag.vk b/reference/shaders/vulkan/frag/input-attachment-ms.vk.frag.vk new file mode 100644 index 0000000000..488c8939be --- /dev/null +++ b/reference/shaders/vulkan/frag/input-attachment-ms.vk.frag.vk @@ -0,0 +1,14 @@ +#version 310 es +precision mediump float; +precision highp int; + +layout(input_attachment_index = 0, set = 0, binding = 0) uniform mediump subpassInputMS uSubpass0; +layout(input_attachment_index = 1, set = 0, binding = 1) uniform mediump subpassInputMS uSubpass1; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = subpassLoad(uSubpass0, 1) + subpassLoad(uSubpass1, 2); +} + diff --git a/reference/shaders/vulkan/frag/input-attachment.vk.frag b/reference/shaders/vulkan/frag/input-attachment.vk.frag new file mode 100644 index 0000000000..8d216b2c49 --- /dev/null +++ b/reference/shaders/vulkan/frag/input-attachment.vk.frag @@ -0,0 +1,14 @@ +#version 310 es +precision mediump float; +precision highp int; + +layout(binding = 0) uniform mediump sampler2D uSubpass0; +layout(binding = 1) uniform mediump sampler2D 
uSubpass1; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = texelFetch(uSubpass0, ivec2(gl_FragCoord.xy), 0) + texelFetch(uSubpass1, ivec2(gl_FragCoord.xy), 0); +} + diff --git a/reference/shaders/vulkan/frag/input-attachment.vk.frag.vk b/reference/shaders/vulkan/frag/input-attachment.vk.frag.vk new file mode 100644 index 0000000000..c8b5d9a70d --- /dev/null +++ b/reference/shaders/vulkan/frag/input-attachment.vk.frag.vk @@ -0,0 +1,14 @@ +#version 310 es +precision mediump float; +precision highp int; + +layout(input_attachment_index = 0, set = 0, binding = 0) uniform mediump subpassInput uSubpass0; +layout(input_attachment_index = 1, set = 0, binding = 1) uniform mediump subpassInput uSubpass1; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = subpassLoad(uSubpass0) + subpassLoad(uSubpass1); +} + diff --git a/reference/shaders/vulkan/frag/push-constant.frag.vk b/reference/shaders/vulkan/frag/push-constant.frag.vk new file mode 100644 index 0000000000..748a028678 --- /dev/null +++ b/reference/shaders/vulkan/frag/push-constant.frag.vk @@ -0,0 +1,18 @@ +#version 310 es +precision mediump float; +precision highp int; + +layout(push_constant, std430) uniform PushConstants +{ + vec4 value0; + vec4 value1; +} push; + +layout(location = 0) out vec4 FragColor; +layout(location = 0) in vec4 vColor; + +void main() +{ + FragColor = ((vColor + push.value0) + push.value1); +} + diff --git a/reference/shaders/vulkan/frag/push-constant.vk.frag b/reference/shaders/vulkan/frag/push-constant.vk.frag new file mode 100644 index 0000000000..c04a7ca488 --- /dev/null +++ b/reference/shaders/vulkan/frag/push-constant.vk.frag @@ -0,0 +1,20 @@ +#version 310 es +precision mediump float; +precision highp int; + +struct PushConstants +{ + vec4 value0; + vec4 value1; +}; + +uniform PushConstants push; + +layout(location = 0) out vec4 FragColor; +layout(location = 0) in vec4 vColor; + +void main() +{ + FragColor = (vColor + push.value0) + push.value1; +} + diff --git a/reference/shaders/vulkan/frag/push-constant.vk.frag.vk b/reference/shaders/vulkan/frag/push-constant.vk.frag.vk new file mode 100644 index 0000000000..6cec90f19e --- /dev/null +++ b/reference/shaders/vulkan/frag/push-constant.vk.frag.vk @@ -0,0 +1,18 @@ +#version 310 es +precision mediump float; +precision highp int; + +layout(push_constant, std430) uniform PushConstants +{ + vec4 value0; + vec4 value1; +} push; + +layout(location = 0) out vec4 FragColor; +layout(location = 0) in vec4 vColor; + +void main() +{ + FragColor = (vColor + push.value0) + push.value1; +} + diff --git a/reference/shaders/vulkan/frag/separate-sampler-texture.vk.frag b/reference/shaders/vulkan/frag/separate-sampler-texture.vk.frag new file mode 100644 index 0000000000..78477cfbae --- /dev/null +++ b/reference/shaders/vulkan/frag/separate-sampler-texture.vk.frag @@ -0,0 +1,37 @@ +#version 310 es +precision mediump float; +precision highp int; + +uniform mediump sampler2D SPIRV_Cross_CombineduTextureuSampler; +uniform mediump sampler2DArray SPIRV_Cross_CombineduTextureArrayuSampler; +uniform mediump samplerCube SPIRV_Cross_CombineduTextureCubeuSampler; +uniform mediump sampler3D SPIRV_Cross_CombineduTexture3DuSampler; + +layout(location = 0) in vec2 vTex; +layout(location = 1) in vec3 vTex3; +layout(location = 0) out vec4 FragColor; + +vec4 sample_func(vec2 uv, mediump sampler2D SPIRV_Cross_CombineduTexturesamp) +{ + return texture(SPIRV_Cross_CombineduTexturesamp, uv); +} + +vec4 sample_func_dual(vec2 uv, mediump sampler2D 
SPIRV_Cross_Combinedtexsamp) +{ + return texture(SPIRV_Cross_Combinedtexsamp, uv); +} + +void main() +{ + vec2 off = vec2(1.0) / vec2(textureSize(SPIRV_Cross_CombineduTextureuSampler, 0)); + vec2 off2 = vec2(1.0) / vec2(textureSize(SPIRV_Cross_CombineduTextureuSampler, 1)); + highp vec2 param = (vTex + off) + off2; + vec4 c0 = sample_func(param, SPIRV_Cross_CombineduTextureuSampler); + highp vec2 param_1 = (vTex + off) + off2; + vec4 c1 = sample_func_dual(param_1, SPIRV_Cross_CombineduTextureuSampler); + vec4 c2 = texture(SPIRV_Cross_CombineduTextureArrayuSampler, vTex3); + vec4 c3 = texture(SPIRV_Cross_CombineduTextureCubeuSampler, vTex3); + vec4 c4 = texture(SPIRV_Cross_CombineduTexture3DuSampler, vTex3); + FragColor = (((c0 + c1) + c2) + c3) + c4; +} + diff --git a/reference/shaders/vulkan/frag/separate-sampler-texture.vk.frag.vk b/reference/shaders/vulkan/frag/separate-sampler-texture.vk.frag.vk new file mode 100644 index 0000000000..cfa2f39616 --- /dev/null +++ b/reference/shaders/vulkan/frag/separate-sampler-texture.vk.frag.vk @@ -0,0 +1,38 @@ +#version 310 es +precision mediump float; +precision highp int; + +layout(set = 0, binding = 1) uniform mediump texture2D uTexture; +layout(set = 0, binding = 0) uniform mediump sampler uSampler; +layout(set = 0, binding = 4) uniform mediump texture2DArray uTextureArray; +layout(set = 0, binding = 3) uniform mediump textureCube uTextureCube; +layout(set = 0, binding = 2) uniform mediump texture3D uTexture3D; + +layout(location = 0) in vec2 vTex; +layout(location = 1) in vec3 vTex3; +layout(location = 0) out vec4 FragColor; + +vec4 sample_func(mediump sampler samp, vec2 uv) +{ + return texture(sampler2D(uTexture, samp), uv); +} + +vec4 sample_func_dual(mediump sampler samp, mediump texture2D tex, vec2 uv) +{ + return texture(sampler2D(tex, samp), uv); +} + +void main() +{ + vec2 off = vec2(1.0) / vec2(textureSize(sampler2D(uTexture, uSampler), 0)); + vec2 off2 = vec2(1.0) / vec2(textureSize(sampler2D(uTexture, uSampler), 1)); + highp vec2 param = (vTex + off) + off2; + vec4 c0 = sample_func(uSampler, param); + highp vec2 param_1 = (vTex + off) + off2; + vec4 c1 = sample_func_dual(uSampler, uTexture, param_1); + vec4 c2 = texture(sampler2DArray(uTextureArray, uSampler), vTex3); + vec4 c3 = texture(samplerCube(uTextureCube, uSampler), vTex3); + vec4 c4 = texture(sampler3D(uTexture3D, uSampler), vTex3); + FragColor = (((c0 + c1) + c2) + c3) + c4; +} + diff --git a/reference/shaders/vulkan/frag/spec-constant.vk.frag b/reference/shaders/vulkan/frag/spec-constant.vk.frag new file mode 100644 index 0000000000..6fa15eeb5f --- /dev/null +++ b/reference/shaders/vulkan/frag/spec-constant.vk.frag @@ -0,0 +1,59 @@ +#version 310 es +precision mediump float; +precision highp int; + +struct Foo +{ + float elems[(4 + 2)]; +}; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + float t0 = 1.0; + float t1 = 2.0; + mediump uint c0 = (uint(3) + 0u); + mediump int c1 = (-3); + mediump int c2 = (~3); + mediump int c3 = (3 + 4); + mediump int c4 = (3 - 4); + mediump int c5 = (3 * 4); + mediump int c6 = (3 / 4); + mediump uint c7 = (5u / 6u); + mediump int c8 = (3 % 4); + mediump uint c9 = (5u % 6u); + mediump int c10 = (3 >> 4); + mediump uint c11 = (5u >> 6u); + mediump int c12 = (3 << 4); + mediump int c13 = (3 | 4); + mediump int c14 = (3 ^ 4); + mediump int c15 = (3 & 4); + bool c16 = (false || true); + bool c17 = (false && true); + bool c18 = (!false); + bool c19 = (false == true); + bool c20 = (false != true); + bool c21 = (3 == 4); + bool c22 = (3 
!= 4); + bool c23 = (3 < 4); + bool c24 = (5u < 6u); + bool c25 = (3 > 4); + bool c26 = (5u > 6u); + bool c27 = (3 <= 4); + bool c28 = (5u <= 6u); + bool c29 = (3 >= 4); + bool c30 = (5u >= 6u); + mediump int c31 = c8 + c3; + mediump int c32 = int(5u + 0u); + bool c33 = (3 != int(0u)); + bool c34 = (5u != 0u); + mediump int c35 = int(false); + mediump uint c36 = uint(false); + float c37 = float(false); + float vec0[4][(3 + 3)]; + float vec1[(3 + 2)][(4 + 5)]; + Foo foo; + FragColor = ((vec4(t0 + t1) + vec4(vec0[0][0])) + vec4(vec1[0][0])) + vec4(foo.elems[3]); +} + diff --git a/reference/shaders/vulkan/frag/spec-constant.vk.frag.vk b/reference/shaders/vulkan/frag/spec-constant.vk.frag.vk new file mode 100644 index 0000000000..14747d103f --- /dev/null +++ b/reference/shaders/vulkan/frag/spec-constant.vk.frag.vk @@ -0,0 +1,68 @@ +#version 310 es +precision mediump float; +precision highp int; + +layout(constant_id = 1) const float _9 = 1.0; +layout(constant_id = 2) const float _11 = 2.0; +layout(constant_id = 3) const int _16 = 3; +layout(constant_id = 4) const int _25 = 4; +layout(constant_id = 5) const uint _34 = 5u; +layout(constant_id = 6) const uint _35 = 6u; +layout(constant_id = 7) const bool _56 = false; +layout(constant_id = 8) const bool _57 = true; + +struct Foo +{ + float elems[(_25 + 2)]; +}; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + float t0 = _9; + float t1 = _11; + mediump uint c0 = (uint(_16) + 0u); + mediump int c1 = (-_16); + mediump int c2 = (~_16); + mediump int c3 = (_16 + _25); + mediump int c4 = (_16 - _25); + mediump int c5 = (_16 * _25); + mediump int c6 = (_16 / _25); + mediump uint c7 = (_34 / _35); + mediump int c8 = (_16 % _25); + mediump uint c9 = (_34 % _35); + mediump int c10 = (_16 >> _25); + mediump uint c11 = (_34 >> _35); + mediump int c12 = (_16 << _25); + mediump int c13 = (_16 | _25); + mediump int c14 = (_16 ^ _25); + mediump int c15 = (_16 & _25); + bool c16 = (_56 || _57); + bool c17 = (_56 && _57); + bool c18 = (!_56); + bool c19 = (_56 == _57); + bool c20 = (_56 != _57); + bool c21 = (_16 == _25); + bool c22 = (_16 != _25); + bool c23 = (_16 < _25); + bool c24 = (_34 < _35); + bool c25 = (_16 > _25); + bool c26 = (_34 > _35); + bool c27 = (_16 <= _25); + bool c28 = (_34 <= _35); + bool c29 = (_16 >= _25); + bool c30 = (_34 >= _35); + mediump int c31 = c8 + c3; + mediump int c32 = int(_34 + 0u); + bool c33 = (_16 != int(0u)); + bool c34 = (_34 != 0u); + mediump int c35 = int(_56); + mediump uint c36 = uint(_56); + float c37 = float(_56); + float vec0[_25][(_16 + 3)]; + float vec1[(_16 + 2)][(_25 + 5)]; + Foo foo; + FragColor = ((vec4(t0 + t1) + vec4(vec0[0][0])) + vec4(vec1[0][0])) + vec4(foo.elems[_16]); +} + diff --git a/reference/shaders/vulkan/vert/vulkan-vertex.vert b/reference/shaders/vulkan/vert/vulkan-vertex.vert new file mode 100644 index 0000000000..8de2b111ef --- /dev/null +++ b/reference/shaders/vulkan/vert/vulkan-vertex.vert @@ -0,0 +1,9 @@ +#version 310 es + +uniform int SPIRV_Cross_BaseInstance; + +void main() +{ + gl_Position = (vec4(1.0, 2.0, 3.0, 4.0) * float((gl_VertexID + (gl_InstanceID + SPIRV_Cross_BaseInstance)))); +} + diff --git a/reference/shaders/vulkan/vert/vulkan-vertex.vert.vk b/reference/shaders/vulkan/vert/vulkan-vertex.vert.vk new file mode 100644 index 0000000000..9ee3cc0997 --- /dev/null +++ b/reference/shaders/vulkan/vert/vulkan-vertex.vert.vk @@ -0,0 +1,7 @@ +#version 310 es + +void main() +{ + gl_Position = (vec4(1.0, 2.0, 3.0, 4.0) * float((gl_VertexIndex + gl_InstanceIndex))); +} + diff 
--git a/reference/shaders/vulkan/vert/vulkan-vertex.vk.vert b/reference/shaders/vulkan/vert/vulkan-vertex.vk.vert new file mode 100644 index 0000000000..60ba1882f8 --- /dev/null +++ b/reference/shaders/vulkan/vert/vulkan-vertex.vk.vert @@ -0,0 +1,9 @@ +#version 310 es + +uniform int SPIRV_Cross_BaseInstance; + +void main() +{ + gl_Position = vec4(1.0, 2.0, 3.0, 4.0) * float(gl_VertexID + (gl_InstanceID + SPIRV_Cross_BaseInstance)); +} + diff --git a/reference/shaders/vulkan/vert/vulkan-vertex.vk.vert.vk b/reference/shaders/vulkan/vert/vulkan-vertex.vk.vert.vk new file mode 100644 index 0000000000..8c4930d7a8 --- /dev/null +++ b/reference/shaders/vulkan/vert/vulkan-vertex.vk.vert.vk @@ -0,0 +1,7 @@ +#version 310 es + +void main() +{ + gl_Position = vec4(1.0, 2.0, 3.0, 4.0) * float(gl_VertexIndex + gl_InstanceIndex); +} + diff --git a/samples/cpp/Makefile b/samples/cpp/Makefile new file mode 100644 index 0000000000..225bb3d57d --- /dev/null +++ b/samples/cpp/Makefile @@ -0,0 +1,28 @@ +SOURCES := $(wildcard *.comp) +SPIRV := $(SOURCES:.comp=.spv) +CPP_INTERFACE := $(SOURCES:.comp=.spv.cpp) +CPP_DRIVER := $(SOURCES:.comp=.cpp) +EXECUTABLES := $(SOURCES:.comp=.shader) +OBJECTS := $(CPP_DRIVER:.cpp=.o) $(CPP_INTERFACE:.cpp=.o) + +CXXFLAGS += -std=c++11 -I../../include -I. +LDFLAGS += -pthread -lm + +all: $(EXECUTABLES) + +%.spv: %.comp + glslangValidator -V -o $@ $< + +%.spv.cpp: %.spv + ../../spirv-cross --cpp --output $@ $< + +%.o: %.cpp + $(CXX) -c -o $@ $< $(CXXFLAGS) + +%.shader: %.o %.spv.o + $(CXX) -o $@ $^ $(LDFLAGS) + +clean: + $(RM) -f $(EXECUTABLES) $(SPIRV) $(CPP_INTERFACE) $(OBJECTS) + +.PHONY: clean diff --git a/samples/cpp/atomics.comp b/samples/cpp/atomics.comp new file mode 100644 index 0000000000..0bf6d2ad01 --- /dev/null +++ b/samples/cpp/atomics.comp @@ -0,0 +1,29 @@ +#version 310 es +layout(local_size_x = 64) in; + +layout(set = 0, binding = 0, std430) readonly buffer SSBO0 +{ + float inputs[]; +}; + +layout(set = 0, binding = 1, std430) writeonly buffer SSBO1 +{ + float outputs[]; +}; + +layout(set = 0, binding = 2, std430) buffer SSBO2 +{ + uint counter; +}; + +void main() +{ + // Builds a tightly packed list of all values less than 10.0. + // The output order is random. + float value = inputs[gl_GlobalInvocationID.x]; + if (value < 10.0) + { + uint output_index = atomicAdd(counter, 1u); + outputs[output_index] = value; + } +} diff --git a/samples/cpp/atomics.cpp b/samples/cpp/atomics.cpp new file mode 100644 index 0000000000..9591461483 --- /dev/null +++ b/samples/cpp/atomics.cpp @@ -0,0 +1,90 @@ +/* + * Copyright 2015-2016 ARM Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "spirv_cross/external_interface.h" +#include <stdio.h> + +#ifndef GLM_SWIZZLE +#define GLM_SWIZZLE +#endif + +#ifndef GLM_FORCE_RADIANS +#define GLM_FORCE_RADIANS +#endif + +#include <glm/glm.hpp> +using namespace glm; + +int main() +{ + // First, we get the C interface to the shader. + // This can be loaded from a dynamic library, or as here, + // linked in as a static library.
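 + // Note: the Makefile above generates the .spv.cpp interface with `spirv-cross --cpp`; + // that generated translation unit is what defines spirv_cross_get_interface() + // (declared in external_interface.h), whose table exposes the construct/destruct/invoke + // hooks used below.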
+ auto *iface = spirv_cross_get_interface(); + + // Create an instance of the shader interface. + auto *shader = iface->construct(); + +// Build some input data for our compute shader. +#define NUM_WORKGROUPS 4 + float a[64 * NUM_WORKGROUPS]; + float b[64 * NUM_WORKGROUPS] = {}; + uint32_t counter = 0; + + for (int i = 0; i < 64 * NUM_WORKGROUPS; i++) + { + a[i] = i * 0.46f; + } + + void *aptr = a; + void *bptr = b; + void *cptr = &counter; + + // Bind resources to the shader. + // For resources like samplers and buffers, we provide a list of pointers, + // since UBOs, SSBOs and samplers can be arrays, and can point to different types, + // which is especially true for samplers. + spirv_cross_set_resource(shader, 0, 0, &aptr, sizeof(aptr)); + spirv_cross_set_resource(shader, 0, 1, &bptr, sizeof(bptr)); + spirv_cross_set_resource(shader, 0, 2, &cptr, sizeof(cptr)); + + // We also have to set builtins. + // The relevant builtins will depend on the shader, + // but for compute, there are few builtins, which are gl_NumWorkGroups and gl_WorkGroupID. + // LocalInvocationID and GlobalInvocationID are inferred when executing the invocation. + uvec3 num_workgroups(NUM_WORKGROUPS, 1, 1); + uvec3 work_group_id(0, 0, 0); + spirv_cross_set_builtin(shader, SPIRV_CROSS_BUILTIN_NUM_WORK_GROUPS, &num_workgroups, sizeof(num_workgroups)); + spirv_cross_set_builtin(shader, SPIRV_CROSS_BUILTIN_WORK_GROUP_ID, &work_group_id, sizeof(work_group_id)); + + // Execute 4 work groups. + for (unsigned i = 0; i < NUM_WORKGROUPS; i++) + { + work_group_id.x = i; + iface->invoke(shader); + } + + // Call destructor. + iface->destruct(shader); + + // Verify our output. + // TODO: Implement a test framework that asserts results computed. + fprintf(stderr, "Counter = %u\n", counter); + for (unsigned i = 0; i < counter; i++) + { + fprintf(stderr, "[%3u] = %.1f\n", i, b[i]); + } +} diff --git a/samples/cpp/multiply.comp b/samples/cpp/multiply.comp new file mode 100644 index 0000000000..1ac7869ad0 --- /dev/null +++ b/samples/cpp/multiply.comp @@ -0,0 +1,22 @@ +#version 310 es +layout(local_size_x = 64) in; + +layout(set = 0, binding = 0, std430) readonly buffer SSBO0 +{ + vec4 a[]; +}; + +layout(set = 0, binding = 1, std430) readonly buffer SSBO1 +{ + vec4 b[]; +}; + +layout(set = 0, binding = 2, std430) buffer SSBO2 +{ + vec4 c[]; +}; + +void main() +{ + c[gl_GlobalInvocationID.x] = a[gl_GlobalInvocationID.x] * b[gl_GlobalInvocationID.x]; +} diff --git a/samples/cpp/multiply.cpp b/samples/cpp/multiply.cpp new file mode 100644 index 0000000000..5b2a8cdd3c --- /dev/null +++ b/samples/cpp/multiply.cpp @@ -0,0 +1,91 @@ +/* + * Copyright 2015-2016 ARM Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "spirv_cross/external_interface.h" +#include <stdio.h> + +#ifndef GLM_SWIZZLE +#define GLM_SWIZZLE +#endif + +#ifndef GLM_FORCE_RADIANS +#define GLM_FORCE_RADIANS +#endif + +#include <glm/glm.hpp> +using namespace glm; + +int main() +{ + // First, we get the C interface to the shader.
+ // This can be loaded from a dynamic library, or as here, + // linked in as a static library. + auto *iface = spirv_cross_get_interface(); + + // Create an instance of the shader interface. + auto *shader = iface->construct(); + +// Build some input data for our compute shader. +#define NUM_WORKGROUPS 4 + vec4 a[64 * NUM_WORKGROUPS]; + vec4 b[64 * NUM_WORKGROUPS]; + vec4 c[64 * NUM_WORKGROUPS] = {}; + + for (int i = 0; i < 64 * NUM_WORKGROUPS; i++) + { + a[i] = vec4(100 + i, 101 + i, 102 + i, 103 + i); + b[i] = vec4(100 - i, 99 - i, 98 - i, 97 - i); + } + + void *aptr = a; + void *bptr = b; + void *cptr = c; + + // Bind resources to the shader. + // For resources like samplers and buffers, we provide a list of pointers, + // since UBOs, SSBOs and samplers can be arrays, and can point to different types, + // which is especially true for samplers. + spirv_cross_set_resource(shader, 0, 0, &aptr, sizeof(aptr)); + spirv_cross_set_resource(shader, 0, 1, &bptr, sizeof(bptr)); + spirv_cross_set_resource(shader, 0, 2, &cptr, sizeof(cptr)); + + // We also have to set builtins. + // The relevant builtins will depend on the shader, + // but for compute, there are few builtins, which are gl_NumWorkGroups and gl_WorkGroupID. + // LocalInvocationID and GlobalInvocationID are inferred when executing the invocation. + uvec3 num_workgroups(NUM_WORKGROUPS, 1, 1); + uvec3 work_group_id(0, 0, 0); + spirv_cross_set_builtin(shader, SPIRV_CROSS_BUILTIN_NUM_WORK_GROUPS, &num_workgroups, sizeof(num_workgroups)); + spirv_cross_set_builtin(shader, SPIRV_CROSS_BUILTIN_WORK_GROUP_ID, &work_group_id, sizeof(work_group_id)); + + // Execute 4 work groups. + for (unsigned i = 0; i < NUM_WORKGROUPS; i++) + { + work_group_id.x = i; + iface->invoke(shader); + } + + // Call destructor. + iface->destruct(shader); + + // Verify our output. + // TODO: Implement a test framework that asserts results computed. + for (unsigned i = 0; i < 64 * NUM_WORKGROUPS; i++) + { + fprintf(stderr, "(%.1f, %.1f, %.1f, %.1f) * (%.1f, %.1f, %.1f, %.1f) => (%.1f, %.1f, %.1f, %.1f)\n", a[i].x, + a[i].y, a[i].z, a[i].w, b[i].x, b[i].y, b[i].z, b[i].w, c[i].x, c[i].y, c[i].z, c[i].w); + } +} diff --git a/samples/cpp/shared.comp b/samples/cpp/shared.comp new file mode 100644 index 0000000000..7d59060aa9 --- /dev/null +++ b/samples/cpp/shared.comp @@ -0,0 +1,36 @@ +#version 310 es +layout(local_size_x = 64) in; + +layout(set = 0, binding = 0, std430) readonly buffer SSBO0 +{ + float inputs[]; +}; + +layout(set = 0, binding = 1, std430) writeonly buffer SSBO1 +{ + float outputs[]; +}; + +shared float tmp[gl_WorkGroupSize.x]; + +void main() +{ + uint local = gl_LocalInvocationIndex; + uint work_group = gl_WorkGroupID.x; + + // Does a trivial parallel reduction through shared memory. + tmp[local] = inputs[work_group * gl_WorkGroupSize.x * 2u + local] + inputs[work_group * gl_WorkGroupSize.x * 2u + local + gl_WorkGroupSize.x]; + memoryBarrierShared(); + barrier(); + + for (uint limit = 32u; limit > 1u; limit >>= 1u) + { + if (local < limit) + tmp[local] = tmp[local] + tmp[local + limit]; + memoryBarrierShared(); + barrier(); + } + + if (local == 0u) + outputs[work_group] = tmp[0] + tmp[1]; +} diff --git a/samples/cpp/shared.cpp b/samples/cpp/shared.cpp new file mode 100644 index 0000000000..d5c7f2214d --- /dev/null +++ b/samples/cpp/shared.cpp @@ -0,0 +1,89 @@ +/* + * Copyright 2015-2016 ARM Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "spirv_cross/external_interface.h" +#include <stdio.h> + +#ifndef GLM_SWIZZLE +#define GLM_SWIZZLE +#endif + +#ifndef GLM_FORCE_RADIANS +#define GLM_FORCE_RADIANS +#endif + +#include <glm/glm.hpp> +using namespace glm; + +int main() +{ + // First, we get the C interface to the shader. + // This can be loaded from a dynamic library, or as here, + // linked in as a static library. + auto *iface = spirv_cross_get_interface(); + + // Create an instance of the shader interface. + auto *shader = iface->construct(); + +// Build some input data for our compute shader. +#define NUM_WORKGROUPS 4 + float a[128 * NUM_WORKGROUPS]; + float b[NUM_WORKGROUPS] = {}; + + for (int i = 0; i < 128 * NUM_WORKGROUPS; i++) + { + a[i] = float(i); + } + + void *aptr = a; + void *bptr = b; + + // Bind resources to the shader. + // For resources like samplers and buffers, we provide a list of pointers, + // since UBOs, SSBOs and samplers can be arrays, and can point to different types, + // which is especially true for samplers. + spirv_cross_set_resource(shader, 0, 0, &aptr, sizeof(aptr)); + spirv_cross_set_resource(shader, 0, 1, &bptr, sizeof(bptr)); + + // We also have to set builtins. + // The relevant builtins will depend on the shader, + // but for compute, there are few builtins, which are gl_NumWorkGroups and gl_WorkGroupID. + // LocalInvocationID and GlobalInvocationID are inferred when executing the invocation. + uvec3 num_workgroups(NUM_WORKGROUPS, 1, 1); + uvec3 work_group_id(0, 0, 0); + spirv_cross_set_builtin(shader, SPIRV_CROSS_BUILTIN_NUM_WORK_GROUPS, &num_workgroups, sizeof(num_workgroups)); + spirv_cross_set_builtin(shader, SPIRV_CROSS_BUILTIN_WORK_GROUP_ID, &work_group_id, sizeof(work_group_id)); + + // Execute 4 work groups. + for (unsigned i = 0; i < NUM_WORKGROUPS; i++) + { + work_group_id.x = i; + iface->invoke(shader); + } + + // Call destructor. + iface->destruct(shader); + + // Verify our output. + // TODO: Implement a test framework that asserts results computed.
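 + // Note: since a[j] = float(j), workgroup i reduces a[128 * i] through a[128 * i + 127], + // so the expected sum below has the closed form 128 * 128 * i + (127 * 128) / 2 = 16384 * i + 8128.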
+ for (unsigned i = 0; i < NUM_WORKGROUPS; i++) + { + float expected_sum = 0.0f; + for (unsigned j = i * 128; j < (i + 1) * 128; j++) + expected_sum += a[j]; + fprintf(stderr, "Sum in workgroup #%u = %.1f, expected %.1f\n", i, b[i], expected_sum); + } +} diff --git a/shaders/asm/comp/bitcast_iadd.asm.comp b/shaders/asm/comp/bitcast_iadd.asm.comp new file mode 100644 index 0000000000..3b31ab2851 --- /dev/null +++ b/shaders/asm/comp/bitcast_iadd.asm.comp @@ -0,0 +1,79 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 1 +; Bound: 30 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %func "main" + OpExecutionMode %func LocalSize 1 1 1 + OpSource ESSL 310 + OpSourceExtension "GL_GOOGLE_cpp_style_line_directive" + OpSourceExtension "GL_GOOGLE_include_directive" + OpMemberDecorate %input_struct 0 Offset 0 + OpMemberDecorate %input_struct 1 Offset 16 + OpMemberDecorate %output_struct 0 Offset 0 + OpMemberDecorate %output_struct 1 Offset 16 + OpDecorate %input_struct BufferBlock + OpDecorate %inputs DescriptorSet 0 + OpDecorate %inputs Binding 0 + OpDecorate %inputs Restrict + OpDecorate %output_struct BufferBlock + OpDecorate %outputs DescriptorSet 0 + OpDecorate %outputs Binding 1 + OpDecorate %outputs Restrict + + %void = OpTypeVoid + %main_func = OpTypeFunction %void + + %uint = OpTypeInt 32 0 + %uvec4 = OpTypeVector %uint 4 + + %int = OpTypeInt 32 1 + %ivec4 = OpTypeVector %int 4 + + %ivec4_ptr = OpTypePointer Uniform %ivec4 + %uvec4_ptr = OpTypePointer Uniform %uvec4 + + %zero = OpConstant %int 0 + %one = OpConstant %int 1 + + %input_struct = OpTypeStruct %ivec4 %uvec4 + %input_struct_ptr = OpTypePointer Uniform %input_struct + %inputs = OpVariable %input_struct_ptr Uniform + %output_struct = OpTypeStruct %uvec4 %ivec4 + %output_struct_ptr = OpTypePointer Uniform %output_struct + %outputs = OpVariable %output_struct_ptr Uniform + + %func = OpFunction %void None %main_func + %block = OpLabel + + %input1_ptr = OpAccessChain %ivec4_ptr %inputs %zero + %input0_ptr = OpAccessChain %uvec4_ptr %inputs %one + %input1 = OpLoad %ivec4 %input1_ptr + %input0 = OpLoad %uvec4 %input0_ptr + + %output_ptr_uvec4 = OpAccessChain %uvec4_ptr %outputs %zero + %output_ptr_ivec4 = OpAccessChain %ivec4_ptr %outputs %one + +; Test all variants of IAdd + %result_iadd_0 = OpIAdd %uvec4 %input0 %input1 + %result_iadd_1 = OpIAdd %uvec4 %input1 %input0 + %result_iadd_2 = OpIAdd %uvec4 %input0 %input0 + %result_iadd_3 = OpIAdd %uvec4 %input1 %input1 + %result_iadd_4 = OpIAdd %ivec4 %input0 %input0 + %result_iadd_5 = OpIAdd %ivec4 %input1 %input1 + %result_iadd_6 = OpIAdd %ivec4 %input0 %input1 + %result_iadd_7 = OpIAdd %ivec4 %input1 %input0 + OpStore %output_ptr_uvec4 %result_iadd_0 + OpStore %output_ptr_uvec4 %result_iadd_1 + OpStore %output_ptr_uvec4 %result_iadd_2 + OpStore %output_ptr_uvec4 %result_iadd_3 + OpStore %output_ptr_ivec4 %result_iadd_4 + OpStore %output_ptr_ivec4 %result_iadd_5 + OpStore %output_ptr_ivec4 %result_iadd_6 + OpStore %output_ptr_ivec4 %result_iadd_7 + + OpReturn + OpFunctionEnd diff --git a/shaders/asm/comp/bitcast_iequal.asm.comp b/shaders/asm/comp/bitcast_iequal.asm.comp new file mode 100644 index 0000000000..c98f52c5ad --- /dev/null +++ b/shaders/asm/comp/bitcast_iequal.asm.comp @@ -0,0 +1,90 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 1 +; Bound: 30 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical 
GLSL450 + OpEntryPoint GLCompute %func "main" + OpExecutionMode %func LocalSize 1 1 1 + OpSource ESSL 310 + OpSourceExtension "GL_GOOGLE_cpp_style_line_directive" + OpSourceExtension "GL_GOOGLE_include_directive" + OpMemberDecorate %input_struct 0 Offset 0 + OpMemberDecorate %input_struct 1 Offset 16 + OpMemberDecorate %output_struct 0 Offset 0 + OpMemberDecorate %output_struct 1 Offset 16 + OpDecorate %input_struct BufferBlock + OpDecorate %inputs DescriptorSet 0 + OpDecorate %inputs Binding 0 + OpDecorate %output_struct BufferBlock + OpDecorate %outputs DescriptorSet 0 + OpDecorate %outputs Binding 1 + + %void = OpTypeVoid + %main_func = OpTypeFunction %void + + %uint = OpTypeInt 32 0 + %uvec4 = OpTypeVector %uint 4 + + %int = OpTypeInt 32 1 + %ivec4 = OpTypeVector %int 4 + %bool = OpTypeBool + %bvec4 = OpTypeVector %bool 4 + + %ivec4_ptr = OpTypePointer Uniform %ivec4 + %uvec4_ptr = OpTypePointer Uniform %uvec4 + + %zero = OpConstant %int 0 + %one = OpConstant %int 1 + %uone = OpConstant %uint 1 + %uzero = OpConstant %uint 0 + %uvec41 = OpConstantComposite %uvec4 %uone %uone %uone %uone + %ivec41 = OpConstantComposite %ivec4 %one %one %one %one + %uvec40 = OpConstantComposite %uvec4 %uzero %uzero %uzero %uzero + %ivec40 = OpConstantComposite %ivec4 %zero %zero %zero %zero + + %input_struct = OpTypeStruct %ivec4 %uvec4 + %input_struct_ptr = OpTypePointer Uniform %input_struct + %inputs = OpVariable %input_struct_ptr Uniform + %output_struct = OpTypeStruct %uvec4 %ivec4 + %output_struct_ptr = OpTypePointer Uniform %output_struct + %outputs = OpVariable %output_struct_ptr Uniform + + %func = OpFunction %void None %main_func + %block = OpLabel + + %input1_ptr = OpAccessChain %ivec4_ptr %inputs %zero + %input0_ptr = OpAccessChain %uvec4_ptr %inputs %one + %input1 = OpLoad %ivec4 %input1_ptr + %input0 = OpLoad %uvec4 %input0_ptr + + %output_ptr_uvec4 = OpAccessChain %uvec4_ptr %outputs %zero + %output_ptr_ivec4 = OpAccessChain %ivec4_ptr %outputs %one + +; Test all variants of IEqual + %result_iequal0 = OpIEqual %bvec4 %input0 %input1 + %result_iequal1 = OpIEqual %bvec4 %input1 %input0 + %result_iequal2 = OpIEqual %bvec4 %input0 %input0 + %result_iequal3 = OpIEqual %bvec4 %input1 %input1 + %result_0 = OpSelect %uvec4 %result_iequal0 %uvec41 %uvec40 + %result_1 = OpSelect %uvec4 %result_iequal1 %uvec41 %uvec40 + %result_2 = OpSelect %uvec4 %result_iequal2 %uvec41 %uvec40 + %result_3 = OpSelect %uvec4 %result_iequal3 %uvec41 %uvec40 + %result_4 = OpSelect %ivec4 %result_iequal0 %ivec41 %ivec40 + %result_5 = OpSelect %ivec4 %result_iequal1 %ivec41 %ivec40 + %result_6 = OpSelect %ivec4 %result_iequal2 %ivec41 %ivec40 + %result_7 = OpSelect %ivec4 %result_iequal3 %ivec41 %ivec40 + + OpStore %output_ptr_uvec4 %result_0 + OpStore %output_ptr_uvec4 %result_1 + OpStore %output_ptr_uvec4 %result_2 + OpStore %output_ptr_uvec4 %result_3 + OpStore %output_ptr_ivec4 %result_4 + OpStore %output_ptr_ivec4 %result_5 + OpStore %output_ptr_ivec4 %result_6 + OpStore %output_ptr_ivec4 %result_7 + + OpReturn + OpFunctionEnd diff --git a/shaders/asm/comp/bitcast_sar.asm.comp b/shaders/asm/comp/bitcast_sar.asm.comp new file mode 100644 index 0000000000..64f19fc349 --- /dev/null +++ b/shaders/asm/comp/bitcast_sar.asm.comp @@ -0,0 +1,77 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 1 +; Bound: 30 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %func "main" + OpExecutionMode %func LocalSize 1 1 1 + 
OpSource ESSL 310 + OpSourceExtension "GL_GOOGLE_cpp_style_line_directive" + OpSourceExtension "GL_GOOGLE_include_directive" + OpMemberDecorate %input_struct 0 Offset 0 + OpMemberDecorate %input_struct 1 Offset 16 + OpMemberDecorate %output_struct 0 Offset 0 + OpMemberDecorate %output_struct 1 Offset 16 + OpDecorate %input_struct BufferBlock + OpDecorate %inputs DescriptorSet 0 + OpDecorate %inputs Binding 0 + OpDecorate %output_struct BufferBlock + OpDecorate %outputs DescriptorSet 0 + OpDecorate %outputs Binding 1 + + %void = OpTypeVoid + %main_func = OpTypeFunction %void + + %uint = OpTypeInt 32 0 + %uvec4 = OpTypeVector %uint 4 + + %int = OpTypeInt 32 1 + %ivec4 = OpTypeVector %int 4 + + %ivec4_ptr = OpTypePointer Uniform %ivec4 + %uvec4_ptr = OpTypePointer Uniform %uvec4 + + %zero = OpConstant %int 0 + %one = OpConstant %int 1 + + %input_struct = OpTypeStruct %ivec4 %uvec4 + %input_struct_ptr = OpTypePointer Uniform %input_struct + %inputs = OpVariable %input_struct_ptr Uniform + %output_struct = OpTypeStruct %uvec4 %ivec4 + %output_struct_ptr = OpTypePointer Uniform %output_struct + %outputs = OpVariable %output_struct_ptr Uniform + + %func = OpFunction %void None %main_func + %block = OpLabel + + %input1_ptr = OpAccessChain %ivec4_ptr %inputs %zero + %input0_ptr = OpAccessChain %uvec4_ptr %inputs %one + %input1 = OpLoad %ivec4 %input1_ptr + %input0 = OpLoad %uvec4 %input0_ptr + + %output_ptr_uvec4 = OpAccessChain %uvec4_ptr %outputs %zero + %output_ptr_ivec4 = OpAccessChain %ivec4_ptr %outputs %one + +; Test all variants of ShiftRightArithmetic + %result_iadd_0 = OpShiftRightArithmetic %uvec4 %input0 %input1 + %result_iadd_1 = OpShiftRightArithmetic %uvec4 %input1 %input0 + %result_iadd_2 = OpShiftRightArithmetic %uvec4 %input0 %input0 + %result_iadd_3 = OpShiftRightArithmetic %uvec4 %input1 %input1 + %result_iadd_4 = OpShiftRightArithmetic %ivec4 %input0 %input0 + %result_iadd_5 = OpShiftRightArithmetic %ivec4 %input1 %input1 + %result_iadd_6 = OpShiftRightArithmetic %ivec4 %input0 %input1 + %result_iadd_7 = OpShiftRightArithmetic %ivec4 %input1 %input0 + OpStore %output_ptr_uvec4 %result_iadd_0 + OpStore %output_ptr_uvec4 %result_iadd_1 + OpStore %output_ptr_uvec4 %result_iadd_2 + OpStore %output_ptr_uvec4 %result_iadd_3 + OpStore %output_ptr_ivec4 %result_iadd_4 + OpStore %output_ptr_ivec4 %result_iadd_5 + OpStore %output_ptr_ivec4 %result_iadd_6 + OpStore %output_ptr_ivec4 %result_iadd_7 + + OpReturn + OpFunctionEnd diff --git a/shaders/asm/comp/bitcast_sdiv.asm.comp b/shaders/asm/comp/bitcast_sdiv.asm.comp new file mode 100644 index 0000000000..ab73ec83df --- /dev/null +++ b/shaders/asm/comp/bitcast_sdiv.asm.comp @@ -0,0 +1,77 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 1 +; Bound: 30 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %func "main" + OpExecutionMode %func LocalSize 1 1 1 + OpSource ESSL 310 + OpSourceExtension "GL_GOOGLE_cpp_style_line_directive" + OpSourceExtension "GL_GOOGLE_include_directive" + OpMemberDecorate %input_struct 0 Offset 0 + OpMemberDecorate %input_struct 1 Offset 16 + OpMemberDecorate %output_struct 0 Offset 0 + OpMemberDecorate %output_struct 1 Offset 16 + OpDecorate %input_struct BufferBlock + OpDecorate %inputs DescriptorSet 0 + OpDecorate %inputs Binding 0 + OpDecorate %output_struct BufferBlock + OpDecorate %outputs DescriptorSet 0 + OpDecorate %outputs Binding 1 + + %void = OpTypeVoid + %main_func = OpTypeFunction %void + + 
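+; Note: as in the other bitcast_*.asm.comp tests, the SDiv instructions below deliberately +; mix signed and unsigned vector types for operands and results, so a GLSL backend must +; wrap them in explicit bitcasts to stay type-correct.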
%uint = OpTypeInt 32 0 + %uvec4 = OpTypeVector %uint 4 + + %int = OpTypeInt 32 1 + %ivec4 = OpTypeVector %int 4 + + %ivec4_ptr = OpTypePointer Uniform %ivec4 + %uvec4_ptr = OpTypePointer Uniform %uvec4 + + %zero = OpConstant %int 0 + %one = OpConstant %int 1 + + %input_struct = OpTypeStruct %ivec4 %uvec4 + %input_struct_ptr = OpTypePointer Uniform %input_struct + %inputs = OpVariable %input_struct_ptr Uniform + %output_struct = OpTypeStruct %uvec4 %ivec4 + %output_struct_ptr = OpTypePointer Uniform %output_struct + %outputs = OpVariable %output_struct_ptr Uniform + + %func = OpFunction %void None %main_func + %block = OpLabel + + %input1_ptr = OpAccessChain %ivec4_ptr %inputs %zero + %input0_ptr = OpAccessChain %uvec4_ptr %inputs %one + %input1 = OpLoad %ivec4 %input1_ptr + %input0 = OpLoad %uvec4 %input0_ptr + + %output_ptr_uvec4 = OpAccessChain %uvec4_ptr %outputs %zero + %output_ptr_ivec4 = OpAccessChain %ivec4_ptr %outputs %one + +; Test all variants of SDiv + %result_iadd_0 = OpSDiv %uvec4 %input0 %input1 + %result_iadd_1 = OpSDiv %uvec4 %input1 %input0 + %result_iadd_2 = OpSDiv %uvec4 %input0 %input0 + %result_iadd_3 = OpSDiv %uvec4 %input1 %input1 + %result_iadd_4 = OpSDiv %ivec4 %input0 %input0 + %result_iadd_5 = OpSDiv %ivec4 %input1 %input1 + %result_iadd_6 = OpSDiv %ivec4 %input0 %input1 + %result_iadd_7 = OpSDiv %ivec4 %input1 %input0 + OpStore %output_ptr_uvec4 %result_iadd_0 + OpStore %output_ptr_uvec4 %result_iadd_1 + OpStore %output_ptr_uvec4 %result_iadd_2 + OpStore %output_ptr_uvec4 %result_iadd_3 + OpStore %output_ptr_ivec4 %result_iadd_4 + OpStore %output_ptr_ivec4 %result_iadd_5 + OpStore %output_ptr_ivec4 %result_iadd_6 + OpStore %output_ptr_ivec4 %result_iadd_7 + + OpReturn + OpFunctionEnd diff --git a/shaders/asm/comp/bitcast_slr.asm.comp b/shaders/asm/comp/bitcast_slr.asm.comp new file mode 100644 index 0000000000..6741f5cb58 --- /dev/null +++ b/shaders/asm/comp/bitcast_slr.asm.comp @@ -0,0 +1,77 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 1 +; Bound: 30 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %func "main" + OpExecutionMode %func LocalSize 1 1 1 + OpSource ESSL 310 + OpSourceExtension "GL_GOOGLE_cpp_style_line_directive" + OpSourceExtension "GL_GOOGLE_include_directive" + OpMemberDecorate %input_struct 0 Offset 0 + OpMemberDecorate %input_struct 1 Offset 16 + OpMemberDecorate %output_struct 0 Offset 0 + OpMemberDecorate %output_struct 1 Offset 16 + OpDecorate %input_struct BufferBlock + OpDecorate %inputs DescriptorSet 0 + OpDecorate %inputs Binding 0 + OpDecorate %output_struct BufferBlock + OpDecorate %outputs DescriptorSet 0 + OpDecorate %outputs Binding 1 + + %void = OpTypeVoid + %main_func = OpTypeFunction %void + + %uint = OpTypeInt 32 0 + %uvec4 = OpTypeVector %uint 4 + + %int = OpTypeInt 32 1 + %ivec4 = OpTypeVector %int 4 + + %ivec4_ptr = OpTypePointer Uniform %ivec4 + %uvec4_ptr = OpTypePointer Uniform %uvec4 + + %zero = OpConstant %int 0 + %one = OpConstant %int 1 + + %input_struct = OpTypeStruct %ivec4 %uvec4 + %input_struct_ptr = OpTypePointer Uniform %input_struct + %inputs = OpVariable %input_struct_ptr Uniform + %output_struct = OpTypeStruct %uvec4 %ivec4 + %output_struct_ptr = OpTypePointer Uniform %output_struct + %outputs = OpVariable %output_struct_ptr Uniform + + %func = OpFunction %void None %main_func + %block = OpLabel + + %input1_ptr = OpAccessChain %ivec4_ptr %inputs %zero + %input0_ptr = OpAccessChain %uvec4_ptr 
%inputs %one + %input1 = OpLoad %ivec4 %input1_ptr + %input0 = OpLoad %uvec4 %input0_ptr + + %output_ptr_uvec4 = OpAccessChain %uvec4_ptr %outputs %zero + %output_ptr_ivec4 = OpAccessChain %ivec4_ptr %outputs %one + +; Test all variants of ShiftRightLogical + %result_iadd_0 = OpShiftRightLogical %uvec4 %input0 %input1 + %result_iadd_1 = OpShiftRightLogical %uvec4 %input1 %input0 + %result_iadd_2 = OpShiftRightLogical %uvec4 %input0 %input0 + %result_iadd_3 = OpShiftRightLogical %uvec4 %input1 %input1 + %result_iadd_4 = OpShiftRightLogical %ivec4 %input0 %input0 + %result_iadd_5 = OpShiftRightLogical %ivec4 %input1 %input1 + %result_iadd_6 = OpShiftRightLogical %ivec4 %input0 %input1 + %result_iadd_7 = OpShiftRightLogical %ivec4 %input1 %input0 + OpStore %output_ptr_uvec4 %result_iadd_0 + OpStore %output_ptr_uvec4 %result_iadd_1 + OpStore %output_ptr_uvec4 %result_iadd_2 + OpStore %output_ptr_uvec4 %result_iadd_3 + OpStore %output_ptr_ivec4 %result_iadd_4 + OpStore %output_ptr_ivec4 %result_iadd_5 + OpStore %output_ptr_ivec4 %result_iadd_6 + OpStore %output_ptr_ivec4 %result_iadd_7 + + OpReturn + OpFunctionEnd diff --git a/shaders/asm/comp/bitcast_udiv.asm.comp b/shaders/asm/comp/bitcast_udiv.asm.comp new file mode 100644 index 0000000000..090f37e8d3 --- /dev/null +++ b/shaders/asm/comp/bitcast_udiv.asm.comp @@ -0,0 +1,77 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 1 +; Bound: 30 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %func "main" + OpExecutionMode %func LocalSize 1 1 1 + OpSource ESSL 310 + OpSourceExtension "GL_GOOGLE_cpp_style_line_directive" + OpSourceExtension "GL_GOOGLE_include_directive" + OpMemberDecorate %input_struct 0 Offset 0 + OpMemberDecorate %input_struct 1 Offset 16 + OpMemberDecorate %output_struct 0 Offset 0 + OpMemberDecorate %output_struct 1 Offset 16 + OpDecorate %input_struct BufferBlock + OpDecorate %inputs DescriptorSet 0 + OpDecorate %inputs Binding 0 + OpDecorate %output_struct BufferBlock + OpDecorate %outputs DescriptorSet 0 + OpDecorate %outputs Binding 1 + + %void = OpTypeVoid + %main_func = OpTypeFunction %void + + %uint = OpTypeInt 32 0 + %uvec4 = OpTypeVector %uint 4 + + %int = OpTypeInt 32 1 + %ivec4 = OpTypeVector %int 4 + + %ivec4_ptr = OpTypePointer Uniform %ivec4 + %uvec4_ptr = OpTypePointer Uniform %uvec4 + + %zero = OpConstant %int 0 + %one = OpConstant %int 1 + + %input_struct = OpTypeStruct %ivec4 %uvec4 + %input_struct_ptr = OpTypePointer Uniform %input_struct + %inputs = OpVariable %input_struct_ptr Uniform + %output_struct = OpTypeStruct %uvec4 %ivec4 + %output_struct_ptr = OpTypePointer Uniform %output_struct + %outputs = OpVariable %output_struct_ptr Uniform + + %func = OpFunction %void None %main_func + %block = OpLabel + + %input1_ptr = OpAccessChain %ivec4_ptr %inputs %zero + %input0_ptr = OpAccessChain %uvec4_ptr %inputs %one + %input1 = OpLoad %ivec4 %input1_ptr + %input0 = OpLoad %uvec4 %input0_ptr + + %output_ptr_uvec4 = OpAccessChain %uvec4_ptr %outputs %zero + %output_ptr_ivec4 = OpAccessChain %ivec4_ptr %outputs %one + +; Test all variants of UDiv + %result_iadd_0 = OpUDiv %uvec4 %input0 %input1 + %result_iadd_1 = OpUDiv %uvec4 %input1 %input0 + %result_iadd_2 = OpUDiv %uvec4 %input0 %input0 + %result_iadd_3 = OpUDiv %uvec4 %input1 %input1 + %result_iadd_4 = OpUDiv %ivec4 %input0 %input0 + %result_iadd_5 = OpUDiv %ivec4 %input1 %input1 + %result_iadd_6 = OpUDiv %ivec4 %input0 %input1 + %result_iadd_7 = OpUDiv 
%ivec4 %input1 %input0 + OpStore %output_ptr_uvec4 %result_iadd_0 + OpStore %output_ptr_uvec4 %result_iadd_1 + OpStore %output_ptr_uvec4 %result_iadd_2 + OpStore %output_ptr_uvec4 %result_iadd_3 + OpStore %output_ptr_ivec4 %result_iadd_4 + OpStore %output_ptr_ivec4 %result_iadd_5 + OpStore %output_ptr_ivec4 %result_iadd_6 + OpStore %output_ptr_ivec4 %result_iadd_7 + + OpReturn + OpFunctionEnd diff --git a/shaders/asm/comp/multiple-entry.asm.comp b/shaders/asm/comp/multiple-entry.asm.comp new file mode 100644 index 0000000000..0cfb5543d1 --- /dev/null +++ b/shaders/asm/comp/multiple-entry.asm.comp @@ -0,0 +1,97 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 1 +; Bound: 30 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %func_alt "main2" %frag_in %frag_out + OpEntryPoint GLCompute %func "main" + OpExecutionMode %func LocalSize 1 1 1 + OpSource ESSL 310 + OpSourceExtension "GL_GOOGLE_cpp_style_line_directive" + OpSourceExtension "GL_GOOGLE_include_directive" + OpMemberDecorate %input_struct 0 Offset 0 + OpMemberDecorate %input_struct 1 Offset 16 + OpMemberDecorate %output_struct 0 Offset 0 + OpMemberDecorate %output_struct 1 Offset 16 + OpDecorate %input_struct BufferBlock + OpDecorate %inputs DescriptorSet 0 + OpDecorate %inputs Binding 0 + OpDecorate %inputs Restrict + OpDecorate %output_struct BufferBlock + OpDecorate %outputs DescriptorSet 0 + OpDecorate %outputs Binding 1 + OpDecorate %outputs Restrict + OpDecorate %frag_in Location 0 + OpDecorate %frag_out Location 0 + + %void = OpTypeVoid + %main_func = OpTypeFunction %void + + %uint = OpTypeInt 32 0 + %uvec4 = OpTypeVector %uint 4 + + %int = OpTypeInt 32 1 + %ivec4 = OpTypeVector %int 4 + + %ivec4_ptr = OpTypePointer Uniform %ivec4 + %uvec4_ptr = OpTypePointer Uniform %uvec4 + + %float = OpTypeFloat 32 + %vec4 = OpTypeVector %float 4 + %vec4_input_ptr = OpTypePointer Input %vec4 + %vec4_output_ptr = OpTypePointer Output %vec4 + + %zero = OpConstant %int 0 + %one = OpConstant %int 1 + + %input_struct = OpTypeStruct %ivec4 %uvec4 + %input_struct_ptr = OpTypePointer Uniform %input_struct + %inputs = OpVariable %input_struct_ptr Uniform + %output_struct = OpTypeStruct %uvec4 %ivec4 + %output_struct_ptr = OpTypePointer Uniform %output_struct + %outputs = OpVariable %output_struct_ptr Uniform + + %frag_in = OpVariable %vec4_input_ptr Input + %frag_out = OpVariable %vec4_output_ptr Output + + %func = OpFunction %void None %main_func + %block = OpLabel + + %input1_ptr = OpAccessChain %ivec4_ptr %inputs %zero + %input0_ptr = OpAccessChain %uvec4_ptr %inputs %one + %input1 = OpLoad %ivec4 %input1_ptr + %input0 = OpLoad %uvec4 %input0_ptr + + %output_ptr_uvec4 = OpAccessChain %uvec4_ptr %outputs %zero + %output_ptr_ivec4 = OpAccessChain %ivec4_ptr %outputs %one + +; Test all variants of IAdd + %result_iadd_0 = OpIAdd %uvec4 %input0 %input1 + %result_iadd_1 = OpIAdd %uvec4 %input1 %input0 + %result_iadd_2 = OpIAdd %uvec4 %input0 %input0 + %result_iadd_3 = OpIAdd %uvec4 %input1 %input1 + %result_iadd_4 = OpIAdd %ivec4 %input0 %input0 + %result_iadd_5 = OpIAdd %ivec4 %input1 %input1 + %result_iadd_6 = OpIAdd %ivec4 %input0 %input1 + %result_iadd_7 = OpIAdd %ivec4 %input1 %input0 + OpStore %output_ptr_uvec4 %result_iadd_0 + OpStore %output_ptr_uvec4 %result_iadd_1 + OpStore %output_ptr_uvec4 %result_iadd_2 + OpStore %output_ptr_uvec4 %result_iadd_3 + OpStore %output_ptr_ivec4 %result_iadd_4 + OpStore %output_ptr_ivec4 %result_iadd_5 + 
OpStore %output_ptr_ivec4 %result_iadd_6 + OpStore %output_ptr_ivec4 %result_iadd_7 + + OpReturn + OpFunctionEnd + + %func_alt = OpFunction %void None %main_func + %block_alt = OpLabel + %frag_input_value = OpLoad %vec4 %frag_in + OpStore %frag_out %frag_input_value + OpReturn + OpFunctionEnd diff --git a/shaders/asm/comp/name-alias.asm.invalid.comp b/shaders/asm/comp/name-alias.asm.invalid.comp new file mode 100644 index 0000000000..f9bc6dbb67 --- /dev/null +++ b/shaders/asm/comp/name-alias.asm.invalid.comp @@ -0,0 +1,124 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 1 +; Bound: 48 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %4 "main" + OpExecutionMode %4 LocalSize 1 1 1 + OpSource ESSL 310 + OpName %4 "alias" + OpName %15 "alias" + OpMemberName %15 0 "alias" + OpName %18 "alias" + OpMemberName %18 0 "alias" + OpMemberName %18 1 "alias" + OpMemberName %18 2 "alias" + OpName %19 "alias" + OpMemberName %19 0 "alias" + OpMemberName %19 1 "alias" + OpName %21 "alias" + OpName %24 "alias" + OpMemberName %24 0 "alias" + OpName %26 "alias" + OpMemberName %26 0 "alias" + OpMemberName %26 1 "alias" + OpMemberName %26 2 "alias" + OpName %27 "alias" + OpMemberName %27 0 "alias" + OpMemberName %27 1 "alias" + OpName %28 "alias" + OpMemberName %28 0 "alias" + OpName %30 "alias" + OpName %38 "alias" + OpMemberName %38 0 "alias" + OpName %40 "alias" + OpMemberName %40 0 "alias" + OpMemberName %40 1 "alias" + OpMemberName %40 2 "alias" + OpName %41 "alias" + OpMemberName %41 0 "alias" + OpMemberName %41 1 "alias" + OpName %42 "alias" + OpMemberName %42 0 "alias" + OpName %44 "alias" + OpDecorate %22 ArrayStride 8 + OpDecorate %23 ArrayStride 16 + OpMemberDecorate %24 0 Offset 0 + OpDecorate %25 ArrayStride 1600 + OpMemberDecorate %26 0 Offset 0 + OpMemberDecorate %26 1 Offset 16 + OpMemberDecorate %26 2 Offset 96 + OpMemberDecorate %27 0 Offset 0 + OpMemberDecorate %27 1 Offset 16 + OpMemberDecorate %28 0 Offset 0 + OpDecorate %28 BufferBlock + OpDecorate %30 DescriptorSet 0 + OpDecorate %30 Binding 0 + OpDecorate %36 ArrayStride 16 + OpDecorate %37 ArrayStride 16 + OpMemberDecorate %38 0 Offset 0 + OpDecorate %39 ArrayStride 1600 + OpMemberDecorate %40 0 Offset 0 + OpMemberDecorate %40 1 Offset 16 + OpMemberDecorate %40 2 Offset 176 + OpMemberDecorate %41 0 Offset 0 + OpMemberDecorate %41 1 Offset 16 + OpMemberDecorate %42 0 Offset 0 + OpDecorate %42 BufferBlock + OpDecorate %44 DescriptorSet 0 + OpDecorate %44 Binding 1 + %2 = OpTypeVoid + %3 = OpTypeFunction %2 + %6 = OpTypeFloat 32 + %7 = OpTypeVector %6 4 + %8 = OpTypeVector %6 2 + %9 = OpTypeInt 32 0 + %10 = OpConstant %9 10 + %11 = OpTypeArray %8 %10 + %12 = OpTypeVector %6 3 + %13 = OpConstant %9 100 + %14 = OpTypeArray %12 %13 + %15 = OpTypeStruct %14 + %16 = OpConstant %9 2 + %17 = OpTypeArray %15 %16 + %18 = OpTypeStruct %7 %11 %17 + %19 = OpTypeStruct %7 %18 + %20 = OpTypePointer Function %19 + %22 = OpTypeArray %8 %10 + %23 = OpTypeArray %12 %13 + %24 = OpTypeStruct %23 + %25 = OpTypeArray %24 %16 + %26 = OpTypeStruct %7 %22 %25 + %27 = OpTypeStruct %7 %26 + %28 = OpTypeStruct %27 + %29 = OpTypePointer Uniform %28 + %30 = OpVariable %29 Uniform + %31 = OpTypeInt 32 1 + %32 = OpConstant %31 0 + %33 = OpTypePointer Uniform %27 + %36 = OpTypeArray %8 %10 + %37 = OpTypeArray %12 %13 + %38 = OpTypeStruct %37 + %39 = OpTypeArray %38 %16 + %40 = OpTypeStruct %7 %36 %39 + %41 = OpTypeStruct %7 %40 + %42 = OpTypeStruct %41 + %43 = 
OpTypePointer Uniform %42 + %44 = OpVariable %43 Uniform + %46 = OpTypePointer Uniform %41 + %4 = OpFunction %2 None %3 + %5 = OpLabel + %21 = OpVariable %20 Function + %34 = OpAccessChain %33 %30 %32 + %35 = OpLoad %27 %34 +; This shader has an illegal aliased store for testing purposes. spirv-val is not run for this shader. + OpStore %21 %35 + %45 = OpLoad %19 %21 + %47 = OpAccessChain %46 %44 %32 +; This shader has an illegal aliased store for testing purposes. spirv-val is not run for this shader. + OpStore %47 %45 + OpReturn + OpFunctionEnd diff --git a/shaders/asm/comp/quantize.asm.comp b/shaders/asm/comp/quantize.asm.comp new file mode 100644 index 0000000000..f5afc6570c --- /dev/null +++ b/shaders/asm/comp/quantize.asm.comp @@ -0,0 +1,67 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 1 +; Bound: 38 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %4 "main" + OpExecutionMode %4 LocalSize 1 1 1 + OpSource ESSL 310 + OpName %4 "main" + OpName %10 "SSBO0" + OpMemberName %10 0 "scalar" + OpMemberName %10 1 "vec2_val" + OpMemberName %10 2 "vec3_val" + OpMemberName %10 3 "vec4_val" + OpName %12 "" + OpMemberDecorate %10 0 Offset 0 + OpMemberDecorate %10 1 Offset 8 + OpMemberDecorate %10 2 Offset 16 + OpMemberDecorate %10 3 Offset 32 + OpDecorate %10 BufferBlock + OpDecorate %12 DescriptorSet 0 + OpDecorate %12 Binding 0 + %2 = OpTypeVoid + %3 = OpTypeFunction %2 + %6 = OpTypeFloat 32 + %7 = OpTypeVector %6 2 + %8 = OpTypeVector %6 3 + %9 = OpTypeVector %6 4 + %10 = OpTypeStruct %6 %7 %8 %9 + %11 = OpTypePointer Uniform %10 + %12 = OpVariable %11 Uniform + %13 = OpTypeInt 32 1 + %14 = OpConstant %13 0 + %15 = OpTypePointer Uniform %6 + %20 = OpConstant %13 1 + %21 = OpTypePointer Uniform %7 + %26 = OpConstant %13 2 + %27 = OpTypePointer Uniform %8 + %32 = OpConstant %13 3 + %33 = OpTypePointer Uniform %9 + %4 = OpFunction %2 None %3 + %5 = OpLabel + %16 = OpAccessChain %15 %12 %14 + %17 = OpLoad %6 %16 + %18 = OpQuantizeToF16 %6 %17 + %19 = OpAccessChain %15 %12 %14 + OpStore %19 %18 + %22 = OpAccessChain %21 %12 %20 + %23 = OpLoad %7 %22 + %24 = OpQuantizeToF16 %7 %23 + %25 = OpAccessChain %21 %12 %20 + OpStore %25 %24 + %28 = OpAccessChain %27 %12 %26 + %29 = OpLoad %8 %28 + %30 = OpQuantizeToF16 %8 %29 + %31 = OpAccessChain %27 %12 %26 + OpStore %31 %30 + %34 = OpAccessChain %33 %12 %32 + %35 = OpLoad %9 %34 + %36 = OpQuantizeToF16 %9 %35 + %37 = OpAccessChain %33 %12 %32 + OpStore %37 %36 + OpReturn + OpFunctionEnd diff --git a/shaders/asm/frag/invalidation.asm.frag b/shaders/asm/frag/invalidation.asm.frag new file mode 100644 index 0000000000..1c171b6d27 --- /dev/null +++ b/shaders/asm/frag/invalidation.asm.frag @@ -0,0 +1,43 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 1 +; Bound: 28 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %4 "main" %v0 %v1 %FragColor + OpExecutionMode %4 OriginUpperLeft + OpSource GLSL 450 + OpName %4 "main" + OpName %a "a" + OpName %v0 "v0" + OpName %b "b" + OpName %v1 "v1" + OpName %FragColor "FragColor" + %2 = OpTypeVoid + %3 = OpTypeFunction %2 + %float = OpTypeFloat 32 + %pfloat = OpTypePointer Function %float + %9 = OpTypePointer Input %float + %v0 = OpVariable %9 Input + %v1 = OpVariable %9 Input + %25 = OpTypePointer Output %float + %FragColor = OpVariable %25 Output + %4 = OpFunction %2 None %3 + %5 = OpLabel + %a = 
OpVariable %pfloat Function + %b = OpVariable %pfloat Function + %v0_tmp = OpLoad %float %v0 + %v1_tmp = OpLoad %float %v1 + OpStore %a %v0_tmp + OpStore %b %v1_tmp + + %a_tmp = OpLoad %float %a + %b_tmp = OpLoad %float %b + %res = OpFAdd %float %a_tmp %b_tmp + %res1 = OpFMul %float %res %b_tmp + OpStore %a %v1_tmp + OpStore %FragColor %res1 + OpReturn + OpFunctionEnd diff --git a/shaders/comp/atomic.comp b/shaders/comp/atomic.comp new file mode 100644 index 0000000000..703256d879 --- /dev/null +++ b/shaders/comp/atomic.comp @@ -0,0 +1,56 @@ +#version 310 es +#extension GL_OES_shader_image_atomic : require +layout(local_size_x = 1) in; + +layout(r32ui, binding = 0) uniform highp uimage2D uImage; +layout(r32i, binding = 1) uniform highp iimage2D iImage; +layout(binding = 2, std430) buffer SSBO +{ + uint u32; + int i32; +} ssbo; + +void main() +{ + imageAtomicAdd(uImage, ivec2(1, 5), 1u); + + // Test that we do not invalidate OpImage variables which are loaded from UniformConstant + // address space. + imageStore(iImage, ivec2(1, 6), ivec4(imageAtomicAdd(uImage, ivec2(1, 5), 1u))); + + imageAtomicOr(uImage, ivec2(1, 5), 1u); + imageAtomicXor(uImage, ivec2(1, 5), 1u); + imageAtomicAnd(uImage, ivec2(1, 5), 1u); + imageAtomicMin(uImage, ivec2(1, 5), 1u); + imageAtomicMax(uImage, ivec2(1, 5), 1u); + //imageAtomicExchange(uImage, ivec2(1, 5), 1u); + imageAtomicCompSwap(uImage, ivec2(1, 5), 10u, 2u); + + imageAtomicAdd(iImage, ivec2(1, 6), 1); + imageAtomicOr(iImage, ivec2(1, 6), 1); + imageAtomicXor(iImage, ivec2(1, 6), 1); + imageAtomicAnd(iImage, ivec2(1, 6), 1); + imageAtomicMin(iImage, ivec2(1, 6), 1); + imageAtomicMax(iImage, ivec2(1, 6), 1); + //imageAtomicExchange(iImage, ivec2(1, 5), 1u); + imageAtomicCompSwap(iImage, ivec2(1, 5), 10, 2); + + atomicAdd(ssbo.u32, 1u); + atomicOr(ssbo.u32, 1u); + atomicXor(ssbo.u32, 1u); + atomicAnd(ssbo.u32, 1u); + atomicMin(ssbo.u32, 1u); + atomicMax(ssbo.u32, 1u); + atomicExchange(ssbo.u32, 1u); + atomicCompSwap(ssbo.u32, 10u, 2u); + + atomicAdd(ssbo.i32, 1); + atomicOr(ssbo.i32, 1); + atomicXor(ssbo.i32, 1); + atomicAnd(ssbo.i32, 1); + atomicMin(ssbo.i32, 1); + atomicMax(ssbo.i32, 1); + atomicExchange(ssbo.i32, 1); + atomicCompSwap(ssbo.i32, 10, 2); +} + diff --git a/shaders/comp/bake_gradient.comp b/shaders/comp/bake_gradient.comp new file mode 100644 index 0000000000..4885ff00bc --- /dev/null +++ b/shaders/comp/bake_gradient.comp @@ -0,0 +1,55 @@ +#version 310 es + +layout(local_size_x = 8, local_size_y = 8) in; + +layout(binding = 0) uniform sampler2D uHeight; +layout(binding = 1) uniform sampler2D uDisplacement; +layout(rgba16f, binding = 2) uniform writeonly mediump image2D iHeightDisplacement; +layout(rgba16f, binding = 3) uniform writeonly mediump image2D iGradJacobian; + +layout(binding = 4) uniform UBO +{ + vec4 uInvSize; + vec4 uScale; +}; + +mediump float jacobian(mediump vec2 dDdx, mediump vec2 dDdy) +{ + return (1.0 + dDdx.x) * (1.0 + dDdy.y) - dDdx.y * dDdy.x; +} +#define LAMBDA 1.2 + +void main() +{ + vec4 uv = (vec2(gl_GlobalInvocationID.xy) * uInvSize.xy).xyxy + 0.5 * uInvSize; + + float h = textureLod(uHeight, uv.xy, 0.0).x; + + // Compute the heightmap gradient by simple differentiation. 
+ float x0 = textureLodOffset(uHeight, uv.xy, 0.0, ivec2(-1, 0)).x; + float x1 = textureLodOffset(uHeight, uv.xy, 0.0, ivec2(+1, 0)).x; + float y0 = textureLodOffset(uHeight, uv.xy, 0.0, ivec2(0, -1)).x; + float y1 = textureLodOffset(uHeight, uv.xy, 0.0, ivec2(0, +1)).x; + vec2 grad = uScale.xy * 0.5 * vec2(x1 - x0, y1 - y0); + + // Displacement map must be sampled with a different offset since it's a smaller texture. + vec2 displacement = LAMBDA * textureLod(uDisplacement, uv.zw, 0.0).xy; + + // Compute jacobian. + vec2 dDdx = 0.5 * LAMBDA * ( + textureLodOffset(uDisplacement, uv.zw, 0.0, ivec2(+1, 0)).xy - + textureLodOffset(uDisplacement, uv.zw, 0.0, ivec2(-1, 0)).xy); + vec2 dDdy = 0.5 * LAMBDA * ( + textureLodOffset(uDisplacement, uv.zw, 0.0, ivec2(0, +1)).xy - + textureLodOffset(uDisplacement, uv.zw, 0.0, ivec2(0, -1)).xy); + float j = jacobian(dDdx * uScale.z, dDdy * uScale.z); + + displacement = vec2(0.0); + + // Read by vertex shader/tess shader. + imageStore(iHeightDisplacement, ivec2(gl_GlobalInvocationID.xy), vec4(h, displacement, 0.0)); + + // Read by fragment shader. + imageStore(iGradJacobian, ivec2(gl_GlobalInvocationID.xy), vec4(grad, j, 0.0)); +} + diff --git a/shaders/comp/basic.comp b/shaders/comp/basic.comp new file mode 100644 index 0000000000..f9bf55670f --- /dev/null +++ b/shaders/comp/basic.comp @@ -0,0 +1,28 @@ +#version 310 es +layout(local_size_x = 1) in; + +layout(std430, binding = 0) readonly buffer SSBO +{ + vec4 in_data[]; +}; + +layout(std430, binding = 1) writeonly buffer SSBO2 +{ + vec4 out_data[]; +}; + +layout(std430, binding = 2) buffer SSBO3 +{ + uint counter; +}; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + vec4 idata = in_data[ident]; + if (dot(idata, vec4(1.0, 5.0, 6.0, 2.0)) > 8.2) + { + out_data[atomicAdd(counter, 1u)] = idata; + } +} + diff --git a/shaders/comp/casts.comp b/shaders/comp/casts.comp new file mode 100644 index 0000000000..6be539d7be --- /dev/null +++ b/shaders/comp/casts.comp @@ -0,0 +1,18 @@ +#version 310 es +layout(local_size_x = 1) in; + +layout(binding = 0, std430) buffer SSBO0 +{ + ivec4 inputs[]; +}; + +layout(binding = 1, std430) buffer SSBO1 +{ + ivec4 outputs[]; +}; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + outputs[ident] = ivec4(bvec4(inputs[ident] & 0x3)); +} diff --git a/shaders/comp/cfg.comp b/shaders/comp/cfg.comp new file mode 100644 index 0000000000..4f4e6c0ea8 --- /dev/null +++ b/shaders/comp/cfg.comp @@ -0,0 +1,91 @@ +#version 310 es +layout(local_size_x = 1) in; + +layout(std430, binding = 0) buffer SSBO +{ + float data; +}; + +void test() +{ + // Test that variables local to a scope stay local. + if (data != 0.0) + { + float tmp = 10.0; + data = tmp; + } + else + { + float tmp = 15.0; + data = tmp; + } + + // Test that variable access propagates up to dominator + if (data != 0.0) + { + float e; + if (data != 5.0) + { + if (data != 6.0) + e = 10.0; + } + else + e = 20.0; + } + + // Test that variables local to a switch block stay local. + switch (int(data)) + { + case 0: + { + float tmp = 20.0; + data = tmp; + break; + } + + case 1: + { + float tmp = 30.0; + data = tmp; + break; + } + } + + // Check that multibranches propagate up to dominator. + float f; + switch (int(data)) + { + case 0: + { + f = 30.0; + break; + } + + case 1: + { + f = 40.0; + break; + } + } + + // Check that loops work. + // Interesting case here is propagating variable access from the continue block. 
+ float h; + for (int i = 0; i < 20; i++, h += 10.0) + ; + data = h; + + // Do the same with do-while, gotta test all the hard cases. + float m; + do + { + } while (m != 20.0); + data = m; +} + +void main() +{ + // Test that we do the CFG analysis for all functions. + test(); +} + diff --git a/shaders/comp/composite-construct.comp b/shaders/comp/composite-construct.comp new file mode 100644 index 0000000000..859c56f51f --- /dev/null +++ b/shaders/comp/composite-construct.comp @@ -0,0 +1,40 @@ +#version 310 es +layout(local_size_x = 1) in; + +layout(std430, binding = 0) buffer SSBO0 +{ + vec4 as[]; +}; + +layout(std430, binding = 1) buffer SSBO1 +{ + vec4 bs[]; +}; + +vec4 summe(vec4 values[3][2]) +{ + return values[0][0] + values[2][1] + values[0][1] + values[1][0]; +} + +struct Composite +{ + vec4 a[2]; + vec4 b[2]; +}; + +void main() +{ + vec4 values[2] = vec4[](as[gl_GlobalInvocationID.x], bs[gl_GlobalInvocationID.x]); + vec4 const_values[2] = vec4[](vec4(10.0), vec4(30.0)); + vec4 copy_values[2]; + copy_values = const_values; + vec4 copy_values2[2] = values; + as[gl_GlobalInvocationID.x] = summe(vec4[][](values, copy_values, copy_values2)); + + Composite c = Composite(values, copy_values); + + float arrayofarray[2][3] = float[][](float[](1.0, 1.0, 1.0), float[](2.0, 2.0, 2.0)); + + float b = 10.0; + float values_scalar[4] = float[](b, b, b, b); +} diff --git a/shaders/comp/culling.comp b/shaders/comp/culling.comp new file mode 100644 index 0000000000..9f8331b10b --- /dev/null +++ b/shaders/comp/culling.comp @@ -0,0 +1,26 @@ +#version 310 es +layout(local_size_x = 4) in; + +layout(std430, binding = 0) readonly buffer SSBO +{ + float in_data[]; +}; + +layout(std430, binding = 1) writeonly buffer SSBO2 +{ + float out_data[]; +}; + +layout(std430, binding = 2) buffer SSBO3 +{ + uint count; +}; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + float idata = in_data[ident]; + if (idata > 12.0) + out_data[atomicAdd(count, 1u)] = idata; +} + diff --git a/shaders/comp/defer-parens.comp b/shaders/comp/defer-parens.comp new file mode 100644 index 0000000000..4e8ea6b399 --- /dev/null +++ b/shaders/comp/defer-parens.comp @@ -0,0 +1,30 @@ +#version 310 es +layout(local_size_x = 1) in; + +layout(binding = 0, std430) buffer SSBO +{ + vec4 data; + int index; +}; + +void main() +{ + // Tests defer-parens behavior where a binary expression is OpCompositeExtracted chained together + // with an OpCompositeConstruct optimization. + vec4 d = data; + data = vec4(d.x, d.yz + 10.0, d.w); + + // Verify binary ops. + data = d + d + d; + + // Verify swizzles. 
+ data = (d.yz + 10.0).xxyy; + + // OpCompositeExtract + float t = (d.yz + 10.0).y; + data = vec4(t); + + // OpVectorExtractDynamic + t = (d.zw + 10.0)[index]; + data = vec4(t); +} diff --git a/shaders/comp/dowhile.comp b/shaders/comp/dowhile.comp new file mode 100644 index 0000000000..709db75a17 --- /dev/null +++ b/shaders/comp/dowhile.comp @@ -0,0 +1,31 @@ +#version 310 es +layout(local_size_x = 1) in; + +layout(std430, binding = 0) readonly buffer SSBO +{ + mat4 mvp; + vec4 in_data[]; +}; + +layout(std430, binding = 1) writeonly buffer SSBO2 +{ + vec4 out_data[]; +}; + +int i; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + + i = 0; + vec4 idat = in_data[ident]; + do + { + idat = mvp * idat; + i++; + } while(i < 16); + + out_data[ident] = idat; +} + diff --git a/shaders/comp/generate_height.comp b/shaders/comp/generate_height.comp new file mode 100644 index 0000000000..16cef4de78 --- /dev/null +++ b/shaders/comp/generate_height.comp @@ -0,0 +1,97 @@ +#version 310 es + +layout(local_size_x = 64) in; + +layout(std430, binding = 0) readonly buffer Distribution +{ + vec2 distribution[]; +}; + +layout(std430, binding = 1) writeonly buffer HeightmapFFT +{ + uint heights[]; +}; + +layout(binding = 2, std140) uniform UBO +{ + vec4 uModTime; +}; + +vec2 alias(vec2 i, vec2 N) +{ + return mix(i, i - N, greaterThan(i, 0.5 * N)); +} + +vec4 cmul(vec4 a, vec4 b) +{ + vec4 r3 = a.yxwz; + vec4 r1 = b.xxzz; + vec4 R0 = a * r1; + vec4 r2 = b.yyww; + vec4 R1 = r2 * r3; + return R0 + vec4(-R1.x, R1.y, -R1.z, R1.w); +} + +vec2 cmul(vec2 a, vec2 b) +{ + vec2 r3 = a.yx; + vec2 r1 = b.xx; + vec2 R0 = a * r1; + vec2 r2 = b.yy; + vec2 R1 = r2 * r3; + return R0 + vec2(-R1.x, R1.y); +} + +uint pack2(vec2 v) +{ + return packHalf2x16(v); +} + +uvec2 pack4(vec4 v) +{ + return uvec2(packHalf2x16(v.xy), packHalf2x16(v.zw)); +} + +uvec2 workaround_mix(uvec2 a, uvec2 b, bvec2 sel) +{ + return uvec2(sel.x ? b.x : a.x, sel.y ? b.y : a.y); +} + +void generate_heightmap() +{ + uvec2 N = gl_WorkGroupSize.xy * gl_NumWorkGroups.xy; + uvec2 i = gl_GlobalInvocationID.xy; + // Pick out the negative frequency variant. + uvec2 wi = workaround_mix(N - i, uvec2(0u), equal(i, uvec2(0u))); + + // Pick out positive and negative travelling waves. + vec2 a = distribution[i.y * N.x + i.x]; + vec2 b = distribution[wi.y * N.x + wi.x]; + + vec2 k = uModTime.xy * alias(vec2(i), vec2(N)); + float k_len = length(k); + + const float G = 9.81; + + // If this sample runs for hours on end, the cosines of very large numbers will eventually become unstable. + // It is fairly easy to fix this by wrapping uTime, + // and quantizing w such that wrapping uTime does not change the result. + // See Tessendorf's paper for how to do it. + // The sqrt(G * k_len) factor represents how fast ocean waves at different frequencies propagate. + float w = sqrt(G * k_len) * uModTime.z; + float cw = cos(w); + float sw = sin(w); + + // Complex multiply to rotate our frequency samples. + a = cmul(a, vec2(cw, sw)); + b = cmul(b, vec2(cw, sw)); + b = vec2(b.x, -b.y); // Complex conjugate since we picked a frequency with the opposite direction. + vec2 res = a + b; // Sum up forward and backwards travelling waves. 
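 + // Note: this is Tessendorf's h(k, t) = h0(k) * exp(i*w*t) + conj(h0(-k)) * exp(-i*w*t); + // the Hermitian symmetry keeps the heightfield purely real after the inverse FFT.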
+ heights[i.y * N.x + i.x] = pack2(res); +} + +void main() +{ + generate_heightmap(); +} + diff --git a/shaders/comp/image.comp b/shaders/comp/image.comp new file mode 100644 index 0000000000..e375534a51 --- /dev/null +++ b/shaders/comp/image.comp @@ -0,0 +1,12 @@ +#version 310 es +layout(local_size_x = 1) in; + +layout(rgba8, binding = 0) uniform readonly mediump image2D uImageIn; +layout(rgba8, binding = 1) uniform writeonly mediump image2D uImageOut; + +void main() +{ + vec4 v = imageLoad(uImageIn, ivec2(gl_GlobalInvocationID.xy) + imageSize(uImageIn)); + imageStore(uImageOut, ivec2(gl_GlobalInvocationID.xy), v); +} + diff --git a/shaders/comp/inout-struct.invalid.comp b/shaders/comp/inout-struct.invalid.comp new file mode 100644 index 0000000000..c1de959743 --- /dev/null +++ b/shaders/comp/inout-struct.invalid.comp @@ -0,0 +1,55 @@ +#version 310 es +layout(local_size_x = 1) in; + +layout(std430, binding = 0) writeonly buffer SSBO +{ + vec4 data[]; +} outdata; + +layout(std430, binding = 1) readonly buffer SSBO2 +{ + vec4 data[]; +} indata; + +struct Foo +{ + vec4 a; + vec4 b; + vec4 c; + vec4 d; +}; + +layout(std430, binding = 2) readonly buffer SSBO3 +{ + Foo foos[]; +} foobar; + +vec4 bar(Foo foo) +{ + return foo.a + foo.b + foo.c + foo.d; +} + +void baz(out Foo foo) +{ + uint ident = gl_GlobalInvocationID.x; + foo.a = indata.data[4u * ident + 0u]; + foo.b = indata.data[4u * ident + 1u]; + foo.c = indata.data[4u * ident + 2u]; + foo.d = indata.data[4u * ident + 3u]; +} + +void meow(inout Foo foo) +{ + foo.a += 10.0; + foo.b += 20.0; + foo.c += 30.0; + foo.d += 40.0; +} + +void main() +{ + Foo foo; + baz(foo); + meow(foo); + outdata.data[gl_GlobalInvocationID.x] = bar(foo) + bar(foobar.foos[gl_GlobalInvocationID.x]); +} diff --git a/shaders/comp/insert.comp b/shaders/comp/insert.comp new file mode 100644 index 0000000000..07c1f8d7aa --- /dev/null +++ b/shaders/comp/insert.comp @@ -0,0 +1,18 @@ +#version 310 es +layout(local_size_x = 1) in; + +layout(std430, binding = 0) writeonly buffer SSBO +{ + vec4 out_data[]; +}; + +void main() +{ + vec4 v; + v.x = 10.0; + v.y = 30.0; + v.z = 70.0; + v.w = 90.0; + out_data[gl_GlobalInvocationID.x] = v; + out_data[gl_GlobalInvocationID.x].y = 20.0; +} diff --git a/shaders/comp/loop.comp b/shaders/comp/loop.comp new file mode 100644 index 0000000000..6d6c324243 --- /dev/null +++ b/shaders/comp/loop.comp @@ -0,0 +1,98 @@ +#version 310 es +layout(local_size_x = 1) in; + +layout(std430, binding = 0) readonly buffer SSBO +{ + mat4 mvp; + vec4 in_data[]; +}; + +layout(std430, binding = 1) writeonly buffer SSBO2 +{ + vec4 out_data[]; +}; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + vec4 idat = in_data[ident]; + + int k = 0; + uint i = 0u; + + if (idat.y == 20.0) + { + do + { + k = k * 2; + i++; + } while (i < ident); + } + + switch (k) + { + case 10: + for (;;) + { + i++; + if (i > 10u) + break; + } + break; + + default: + for (;;) + { + i += 2u; + if (i > 20u) + break; + } + break; + } + + while (k < 10) + { + idat *= 2.0; + k++; + } + + for (uint i = 0u; i < 16u; i++, k++) + for (uint j = 0u; j < 30u; j++) + idat = mvp * idat; + + k = 0; + for (;;) + { + k++; + if (k > 10) + { + k += 2; + } + else + { + k += 3; + continue; + } + + k += 10; + } + + k = 0; + do + { + k++; + } while (k > 10); + + int l = 0; + for (;; l++) + { + if (l == 5) + { + continue; + } + + idat += 1.0; + } + out_data[ident] = idat; +} + diff --git a/shaders/comp/mat3.comp b/shaders/comp/mat3.comp new file mode 100644 index 0000000000..7c5bb1e4f5 --- /dev/null +++ 
b/shaders/comp/mat3.comp @@ -0,0 +1,14 @@ +#version 310 es +layout(local_size_x = 1) in; + +layout(std430, binding = 1) writeonly buffer SSBO2 +{ + mat3 out_data[]; +}; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + out_data[ident] = mat3(vec3(10.0), vec3(20.0), vec3(40.0)); +} + diff --git a/shaders/comp/mod.comp b/shaders/comp/mod.comp new file mode 100644 index 0000000000..1631456e30 --- /dev/null +++ b/shaders/comp/mod.comp @@ -0,0 +1,26 @@ +#version 310 es +layout(local_size_x = 1) in; + +layout(std430, binding = 0) readonly buffer SSBO +{ + vec4 in_data[]; +}; + +layout(std430, binding = 1) writeonly buffer SSBO2 +{ + vec4 out_data[]; +}; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + vec4 v = mod(in_data[ident], out_data[ident]); + out_data[ident] = v; + + uvec4 vu = floatBitsToUint(in_data[ident]) % floatBitsToUint(out_data[ident]); + out_data[ident] = uintBitsToFloat(vu); + + ivec4 vi = floatBitsToInt(in_data[ident]) % floatBitsToInt(out_data[ident]); + out_data[ident] = intBitsToFloat(vi); +} + diff --git a/shaders/comp/modf.comp b/shaders/comp/modf.comp new file mode 100644 index 0000000000..edadefcf05 --- /dev/null +++ b/shaders/comp/modf.comp @@ -0,0 +1,23 @@ +#version 310 es +layout(local_size_x = 1) in; + +layout(std430, binding = 0) readonly buffer SSBO +{ + vec4 in_data[]; +}; + +layout(std430, binding = 1) writeonly buffer SSBO2 +{ + vec4 out_data[]; +}; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + vec4 i; + //vec4 v = frexp(in_data[ident], i); + //out_data[ident] = ldexp(v, i); + vec4 v = modf(in_data[ident], i); + out_data[ident] = v; +} + diff --git a/shaders/comp/return.comp b/shaders/comp/return.comp new file mode 100644 index 0000000000..617f437182 --- /dev/null +++ b/shaders/comp/return.comp @@ -0,0 +1,33 @@ +#version 310 es +layout(local_size_x = 1) in; + +layout(std430, binding = 1) writeonly buffer SSBO2 +{ + vec4 out_data[]; +}; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + + if (ident == 2u) + { + out_data[ident] = vec4(20.0); + } + else if (ident == 4u) + { + out_data[ident] = vec4(10.0); + return; + } + + for (int i = 0; i < 20; i++) + { + if (i == 10) + break; + + return; + } + + out_data[ident] = vec4(10.0); +} + diff --git a/shaders/comp/shared.comp b/shaders/comp/shared.comp new file mode 100644 index 0000000000..4deff93597 --- /dev/null +++ b/shaders/comp/shared.comp @@ -0,0 +1,27 @@ +#version 310 es +layout(local_size_x = 4) in; + +shared float sShared[gl_WorkGroupSize.x]; + +layout(std430, binding = 0) readonly buffer SSBO +{ + float in_data[]; +}; + +layout(std430, binding = 1) writeonly buffer SSBO2 +{ + float out_data[]; +}; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + float idata = in_data[ident]; + + sShared[gl_LocalInvocationIndex] = idata; + memoryBarrierShared(); + barrier(); + + out_data[ident] = sShared[gl_WorkGroupSize.x - gl_LocalInvocationIndex - 1u]; +} + diff --git a/shaders/comp/ssbo-array.comp b/shaders/comp/ssbo-array.comp new file mode 100644 index 0000000000..da0eae0889 --- /dev/null +++ b/shaders/comp/ssbo-array.comp @@ -0,0 +1,14 @@ +#version 310 es +layout(local_size_x = 1) in; + +layout(std430, binding = 0) buffer SSBO +{ + vec4 data[]; +} ssbos[2]; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + ssbos[1].data[ident] = ssbos[0].data[ident]; +} + diff --git a/shaders/comp/struct-layout.comp b/shaders/comp/struct-layout.comp new file mode 100644 index 0000000000..5a2b7802df --- /dev/null +++ b/shaders/comp/struct-layout.comp @@ -0,0 +1,24 @@ 
+#version 310 es +layout(local_size_x = 1) in; + +struct Foo +{ + mat4 m; +}; + +layout(std430, binding = 0) readonly buffer SSBO +{ + Foo in_data[]; +}; + +layout(std430, binding = 1) writeonly buffer SSBO2 +{ + Foo out_data[]; +}; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + out_data[ident].m = in_data[ident].m * in_data[ident].m; +} + diff --git a/shaders/comp/struct-packing.comp b/shaders/comp/struct-packing.comp new file mode 100644 index 0000000000..04b933dd18 --- /dev/null +++ b/shaders/comp/struct-packing.comp @@ -0,0 +1,76 @@ +#version 310 es +layout(local_size_x = 1) in; + +struct S0 +{ + vec2 a[1]; + float b; +}; + +struct S1 +{ + vec3 a; + float b; +}; + +struct S2 +{ + vec3 a[1]; + float b; +}; + +struct S3 +{ + vec2 a; + float b; +}; + +struct S4 +{ + vec2 c; +}; + +struct Content +{ + S0 m0s[1]; + S1 m1s[1]; + S2 m2s[1]; + S0 m0; + S1 m1; + S2 m2; + S3 m3; + float m4; + + S4 m3s[8]; +}; + +layout(binding = 1, std430) buffer SSBO1 +{ + Content content; + Content content1[2]; + Content content2; + + layout(column_major) mat2 m0; + layout(column_major) mat2 m1; + layout(column_major) mat2x3 m2[4]; + layout(column_major) mat3x2 m3; + layout(row_major) mat2 m4; + layout(row_major) mat2 m5[9]; + layout(row_major) mat2x3 m6[4][2]; + layout(row_major) mat3x2 m7; + float array[]; +} ssbo_430; + +layout(binding = 0, std140) buffer SSBO0 +{ + Content content; + Content content1[2]; + Content content2; + float array[]; +} ssbo_140; + +void main() +{ + ssbo_430.content = ssbo_140.content; +} + diff --git a/shaders/comp/torture-loop.comp b/shaders/comp/torture-loop.comp new file mode 100644 index 0000000000..54a1221a15 --- /dev/null +++ b/shaders/comp/torture-loop.comp @@ -0,0 +1,40 @@ +#version 310 es +layout(local_size_x = 1) in; + +layout(std430, binding = 0) readonly buffer SSBO +{ + mat4 mvp; + vec4 in_data[]; +}; + +layout(std430, binding = 1) writeonly buffer SSBO2 +{ + vec4 out_data[]; +}; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + vec4 idat = in_data[ident]; + + int k = 0; + + // Continue with side effects. + while (++k < 10) + { + idat *= 2.0; + k++; + } + + // Again used here ... 
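+	// (Editor's note: "again" refers to k, which is incremented once more in
+	// the for loop's continue expression below, alongside the counter i.)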
+ for (uint i = 0u; i < 16u; i++, k++) + for (uint j = 0u; j < 30u; j++) + idat = mvp * idat; + + do + { + k++; + } while (k > 10); + out_data[ident] = idat; +} + diff --git a/shaders/comp/type-alias.comp b/shaders/comp/type-alias.comp new file mode 100644 index 0000000000..343d350a2f --- /dev/null +++ b/shaders/comp/type-alias.comp @@ -0,0 +1,45 @@ +#version 310 es +layout(local_size_x = 1) in; + +struct S0 +{ + vec4 a; +}; + +struct S1 +{ + vec4 a; +}; + +vec4 overload(S0 s0) +{ + return s0.a; +} + +vec4 overload(S1 s1) +{ + return s1.a; +} + +layout(std430, binding = 0) buffer SSBO0 +{ + S0 s0s[]; +}; + +layout(std430, binding = 1) buffer SSBO1 +{ + S1 s1s[]; +}; + +layout(std430, binding = 2) buffer SSBO2 +{ + vec4 outputs[]; +}; + + +void main() +{ + S0 s0 = s0s[gl_GlobalInvocationID.x]; + S1 s1 = s1s[gl_GlobalInvocationID.x]; + outputs[gl_GlobalInvocationID.x] = overload(s0) + overload(s1); +} diff --git a/shaders/comp/udiv.comp b/shaders/comp/udiv.comp new file mode 100644 index 0000000000..33fe564f07 --- /dev/null +++ b/shaders/comp/udiv.comp @@ -0,0 +1,17 @@ +#version 310 es +layout(local_size_x = 1) in; + +layout(std430, binding = 0) buffer SSBO +{ + uint inputs[]; +}; + +layout(std430, binding = 0) buffer SSBO2 +{ + uint outputs[]; +}; + +void main() +{ + outputs[gl_GlobalInvocationID.x] = inputs[gl_GlobalInvocationID.x] / 29u; +} diff --git a/shaders/desktop-only/comp/fp64.desktop.comp b/shaders/desktop-only/comp/fp64.desktop.comp new file mode 100644 index 0000000000..dd488a3077 --- /dev/null +++ b/shaders/desktop-only/comp/fp64.desktop.comp @@ -0,0 +1,91 @@ +#version 450 +layout(local_size_x = 1) in; + +struct M0 +{ + double v; + dvec2 b[2]; + dmat2x3 c; + dmat3x2 d; +}; + +// Test buffer layout handling. +layout(std430, binding = 0) buffer SSBO0 +{ + dvec4 a; + M0 m0; + dmat4 b; +} ssbo_0; + +layout(std430, binding = 1) buffer SSBO1 +{ + dmat4 a; + dvec4 b; + M0 m0; +} ssbo_1; + +layout(std430, binding = 2) buffer SSBO2 +{ + double a[4]; + dvec2 b[4]; +} ssbo_2; + +layout(std140, binding = 3) buffer SSBO3 +{ + double a[4]; + dvec2 b[4]; +} ssbo_3; + +void main() +{ + ssbo_0.a += dvec4(10, 20, 30, 40); + ssbo_0.a += 20; + + dvec4 a = ssbo_0.a; + dmat4 amat = ssbo_0.b; + + ssbo_0.a = abs(a); + ssbo_0.a = sign(a); + ssbo_0.a = floor(a); + ssbo_0.a = trunc(a); + ssbo_0.a = round(a); + ssbo_0.a = roundEven(a); + ssbo_0.a = ceil(a); + ssbo_0.a = fract(a); + ssbo_0.a = mod(a, 20.0); + ssbo_0.a = mod(a, a); + ssbo_0.a = min(a, a); + ssbo_0.a = max(a, a); + ssbo_0.a = clamp(a, a, a); + ssbo_0.a = mix(a, a, a); + ssbo_0.a = step(a, a); + ssbo_0.a = smoothstep(a, a, a); + bvec4 b = isnan(a); + bvec4 c = isinf(a); + + double f = packDouble2x32(uvec2(10, 40)); + uvec2 g = unpackDouble2x32(f); + + double d = length(a); + d = distance(a, a); + d = dot(a, a); + dvec3 e = cross(a.xyz, a.yzw); + a = faceforward(a, a, a); + a = reflect(a, a); + a = refract(a, a, a.x); + + dmat4 l = matrixCompMult(amat, amat); + l = outerProduct(a, a); + l = transpose(l); + double m = determinant(l); + l = inverse(l); + + bvec4 k = lessThan(a, a); + k = lessThanEqual(a, a); + k = greaterThan(a, a); + k = greaterThanEqual(a, a); + + ssbo_1.b.x += 1.0lf; + ssbo_2.b[0].x += 1.0lf; + ssbo_3.b[0].x += 1.0lf; +} diff --git a/shaders/desktop-only/comp/image-formats.desktop.noeliminate.comp b/shaders/desktop-only/comp/image-formats.desktop.noeliminate.comp new file mode 100644 index 0000000000..5a70623c85 --- /dev/null +++ b/shaders/desktop-only/comp/image-formats.desktop.noeliminate.comp @@ -0,0 +1,48 @@ +#version 450 
+layout(local_size_x = 1) in; + +layout(rgba32f, binding = 0) uniform image2D uImg00; +layout(rgba16f, binding = 1) uniform image2D uImg01; +layout(rg32f, binding = 2) uniform image2D uImg02; +layout(rg16f, binding = 3) uniform image2D uImg03; +layout(r11f_g11f_b10f, binding = 4) uniform image2D uImg04; +layout(r32f, binding = 5) uniform image2D uImg05; +layout(r16f, binding = 6) uniform image2D uImg06; +layout(rgba16, binding = 7) uniform image2D uImg07; +layout(rgb10_a2, binding = 8) uniform image2D uImg08; +layout(rgba8, binding = 9) uniform image2D uImg09; +layout(rg16, binding = 10) uniform image2D uImg10; +layout(rg8, binding = 11) uniform image2D uImg11; +layout(r16, binding = 12) uniform image2D uImg12; +layout(r8, binding = 13) uniform image2D uImg13; +layout(rgba16_snorm, binding = 14) uniform image2D uImg14; +layout(rgba8_snorm, binding = 15) uniform image2D uImg15; +layout(rg16_snorm, binding = 16) uniform image2D uImg16; +layout(rg8_snorm, binding = 17) uniform image2D uImg17; +layout(r16_snorm, binding = 18) uniform image2D uImg18; +layout(r8_snorm, binding = 19) uniform image2D uImg19; + +layout(rgba32i, binding = 20) uniform iimage2D uImage20; +layout(rgba16i, binding = 21) uniform iimage2D uImage21; +layout(rgba8i, binding = 22) uniform iimage2D uImage22; +layout(rg32i, binding = 23) uniform iimage2D uImage23; +layout(rg16i, binding = 24) uniform iimage2D uImage24; +layout(rg8i, binding = 25) uniform iimage2D uImage25; +layout(r32i, binding = 26) uniform iimage2D uImage26; +layout(r16i, binding = 27) uniform iimage2D uImage27; +layout(r8i, binding = 28) uniform iimage2D uImage28; + +layout(rgba32ui, binding = 29) uniform uimage2D uImage29; +layout(rgba16ui, binding = 30) uniform uimage2D uImage30; +layout(rgb10_a2ui, binding = 31) uniform uimage2D uImage31; +layout(rgba8ui, binding = 32) uniform uimage2D uImage32; +layout(rg32ui, binding = 33) uniform uimage2D uImage33; +layout(rg16ui, binding = 34) uniform uimage2D uImage34; +layout(rg8ui, binding = 35) uniform uimage2D uImage35; +layout(r32ui, binding = 36) uniform uimage2D uImage36; +layout(r16ui, binding = 37) uniform uimage2D uImage37; +layout(r8ui, binding = 38) uniform uimage2D uImage38; + +void main() +{ +} diff --git a/shaders/desktop-only/comp/int64.desktop.comp b/shaders/desktop-only/comp/int64.desktop.comp new file mode 100644 index 0000000000..81004d4ad6 --- /dev/null +++ b/shaders/desktop-only/comp/int64.desktop.comp @@ -0,0 +1,55 @@ +#version 450 +#extension GL_ARB_gpu_shader_int64 : require +layout(local_size_x = 1) in; + +struct M0 +{ + int64_t v; + i64vec2 b[2]; + uint64_t c; + uint64_t d[5]; +}; + +// Test buffer layout handling. 
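+// (Editor's note: SSBO2 and SSBO3 below declare identical members under
+// std430 and std140 respectively, so the derived array strides for the
+// 64-bit scalars differ between the two blocks.)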
+layout(std430, binding = 0) buffer SSBO0 +{ + i64vec4 a; + M0 m0; +} ssbo_0; + +layout(std430, binding = 1) buffer SSBO1 +{ + u64vec4 b; + M0 m0; +} ssbo_1; + +layout(std430, binding = 2) buffer SSBO2 +{ + int64_t a[4]; + i64vec2 b[4]; +} ssbo_2; + +layout(std140, binding = 3) buffer SSBO3 +{ + int64_t a[4]; + i64vec2 b[4]; +} ssbo_3; + +void main() +{ + ssbo_0.a += i64vec4(10, 20, 30, 40); + ssbo_1.b += u64vec4(999999999999999999ul, 8888888888888888ul, 77777777777777777ul, 6666666666666666ul); + ssbo_0.a += 20; + ssbo_0.a = abs(ssbo_0.a + i64vec4(ssbo_1.b)); + + ssbo_0.a++; + ssbo_1.b++; + ssbo_0.a--; + ssbo_1.b--; + + ssbo_1.b = doubleBitsToUint64(int64BitsToDouble(ssbo_0.a)); + ssbo_0.a = doubleBitsToInt64(uint64BitsToDouble(ssbo_1.b)); + + ssbo_2.a[0] += 1l; + ssbo_3.a[0] += 2l; +} diff --git a/shaders/desktop-only/frag/image-ms.desktop.frag b/shaders/desktop-only/frag/image-ms.desktop.frag new file mode 100644 index 0000000000..d3acc3081a --- /dev/null +++ b/shaders/desktop-only/frag/image-ms.desktop.frag @@ -0,0 +1,12 @@ +#version 450 + +layout(rgba8, binding = 0) uniform image2DMS uImage; +layout(rgba8, binding = 1) uniform image2DMSArray uImageArray; + +void main() +{ + vec4 a = imageLoad(uImage, ivec2(1, 2), 2); + vec4 b = imageLoad(uImageArray, ivec3(1, 2, 4), 3); + imageStore(uImage, ivec2(2, 3), 1, a); + imageStore(uImageArray, ivec3(2, 3, 7), 1, b); +} diff --git a/shaders/desktop-only/frag/in-block-qualifiers.frag b/shaders/desktop-only/frag/in-block-qualifiers.frag new file mode 100644 index 0000000000..f22096e6d1 --- /dev/null +++ b/shaders/desktop-only/frag/in-block-qualifiers.frag @@ -0,0 +1,20 @@ +#version 450 + +layout(location = 0) in VertexData { + flat float f; + centroid vec4 g; + flat int h; + float i; +} vin; + +layout(location = 4) in flat float f; +layout(location = 5) in centroid vec4 g; +layout(location = 6) in flat int h; +layout(location = 7) in sample float i; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vin.f + vin.g + float(vin.h) + vin.i + f + g + float(h) + i; +} diff --git a/shaders/desktop-only/frag/query-levels.desktop.frag b/shaders/desktop-only/frag/query-levels.desktop.frag new file mode 100644 index 0000000000..3a6977611b --- /dev/null +++ b/shaders/desktop-only/frag/query-levels.desktop.frag @@ -0,0 +1,9 @@ +#version 450 + +layout(binding = 0) uniform sampler2D uSampler; +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vec4(textureQueryLevels(uSampler)); +} diff --git a/shaders/desktop-only/frag/query-lod.desktop.frag b/shaders/desktop-only/frag/query-lod.desktop.frag new file mode 100644 index 0000000000..0cb160402f --- /dev/null +++ b/shaders/desktop-only/frag/query-lod.desktop.frag @@ -0,0 +1,10 @@ +#version 450 + +layout(location = 0) in vec2 vTexCoord; +layout(binding = 0) uniform sampler2D uSampler; +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = textureQueryLod(uSampler, vTexCoord).xyxy; +} diff --git a/shaders/desktop-only/frag/sampler-ms-query.desktop.frag b/shaders/desktop-only/frag/sampler-ms-query.desktop.frag new file mode 100644 index 0000000000..f707ed5c41 --- /dev/null +++ b/shaders/desktop-only/frag/sampler-ms-query.desktop.frag @@ -0,0 +1,17 @@ +#version 450 + +layout(location = 0) out vec4 FragColor; +layout(binding = 0) uniform sampler2DMS uSampler; +layout(binding = 1) uniform sampler2DMSArray uSamplerArray; +layout(rgba8, binding = 2) uniform image2DMS uImage; +layout(rgba8, binding = 3) uniform image2DMSArray uImageArray; + +void main() +{ + 
FragColor = + vec4( + textureSamples(uSampler) + + textureSamples(uSamplerArray) + + imageSamples(uImage) + + imageSamples(uImageArray)); +} diff --git a/shaders/desktop-only/vert/out-block-qualifiers.vert b/shaders/desktop-only/vert/out-block-qualifiers.vert new file mode 100644 index 0000000000..c1e409fb4c --- /dev/null +++ b/shaders/desktop-only/vert/out-block-qualifiers.vert @@ -0,0 +1,26 @@ +#version 450 + +layout(location = 0) out VertexData { + flat float f; + centroid vec4 g; + flat int h; + float i; +} vout; + +layout(location = 4) out flat float f; +layout(location = 5) out centroid vec4 g; +layout(location = 6) out flat int h; +layout(location = 7) out float i; + +void main() +{ + vout.f = 10.0; + vout.g = vec4(20.0); + vout.h = 20; + vout.i = 30.0; + + f = 10.0; + g = vec4(20.0); + h = 20; + i = 30.0; +} diff --git a/shaders/frag/basic.frag b/shaders/frag/basic.frag new file mode 100644 index 0000000000..7c3ad20ba4 --- /dev/null +++ b/shaders/frag/basic.frag @@ -0,0 +1,13 @@ +#version 310 es +precision mediump float; + +in vec4 vColor; +in vec2 vTex; +layout(binding = 0) uniform sampler2D uTex; +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vColor * texture(uTex, vTex); +} + diff --git a/shaders/frag/composite-extract-forced-temporary.frag b/shaders/frag/composite-extract-forced-temporary.frag new file mode 100644 index 0000000000..35fdbe8624 --- /dev/null +++ b/shaders/frag/composite-extract-forced-temporary.frag @@ -0,0 +1,11 @@ +#version 310 es +precision mediump float; +layout(binding = 0) uniform sampler2D Texture; +layout(location = 0) out vec4 FragColor; +layout(location = 0) in vec2 vTexCoord; + +void main() +{ + float f = texture(Texture, vTexCoord).x; + FragColor = vec4(f * f); +} diff --git a/shaders/frag/constant-array.frag b/shaders/frag/constant-array.frag new file mode 100644 index 0000000000..b862cb1dbf --- /dev/null +++ b/shaders/frag/constant-array.frag @@ -0,0 +1,21 @@ +#version 310 es +precision mediump float; +layout(location = 0) out vec4 FragColor; + +layout(location = 0) flat in int index; + +struct Foobar { float a; float b; }; + +vec4 resolve(Foobar f) +{ + return vec4(f.a + f.b); +} + +void main() +{ + const vec4 foo[3] = vec4[](vec4(1.0), vec4(2.0), vec4(3.0)); + const vec4 foobars[2][2] = vec4[][](vec4[](vec4(1.0), vec4(2.0)), vec4[](vec4(8.0), vec4(10.0))); + const Foobar foos[2] = Foobar[](Foobar(10.0, 40.0), Foobar(90.0, 70.0)); + + FragColor = foo[index] + foobars[index][index + 1] + resolve(Foobar(10.0, 20.0)) + resolve(foos[index]); +} diff --git a/shaders/frag/flush_params.frag b/shaders/frag/flush_params.frag new file mode 100644 index 0000000000..8a26ad3a28 --- /dev/null +++ b/shaders/frag/flush_params.frag @@ -0,0 +1,27 @@ +#version 310 es +precision mediump float; + +layout(location = 0) out vec4 FragColor; + +struct Structy +{ + vec4 c; +}; + +void foo2(out Structy f) +{ + f.c = vec4(10.0); +} + +Structy foo() +{ + Structy f; + foo2(f); + return f; +} + +void main() +{ + Structy s = foo(); + FragColor = s.c; +} diff --git a/shaders/frag/for-loop-init.frag b/shaders/frag/for-loop-init.frag new file mode 100644 index 0000000000..0cde26765e --- /dev/null +++ b/shaders/frag/for-loop-init.frag @@ -0,0 +1,52 @@ +#version 310 es +precision mediump float; +layout(location = 0) out int FragColor; + +void main() +{ + FragColor = 16; + + // Basic loop variable. + for (int i = 0; i < 25; i++) + FragColor += 10; + + // Multiple loop variables. 
+	for (int i = 1, j = 4; i < 30; i++, j += 4)
+		FragColor += 11;
+
+	// A potential loop variable, but we access it outside the loop,
+	// so it cannot be one.
+	int k = 0;
+	for (; k < 20; k++)
+		FragColor += 12;
+	k += 3;
+	FragColor += k;
+
+	// Potential loop variables, but the dominator is not trivial.
+	int l;
+	if (k == 40)
+	{
+		for (l = 0; l < 40; l++)
+			FragColor += 13;
+		return;
+	}
+	else
+	{
+		l = k;
+		FragColor += l;
+	}
+
+	// Vectors cannot be loop variables.
+	for (ivec2 i = ivec2(0); i.x < 10; i.x += 4)
+	{
+		FragColor += i.y;
+	}
+
+	// Check that static expressions can be used before the loop header.
+	int m = 0;
+	m = k;
+	int o = m;
+	for (; m < 40; m++)
+		FragColor += m;
+	FragColor += o;
+} diff --git a/shaders/frag/ground.frag b/shaders/frag/ground.frag new file mode 100755 index 0000000000..d1fcfd4907 --- /dev/null +++ b/shaders/frag/ground.frag @@ -0,0 +1,162 @@ +#version 310 es
+precision mediump float;
+
+#define DEBUG_NONE 0
+#define DEBUG_DIFFUSE 1
+#define DEBUG_SPECULAR 2
+#define DEBUG_LIGHTING 3
+#define DEBUG_FOG 4
+#define DEBUG DEBUG_NONE
+
+#define FORWARD 0
+#define DEFERRED 1
+#define DEFERRED_VTEX 2
+
+float saturate(float x) { return clamp(x, 0.0, 1.0); }
+
+layout(std140, binding = 4) uniform GlobalPSData
+{
+	vec4 g_CamPos;
+	vec4 g_SunDir;
+	vec4 g_SunColor;
+	vec4 g_ResolutionParams;
+	vec4 g_TimeParams;
+	vec4 g_FogColor_Distance;
+};
+
+vec4 ComputeFogFactor(vec3 WorldPos)
+{
+	vec4 FogData;
+	vec3 vEye = WorldPos - g_CamPos.xyz;
+	vec3 nEye = normalize(vEye);
+	FogData.w = exp(-dot(vEye, vEye) * g_FogColor_Distance.w * 0.75);
+
+	float fog_sun_factor = pow(saturate(dot(nEye, g_SunDir.xyz)), 8.0);
+	FogData.xyz = mix(vec3(1.0, 1.0, 1.0), vec3(0.6, 0.6, 0.9), nEye.y * 0.5 + 0.5);
+	FogData.xyz = mix(FogData.xyz, vec3(0.95, 0.87, 0.78), fog_sun_factor);
+	return FogData;
+}
+
+void ApplyFog(inout vec3 Color, vec4 FogData)
+{
+	Color = mix(FogData.xyz, Color, FogData.w);
+}
+
+void ApplyLighting(inout mediump vec3 Color, mediump float DiffuseFactor)
+{
+	mediump vec3 DiffuseLight = g_SunColor.xyz * DiffuseFactor;
+	mediump vec3 AmbientLight = vec3(0.2, 0.35, 0.55) * 0.5;
+	mediump vec3 Lighting = DiffuseLight + AmbientLight;
+#if DEBUG == DEBUG_LIGHTING
+	Color = Lighting;
+#else
+	Color *= Lighting;
+#endif
+}
+
+#define SPECULAR 0
+#define GLOSSMAP 0
+
+void ApplySpecular(inout mediump vec3 Color, mediump vec3 EyeVec, mediump vec3 Normal, mediump vec3 SpecularColor, mediump float Shininess, mediump float FresnelAmount)
+{
+	mediump vec3 HalfAngle = normalize(-EyeVec + g_SunDir.xyz);
+
+	mediump float v_dot_h = saturate(dot(HalfAngle, -EyeVec));
+	mediump float n_dot_l = saturate(dot(Normal, g_SunDir.xyz));
+	mediump float n_dot_h = saturate(dot(Normal, HalfAngle));
+	mediump float n_dot_v = saturate(dot(-EyeVec, Normal));
+	mediump float h_dot_l = saturate(dot(g_SunDir.xyz, HalfAngle));
+
+	const mediump float roughness_value = 0.25;
+
+	mediump float r_sq = roughness_value * roughness_value;
+	mediump float n_dot_h_sq = n_dot_h * n_dot_h;
+	mediump float roughness_a = 1.0 / (4.0 * r_sq * n_dot_h_sq * n_dot_h_sq);
+	mediump float roughness_b = n_dot_h_sq - 1.0;
+	mediump float roughness_c = r_sq * n_dot_h_sq;
+	mediump float roughness = saturate(roughness_a * exp(roughness_b / roughness_c));
+
+	FresnelAmount = 0.5;
+	mediump float fresnel_term = pow(1.0 - n_dot_v, 5.0) * (1.0 - FresnelAmount) + FresnelAmount;
+
+	mediump float geo_numerator = 2.0 * n_dot_h;
+	mediump float geo_denominator = 1.0 / v_dot_h;
+	mediump float geo_term = min(1.0,
min(n_dot_v, n_dot_l) * geo_numerator * geo_denominator); + +#if SPECULAR || GLOSSMAP + Color += SpecularColor * g_SunColor.xyz * fresnel_term * roughness * n_dot_l * geo_term / (n_dot_v * n_dot_l + 0.0001); +#endif + + //Color = vec3(0.025 * 1.0 / (n_dot_v * n_dot_l)); +} +layout(location = 0) in vec2 TexCoord; +layout(location = 1) in vec3 EyeVec; + +layout(binding = 2) uniform sampler2D TexNormalmap; +//layout(binding = 3) uniform sampler2D TexScatteringLUT; + +#define DIFFUSE_ONLY 0 +#define GLOBAL_RENDERER DEFERRED +#define OUTPUT_FEEDBACK_TEXTURE 0 + +#if DIFFUSE_ONLY +layout(location = 0) out vec4 ColorOut; +layout(location = 1) out vec4 NormalOut; +#else +layout(location = 0) out vec4 AlbedoOut; +layout(location = 1) out vec4 SpecularOut; +layout(location = 2) out vec4 NormalOut; +layout(location = 3) out vec4 LightingOut; +#endif + +void Resolve(vec3 Albedo, vec3 Normal, float Roughness, float Metallic) +{ +#if (GLOBAL_RENDERER == FORWARD) || OUTPUT_FEEDBACK_TEXTURE + float Lighting = saturate(dot(Normal, normalize(vec3(1.0, 0.5, 1.0)))); + ColorOut.xyz = Albedo * Lighting; + ColorOut.w = 1.0; +#elif DIFFUSE_ONLY + ColorOut = vec4(Albedo, 0.0); + NormalOut.xyz = Normal * 0.5 + 0.5; + NormalOut.w = 1.0; + + // linearize and map to 0..255 range + ColorOut.w = -0.003921569 / (gl_FragCoord.z - 1.003921569); + ColorOut.w = log2(1.0 + saturate(length(EyeVec.xyz) / 200.0)); + ColorOut.w -= 1.0 / 255.0; +#else + LightingOut = vec4(0.0); + NormalOut = vec4(Normal * 0.5 + 0.5, 0.0); + SpecularOut = vec4(Roughness, Metallic, 0.0, 0.0); + AlbedoOut = vec4(Albedo, 1.0); +#endif +} + +void main() +{ + vec3 Normal = texture(TexNormalmap, TexCoord).xyz * 2.0 - 1.0; + Normal = normalize(Normal); + + vec2 scatter_uv; + scatter_uv.x = saturate(length(EyeVec) / 1000.0); + + vec3 nEye = normalize(EyeVec); + scatter_uv.y = 0.0; //nEye.x * 0.5 + 0.5; + + vec3 Color = vec3(0.1, 0.3, 0.1); + vec3 grass = vec3(0.1, 0.3, 0.1); + vec3 dirt = vec3(0.1, 0.1, 0.1); + vec3 snow = vec3(0.8, 0.8, 0.8); + + float grass_snow = smoothstep(0.0, 0.15, (g_CamPos.y + EyeVec.y) / 200.0); + vec3 base = mix(grass, snow, grass_snow); + + float edge = smoothstep(0.7, 0.75, Normal.y); + Color = mix(dirt, base, edge); + Color *= Color; + + float Roughness = 1.0 - edge * grass_snow; + + Resolve(Color, Normal, Roughness, 0.0); +} + diff --git a/shaders/frag/mix.frag b/shaders/frag/mix.frag new file mode 100644 index 0000000000..a5d589dd08 --- /dev/null +++ b/shaders/frag/mix.frag @@ -0,0 +1,20 @@ +#version 310 es +precision mediump float; + +layout(location = 0) in vec4 vIn0; +layout(location = 1) in vec4 vIn1; +layout(location = 2) in float vIn2; +layout(location = 3) in float vIn3; +layout(location = 0) out vec4 FragColor; + +void main() +{ + bvec4 l = bvec4(false, true, false, false); + FragColor = mix(vIn0, vIn1, l); + + bool f = true; + FragColor = vec4(mix(vIn2, vIn3, f)); + + FragColor = f ? vIn0 : vIn1; + FragColor = vec4(f ? 
vIn2 : vIn3); +} diff --git a/shaders/frag/pls.frag b/shaders/frag/pls.frag new file mode 100644 index 0000000000..314fd99427 --- /dev/null +++ b/shaders/frag/pls.frag @@ -0,0 +1,20 @@ +#version 310 es +precision mediump float; + +layout(location = 0) in vec4 PLSIn0; +layout(location = 1) in vec4 PLSIn1; +in vec4 PLSIn2; +in vec4 PLSIn3; + +layout(location = 0) out vec4 PLSOut0; +layout(location = 1) out vec4 PLSOut1; +layout(location = 2) out vec4 PLSOut2; +layout(location = 3) out vec4 PLSOut3; + +void main() +{ + PLSOut0 = 2.0 * PLSIn0; + PLSOut1 = 6.0 * PLSIn1; + PLSOut2 = 7.0 * PLSIn2; + PLSOut3 = 4.0 * PLSIn3; +} diff --git a/shaders/frag/sampler-ms.frag b/shaders/frag/sampler-ms.frag new file mode 100644 index 0000000000..6593928271 --- /dev/null +++ b/shaders/frag/sampler-ms.frag @@ -0,0 +1,16 @@ +#version 310 es +precision mediump float; +precision highp int; + +layout(binding = 0) uniform mediump sampler2DMS uSampler; +layout(location = 0) out vec4 FragColor; + +void main() +{ + ivec2 coord = ivec2(gl_FragCoord.xy); + FragColor = + texelFetch(uSampler, coord, 0) + + texelFetch(uSampler, coord, 1) + + texelFetch(uSampler, coord, 2) + + texelFetch(uSampler, coord, 3); +} diff --git a/shaders/frag/sampler.frag b/shaders/frag/sampler.frag new file mode 100644 index 0000000000..fb4292f2e7 --- /dev/null +++ b/shaders/frag/sampler.frag @@ -0,0 +1,18 @@ +#version 310 es +precision mediump float; + +in vec4 vColor; +in vec2 vTex; +layout(binding = 0) uniform sampler2D uTex; +layout(location = 0) out vec4 FragColor; + +vec4 sample_texture(sampler2D tex, vec2 uv) +{ + return texture(tex, uv); +} + +void main() +{ + FragColor = vColor * sample_texture(uTex, vTex); +} + diff --git a/shaders/frag/swizzle.frag b/shaders/frag/swizzle.frag new file mode 100644 index 0000000000..271ba6cb64 --- /dev/null +++ b/shaders/frag/swizzle.frag @@ -0,0 +1,17 @@ +#version 310 es +precision mediump float; + +layout(location = 0) uniform sampler2D samp; +layout(location = 0) out vec4 FragColor; +layout(location = 1) in vec3 vNormal; +layout(location = 2) in vec2 vUV; + +void main() +{ + FragColor = vec4(texture(samp, vUV).xyz, 1.0); + FragColor = vec4(texture(samp, vUV).xz, 1.0, 4.0); + FragColor = vec4(texture(samp, vUV).xx, texture(samp, vUV + vec2(0.1)).yy); + FragColor = vec4(vNormal, 1.0); + FragColor = vec4(vNormal + 1.8, 1.0); + FragColor = vec4(vUV, vUV + 1.8); +} diff --git a/shaders/frag/ubo_layout.frag b/shaders/frag/ubo_layout.frag new file mode 100644 index 0000000000..80f9f16d3d --- /dev/null +++ b/shaders/frag/ubo_layout.frag @@ -0,0 +1,24 @@ +#version 310 es +precision mediump float; + +layout(location = 0) out vec4 FragColor; + +struct Str +{ + mat4 foo; +}; + +layout(binding = 0, std140) uniform UBO1 +{ + layout(row_major) Str foo; +} ubo1; + +layout(binding = 1, std140) uniform UBO2 +{ + layout(column_major) Str foo; +} ubo0; + +void main() +{ + FragColor = ubo1.foo.foo[0] + ubo0.foo.foo[0]; +} diff --git a/shaders/geom/basic.geom b/shaders/geom/basic.geom new file mode 100644 index 0000000000..de5f515836 --- /dev/null +++ b/shaders/geom/basic.geom @@ -0,0 +1,28 @@ +#version 310 es +#extension GL_EXT_geometry_shader : require + +layout(triangles, invocations = 4) in; +layout(triangle_strip, max_vertices = 3) out; + +in VertexData { + vec3 normal; +} vin[]; + +out vec3 vNormal; + +void main() +{ + gl_Position = gl_in[0].gl_Position; + vNormal = vin[0].normal + float(gl_InvocationID); + EmitVertex(); + + gl_Position = gl_in[1].gl_Position; + vNormal = vin[1].normal + 4.0 * 
float(gl_InvocationID); + EmitVertex(); + + gl_Position = gl_in[2].gl_Position; + vNormal = vin[2].normal + 2.0 * float(gl_InvocationID); + EmitVertex(); + + EndPrimitive(); +} diff --git a/shaders/geom/lines-adjacency.geom b/shaders/geom/lines-adjacency.geom new file mode 100644 index 0000000000..1c184505b5 --- /dev/null +++ b/shaders/geom/lines-adjacency.geom @@ -0,0 +1,28 @@ +#version 310 es +#extension GL_EXT_geometry_shader : require + +layout(lines_adjacency) in; +layout(line_strip, max_vertices = 3) out; + +in VertexData { + vec3 normal; +} vin[]; + +out vec3 vNormal; + +void main() +{ + gl_Position = gl_in[0].gl_Position; + vNormal = vin[0].normal; + EmitVertex(); + + gl_Position = gl_in[1].gl_Position; + vNormal = vin[1].normal; + EmitVertex(); + + gl_Position = gl_in[2].gl_Position; + vNormal = vin[2].normal; + EmitVertex(); + + EndPrimitive(); +} diff --git a/shaders/geom/lines.geom b/shaders/geom/lines.geom new file mode 100644 index 0000000000..4d5a0d7e7a --- /dev/null +++ b/shaders/geom/lines.geom @@ -0,0 +1,24 @@ +#version 310 es +#extension GL_EXT_geometry_shader : require + +layout(lines) in; +layout(line_strip, max_vertices = 2) out; + +in VertexData { + vec3 normal; +} vin[]; + +out vec3 vNormal; + +void main() +{ + gl_Position = gl_in[0].gl_Position; + vNormal = vin[0].normal; + EmitVertex(); + + gl_Position = gl_in[1].gl_Position; + vNormal = vin[1].normal; + EmitVertex(); + + EndPrimitive(); +} diff --git a/shaders/geom/points.geom b/shaders/geom/points.geom new file mode 100644 index 0000000000..d416e5e826 --- /dev/null +++ b/shaders/geom/points.geom @@ -0,0 +1,28 @@ +#version 310 es +#extension GL_EXT_geometry_shader : require + +layout(points) in; +layout(points, max_vertices = 3) out; + +in VertexData { + vec3 normal; +} vin[]; + +out vec3 vNormal; + +void main() +{ + gl_Position = gl_in[0].gl_Position; + vNormal = vin[0].normal; + EmitVertex(); + + gl_Position = gl_in[0].gl_Position; + vNormal = vin[0].normal; + EmitVertex(); + + gl_Position = gl_in[0].gl_Position; + vNormal = vin[0].normal; + EmitVertex(); + + EndPrimitive(); +} diff --git a/shaders/geom/single-invocation.geom b/shaders/geom/single-invocation.geom new file mode 100644 index 0000000000..92f60011ca --- /dev/null +++ b/shaders/geom/single-invocation.geom @@ -0,0 +1,28 @@ +#version 310 es +#extension GL_EXT_geometry_shader : require + +layout(triangles) in; +layout(triangle_strip, max_vertices = 3) out; + +in VertexData { + vec3 normal; +} vin[]; + +out vec3 vNormal; + +void main() +{ + gl_Position = gl_in[0].gl_Position; + vNormal = vin[0].normal; + EmitVertex(); + + gl_Position = gl_in[1].gl_Position; + vNormal = vin[1].normal; + EmitVertex(); + + gl_Position = gl_in[2].gl_Position; + vNormal = vin[2].normal; + EmitVertex(); + + EndPrimitive(); +} diff --git a/shaders/geom/triangles-adjacency.geom b/shaders/geom/triangles-adjacency.geom new file mode 100644 index 0000000000..02040ebfb9 --- /dev/null +++ b/shaders/geom/triangles-adjacency.geom @@ -0,0 +1,28 @@ +#version 310 es +#extension GL_EXT_geometry_shader : require + +layout(triangles_adjacency) in; +layout(triangle_strip, max_vertices = 3) out; + +in VertexData { + vec3 normal; +} vin[]; + +out vec3 vNormal; + +void main() +{ + gl_Position = gl_in[0].gl_Position; + vNormal = vin[0].normal; + EmitVertex(); + + gl_Position = gl_in[1].gl_Position; + vNormal = vin[1].normal; + EmitVertex(); + + gl_Position = gl_in[2].gl_Position; + vNormal = vin[2].normal; + EmitVertex(); + + EndPrimitive(); +} diff --git a/shaders/geom/triangles.geom 
b/shaders/geom/triangles.geom new file mode 100644 index 0000000000..92f60011ca --- /dev/null +++ b/shaders/geom/triangles.geom @@ -0,0 +1,28 @@ +#version 310 es +#extension GL_EXT_geometry_shader : require + +layout(triangles) in; +layout(triangle_strip, max_vertices = 3) out; + +in VertexData { + vec3 normal; +} vin[]; + +out vec3 vNormal; + +void main() +{ + gl_Position = gl_in[0].gl_Position; + vNormal = vin[0].normal; + EmitVertex(); + + gl_Position = gl_in[1].gl_Position; + vNormal = vin[1].normal; + EmitVertex(); + + gl_Position = gl_in[2].gl_Position; + vNormal = vin[2].normal; + EmitVertex(); + + EndPrimitive(); +} diff --git a/shaders/tesc/basic.tesc b/shaders/tesc/basic.tesc new file mode 100644 index 0000000000..1a0e1d6cc8 --- /dev/null +++ b/shaders/tesc/basic.tesc @@ -0,0 +1,17 @@ +#version 310 es +#extension GL_EXT_tessellation_shader : require + +patch out vec3 vFoo; + +layout(vertices = 1) out; + +void main() +{ + gl_TessLevelInner[0] = 8.9; + gl_TessLevelInner[1] = 6.9; + gl_TessLevelOuter[0] = 8.9; + gl_TessLevelOuter[1] = 6.9; + gl_TessLevelOuter[2] = 3.9; + gl_TessLevelOuter[3] = 4.9; + vFoo = vec3(1.0); +} diff --git a/shaders/tesc/water_tess.tesc b/shaders/tesc/water_tess.tesc new file mode 100644 index 0000000000..9e9c0d477e --- /dev/null +++ b/shaders/tesc/water_tess.tesc @@ -0,0 +1,115 @@ +#version 310 es +#extension GL_EXT_tessellation_shader : require + +layout(vertices = 1) out; +in vec2 vPatchPosBase[]; + +layout(std140) uniform UBO +{ + vec4 uScale; + highp vec3 uCamPos; + vec2 uPatchSize; + vec2 uMaxTessLevel; + float uDistanceMod; + vec4 uFrustum[6]; +}; + +patch out vec2 vOutPatchPosBase; +patch out vec4 vPatchLods; + +float lod_factor(vec2 pos_) +{ + vec2 pos = pos_ * uScale.xy; + vec3 dist_to_cam = uCamPos - vec3(pos.x, 0.0, pos.y); + float level = log2((length(dist_to_cam) + 0.0001) * uDistanceMod); + return clamp(level, 0.0, uMaxTessLevel.x); +} + +float tess_level(float lod) +{ + return uMaxTessLevel.y * exp2(-lod); +} + +vec4 tess_level(vec4 lod) +{ + return uMaxTessLevel.y * exp2(-lod); +} + +// Guard band for vertex displacement. 
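+// (Editor's note: frustum_cull() below pads the patch's bounding box by
+// GUARD_BAND world units on every side before testing its bounding sphere
+// against the six frustum planes, so patches whose vertices will later be
+// displaced vertically are not culled while still visible.)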
+#define GUARD_BAND 10.0 +bool frustum_cull(vec2 p0) +{ + vec2 min_xz = (p0 - GUARD_BAND) * uScale.xy; + vec2 max_xz = (p0 + uPatchSize + GUARD_BAND) * uScale.xy; + + vec3 bb_min = vec3(min_xz.x, -GUARD_BAND, min_xz.y); + vec3 bb_max = vec3(max_xz.x, +GUARD_BAND, max_xz.y); + vec3 center = 0.5 * (bb_min + bb_max); + float radius = 0.5 * length(bb_max - bb_min); + + vec3 f0 = vec3( + dot(uFrustum[0], vec4(center, 1.0)), + dot(uFrustum[1], vec4(center, 1.0)), + dot(uFrustum[2], vec4(center, 1.0))); + + vec3 f1 = vec3( + dot(uFrustum[3], vec4(center, 1.0)), + dot(uFrustum[4], vec4(center, 1.0)), + dot(uFrustum[5], vec4(center, 1.0))); + + return !(any(lessThanEqual(f0, vec3(-radius))) || any(lessThanEqual(f1, vec3(-radius)))); +} + +void compute_tess_levels(vec2 p0) +{ + vOutPatchPosBase = p0; + + float l00 = lod_factor(p0 + vec2(-0.5, -0.5) * uPatchSize); + float l10 = lod_factor(p0 + vec2(+0.5, -0.5) * uPatchSize); + float l20 = lod_factor(p0 + vec2(+1.5, -0.5) * uPatchSize); + float l01 = lod_factor(p0 + vec2(-0.5, +0.5) * uPatchSize); + float l11 = lod_factor(p0 + vec2(+0.5, +0.5) * uPatchSize); + float l21 = lod_factor(p0 + vec2(+1.5, +0.5) * uPatchSize); + float l02 = lod_factor(p0 + vec2(-0.5, +1.5) * uPatchSize); + float l12 = lod_factor(p0 + vec2(+0.5, +1.5) * uPatchSize); + float l22 = lod_factor(p0 + vec2(+1.5, +1.5) * uPatchSize); + + vec4 lods = vec4( + dot(vec4(l01, l11, l02, l12), vec4(0.25)), + dot(vec4(l00, l10, l01, l11), vec4(0.25)), + dot(vec4(l10, l20, l11, l21), vec4(0.25)), + dot(vec4(l11, l21, l12, l22), vec4(0.25))); + + vPatchLods = lods; + + vec4 outer_lods = min(lods.xyzw, lods.yzwx); + vec4 levels = tess_level(outer_lods); + gl_TessLevelOuter[0] = levels.x; + gl_TessLevelOuter[1] = levels.y; + gl_TessLevelOuter[2] = levels.z; + gl_TessLevelOuter[3] = levels.w; + + float min_lod = min(min(lods.x, lods.y), min(lods.z, lods.w)); + float inner = tess_level(min(min_lod, l11)); + gl_TessLevelInner[0] = inner; + gl_TessLevelInner[1] = inner; +} + +void main() +{ + vec2 p0 = vPatchPosBase[0]; + if (!frustum_cull(p0)) + { + gl_TessLevelOuter[0] = -1.0; + gl_TessLevelOuter[1] = -1.0; + gl_TessLevelOuter[2] = -1.0; + gl_TessLevelOuter[3] = -1.0; + gl_TessLevelInner[0] = -1.0; + gl_TessLevelInner[1] = -1.0; + } + else + { + compute_tess_levels(p0); + } +} + diff --git a/shaders/tese/ccw.tese b/shaders/tese/ccw.tese new file mode 100644 index 0000000000..26e9cc698d --- /dev/null +++ b/shaders/tese/ccw.tese @@ -0,0 +1,10 @@ +#version 310 es +#extension GL_EXT_tessellation_shader : require + +layout(ccw, triangles, fractional_even_spacing) in; + +void main() +{ + gl_Position = vec4(1.0); +} + diff --git a/shaders/tese/cw.tese b/shaders/tese/cw.tese new file mode 100644 index 0000000000..6ce7c2d6d9 --- /dev/null +++ b/shaders/tese/cw.tese @@ -0,0 +1,10 @@ +#version 310 es +#extension GL_EXT_tessellation_shader : require + +layout(cw, triangles, fractional_even_spacing) in; + +void main() +{ + gl_Position = vec4(1.0); +} + diff --git a/shaders/tese/equal.tese b/shaders/tese/equal.tese new file mode 100644 index 0000000000..08ab36ec23 --- /dev/null +++ b/shaders/tese/equal.tese @@ -0,0 +1,10 @@ +#version 310 es +#extension GL_EXT_tessellation_shader : require + +layout(cw, triangles, equal_spacing) in; + +void main() +{ + gl_Position = vec4(1.0); +} + diff --git a/shaders/tese/fractional_even.tese b/shaders/tese/fractional_even.tese new file mode 100644 index 0000000000..6ce7c2d6d9 --- /dev/null +++ b/shaders/tese/fractional_even.tese @@ -0,0 +1,10 @@ +#version 310 es +#extension 
GL_EXT_tessellation_shader : require + +layout(cw, triangles, fractional_even_spacing) in; + +void main() +{ + gl_Position = vec4(1.0); +} + diff --git a/shaders/tese/fractional_odd.tese b/shaders/tese/fractional_odd.tese new file mode 100644 index 0000000000..a15a32926b --- /dev/null +++ b/shaders/tese/fractional_odd.tese @@ -0,0 +1,10 @@ +#version 310 es +#extension GL_EXT_tessellation_shader : require + +layout(cw, triangles, fractional_odd_spacing) in; + +void main() +{ + gl_Position = vec4(1.0); +} + diff --git a/shaders/tese/line.tese b/shaders/tese/line.tese new file mode 100644 index 0000000000..b4237ef559 --- /dev/null +++ b/shaders/tese/line.tese @@ -0,0 +1,10 @@ +#version 310 es +#extension GL_EXT_tessellation_shader : require + +layout(isolines, point_mode, fractional_even_spacing) in; + +void main() +{ + gl_Position = vec4(1.0); +} + diff --git a/shaders/tese/triangle.tese b/shaders/tese/triangle.tese new file mode 100644 index 0000000000..6ce7c2d6d9 --- /dev/null +++ b/shaders/tese/triangle.tese @@ -0,0 +1,10 @@ +#version 310 es +#extension GL_EXT_tessellation_shader : require + +layout(cw, triangles, fractional_even_spacing) in; + +void main() +{ + gl_Position = vec4(1.0); +} + diff --git a/shaders/tese/water_tess.tese b/shaders/tese/water_tess.tese new file mode 100644 index 0000000000..f9628b1e18 --- /dev/null +++ b/shaders/tese/water_tess.tese @@ -0,0 +1,65 @@ +#version 310 es +#extension GL_EXT_tessellation_shader : require +precision highp int; + +layout(cw, quads, fractional_even_spacing) in; + +patch in vec2 vOutPatchPosBase; +patch in vec4 vPatchLods; + +layout(binding = 1, std140) uniform UBO +{ + mat4 uMVP; + vec4 uScale; + vec2 uInvScale; + vec3 uCamPos; + vec2 uPatchSize; + vec2 uInvHeightmapSize; +}; +layout(binding = 0) uniform mediump sampler2D uHeightmapDisplacement; + +highp out vec3 vWorld; +highp out vec4 vGradNormalTex; + +vec2 lerp_vertex(vec2 tess_coord) +{ + return vOutPatchPosBase + tess_coord * uPatchSize; +} + +mediump vec2 lod_factor(vec2 tess_coord) +{ + mediump vec2 x = mix(vPatchLods.yx, vPatchLods.zw, tess_coord.x); + mediump float level = mix(x.x, x.y, tess_coord.y); + mediump float floor_level = floor(level); + mediump float fract_level = level - floor_level; + return vec2(floor_level, fract_level); +} + +mediump vec3 sample_height_displacement(vec2 uv, vec2 off, mediump vec2 lod) +{ + return mix( + textureLod(uHeightmapDisplacement, uv + 0.5 * off, lod.x).xyz, + textureLod(uHeightmapDisplacement, uv + 1.0 * off, lod.x + 1.0).xyz, + lod.y); +} + +void main() +{ + vec2 tess_coord = gl_TessCoord.xy; + vec2 pos = lerp_vertex(tess_coord); + mediump vec2 lod = lod_factor(tess_coord); + + vec2 tex = pos * uInvHeightmapSize.xy; + pos *= uScale.xy; + + mediump float delta_mod = exp2(lod.x); + vec2 off = uInvHeightmapSize.xy * delta_mod; + + vGradNormalTex = vec4(tex + 0.5 * uInvHeightmapSize.xy, tex * uScale.zw); + vec3 height_displacement = sample_height_displacement(tex, off, lod); + + pos += height_displacement.yz; + vWorld = vec3(pos.x, height_displacement.x, pos.y); + gl_Position = uMVP * vec4(vWorld, 1.0); +} + diff --git a/shaders/vert/basic.vert b/shaders/vert/basic.vert new file mode 100644 index 0000000000..801724f325 --- /dev/null +++ b/shaders/vert/basic.vert @@ -0,0 +1,15 @@ +#version 310 es + +layout(std140) uniform UBO +{ + uniform mat4 uMVP; +}; +in vec4 aVertex; +in vec3 aNormal; +out vec3 vNormal; + +void main() +{ + gl_Position = uMVP * aVertex; + vNormal = aNormal; +} diff --git a/shaders/vert/ground.vert 
b/shaders/vert/ground.vert new file mode 100755 index 0000000000..2deeb5a94b --- /dev/null +++ b/shaders/vert/ground.vert @@ -0,0 +1,202 @@ +#version 310 es + +#define YFLIP 0 +#define SPECULAR 0 +#define GLOSSMAP 0 + +#define DEBUG_NONE 0 +#define DEBUG_DIFFUSE 1 +#define DEBUG_SPECULAR 2 +#define DEBUG_LIGHTING 3 +#define DEBUG_FOG 4 +#define DEBUG DEBUG_NONE + +#define FORWARD 0 +#define DEFERRED 1 +#define DEFERRED_VTEX 2 + +float saturate(float x) { return clamp(x, 0.0, 1.0); } + +layout(std140, binding = 0) uniform GlobalVSData +{ + vec4 g_ViewProj_Row0; + vec4 g_ViewProj_Row1; + vec4 g_ViewProj_Row2; + vec4 g_ViewProj_Row3; + vec4 g_CamPos; + vec4 g_CamRight; + vec4 g_CamUp; + vec4 g_CamFront; + vec4 g_SunDir; + vec4 g_SunColor; + vec4 g_TimeParams; + vec4 g_ResolutionParams; + vec4 g_CamAxisRight; + vec4 g_FogColor_Distance; + vec4 g_ShadowVP_Row0; + vec4 g_ShadowVP_Row1; + vec4 g_ShadowVP_Row2; + vec4 g_ShadowVP_Row3; +}; + +vec4 ComputeFogFactor(vec3 WorldPos) +{ + vec4 FogData; + vec3 vEye = WorldPos - g_CamPos.xyz; + vec3 nEye = normalize(vEye); + FogData.w = exp(-dot(vEye, vEye) * g_FogColor_Distance.w * 0.75); + + float fog_sun_factor = pow(saturate(dot(nEye, g_SunDir.xyz)), 8.0); + FogData.xyz = mix(vec3(1.0, 1.0, 1.0), vec3(0.6, 0.6, 0.9), nEye.y * 0.5 + 0.5); + FogData.xyz = mix(FogData.xyz, vec3(0.95, 0.87, 0.78), fog_sun_factor); + return FogData; +} + +void ApplyFog(inout vec3 Color, vec4 FogData) +{ + Color = mix(FogData.xyz, Color, FogData.w); +} + +void ApplyLighting(inout mediump vec3 Color, mediump float DiffuseFactor) +{ + mediump vec3 DiffuseLight = g_SunColor.xyz * DiffuseFactor; + mediump vec3 AmbientLight = vec3(0.2, 0.35, 0.55) * 0.5; + mediump vec3 Lighting = DiffuseLight + AmbientLight; +#if DEBUG == DEBUG_LIGHTING + Color = Lighting; +#else + Color *= Lighting; +#endif +} + +#pragma VARIANT SPECULAR +#pragma VARIANT GLOSSMAP + +void ApplySpecular(inout mediump vec3 Color, mediump vec3 EyeVec, mediump vec3 Normal, mediump vec3 SpecularColor, mediump float Shininess, mediump float FresnelAmount) +{ + mediump vec3 HalfAngle = normalize(-EyeVec + g_SunDir.xyz); + + mediump float v_dot_h = saturate(dot(HalfAngle, -EyeVec)); + mediump float n_dot_l = saturate(dot(Normal, g_SunDir.xyz)); + mediump float n_dot_h = saturate(dot(Normal, HalfAngle)); + mediump float n_dot_v = saturate(dot(-EyeVec, Normal)); + mediump float h_dot_l = saturate(dot(g_SunDir.xyz, HalfAngle)); + + const mediump float roughness_value = 0.25; + + mediump float r_sq = roughness_value * roughness_value; + mediump float n_dot_h_sq = n_dot_h * n_dot_h; + mediump float roughness_a = 1.0 / (4.0 * r_sq * n_dot_h_sq * n_dot_h_sq); + mediump float roughness_b = n_dot_h_sq - 1.0; + mediump float roughness_c = r_sq * n_dot_h_sq; + mediump float roughness = saturate(roughness_a * exp(roughness_b / roughness_c)); + + FresnelAmount = 0.5; + mediump float fresnel_term = pow(1.0 - n_dot_v, 5.0) * (1.0 - FresnelAmount) + FresnelAmount; + + mediump float geo_numerator = 2.0 * n_dot_h; + mediump float geo_denominator = 1.0 / v_dot_h; + mediump float geo_term = min(1.0, min(n_dot_v, n_dot_l) * geo_numerator * geo_denominator); + +#if SPECULAR || GLOSSMAP + Color += SpecularColor * g_SunColor.xyz * fresnel_term * roughness * n_dot_l * geo_term / (n_dot_v * n_dot_l + 0.0001); +#endif + + //Color = vec3(0.025 * 1.0 / (n_dot_v * n_dot_l)); +} + +layout(location = 0) in vec2 Position; +layout(location = 1) in vec4 LODWeights; + +layout(location = 0) out vec2 TexCoord; +layout(location = 1) out vec3 EyeVec; + 
+layout(std140, binding = 2) uniform GlobalGround +{ + vec4 GroundScale; + vec4 GroundPosition; + vec4 InvGroundSize_PatchScale; +}; + +struct PatchData +{ + vec4 Position; + vec4 LODs; +}; + +layout(std140, binding = 0) uniform PerPatch +{ + PatchData Patches[256]; +}; + +layout(binding = 0) uniform sampler2D TexHeightmap; +layout(binding = 1) uniform sampler2D TexLOD; + +vec2 lod_factor(vec2 uv) +{ + float level = textureLod(TexLOD, uv, 0.0).x * (255.0 / 32.0); + float floor_level = floor(level); + float fract_level = level - floor_level; + return vec2(floor_level, fract_level); +} + +#ifdef VULKAN +#define INSTANCE_ID gl_InstanceIndex +#else +#define INSTANCE_ID gl_InstanceID +#endif + +vec2 warp_position() +{ + float vlod = dot(LODWeights, Patches[INSTANCE_ID].LODs); + vlod = mix(vlod, Patches[INSTANCE_ID].Position.w, all(equal(LODWeights, vec4(0.0)))); + +#ifdef DEBUG_LOD_HEIGHT + LODFactor = vec4(vlod); +#endif + + float floor_lod = floor(vlod); + float fract_lod = vlod - floor_lod; + uint ufloor_lod = uint(floor_lod); + +#ifdef DEBUG_LOD_HEIGHT + LODFactor = vec4(fract_lod); +#endif + + uvec2 uPosition = uvec2(Position); + uvec2 mask = (uvec2(1u) << uvec2(ufloor_lod, ufloor_lod + 1u)) - 1u; + //uvec2 rounding = mix(uvec2(0u), mask, lessThan(uPosition, uvec2(32u))); + + uvec2 rounding = uvec2( + uPosition.x < 32u ? mask.x : 0u, + uPosition.y < 32u ? mask.y : 0u); + + vec4 lower_upper_snapped = vec4((uPosition + rounding).xyxy & (~mask).xxyy); + return mix(lower_upper_snapped.xy, lower_upper_snapped.zw, fract_lod); +} + +void main() +{ + vec2 PatchPos = Patches[INSTANCE_ID].Position.xz * InvGroundSize_PatchScale.zw; + vec2 WarpedPos = warp_position(); + vec2 VertexPos = PatchPos + WarpedPos; + vec2 NormalizedPos = VertexPos * InvGroundSize_PatchScale.xy; + vec2 lod = lod_factor(NormalizedPos); + + vec2 Offset = exp2(lod.x) * InvGroundSize_PatchScale.xy; + + float Elevation = + mix(textureLod(TexHeightmap, NormalizedPos + 0.5 * Offset, lod.x).x, + textureLod(TexHeightmap, NormalizedPos + 1.0 * Offset, lod.x + 1.0).x, + lod.y); + + vec3 WorldPos = vec3(NormalizedPos.x, Elevation, NormalizedPos.y); + WorldPos *= GroundScale.xyz; + WorldPos += GroundPosition.xyz; + + EyeVec = WorldPos - g_CamPos.xyz; + TexCoord = NormalizedPos + 0.5 * InvGroundSize_PatchScale.xy; + + gl_Position = WorldPos.x * g_ViewProj_Row0 + WorldPos.y * g_ViewProj_Row1 + WorldPos.z * g_ViewProj_Row2 + g_ViewProj_Row3; +} + diff --git a/shaders/vert/ocean.vert b/shaders/vert/ocean.vert new file mode 100644 index 0000000000..8a5677fa12 --- /dev/null +++ b/shaders/vert/ocean.vert @@ -0,0 +1,200 @@ +#version 310 es + +#define YFLIP 0 +#define SPECULAR 0 +#define GLOSSMAP 0 + +#define DEBUG_NONE 0 +#define DEBUG_DIFFUSE 1 +#define DEBUG_SPECULAR 2 +#define DEBUG_LIGHTING 3 +#define DEBUG_FOG 4 +#define DEBUG DEBUG_NONE + +#define FORWARD 0 +#define DEFERRED 1 +#define DEFERRED_VTEX 2 + +float saturate(float x) { return clamp(x, 0.0, 1.0); } + +layout(std140, binding = 0) uniform GlobalVSData +{ + vec4 g_ViewProj_Row0; + vec4 g_ViewProj_Row1; + vec4 g_ViewProj_Row2; + vec4 g_ViewProj_Row3; + vec4 g_CamPos; + vec4 g_CamRight; + vec4 g_CamUp; + vec4 g_CamFront; + vec4 g_SunDir; + vec4 g_SunColor; + vec4 g_TimeParams; + vec4 g_ResolutionParams; + vec4 g_CamAxisRight; + vec4 g_FogColor_Distance; + vec4 g_ShadowVP_Row0; + vec4 g_ShadowVP_Row1; + vec4 g_ShadowVP_Row2; + vec4 g_ShadowVP_Row3; +}; + +vec4 ComputeFogFactor(vec3 WorldPos) +{ + vec4 FogData; + vec3 vEye = WorldPos - g_CamPos.xyz; + vec3 nEye = normalize(vEye); + 
FogData.w = exp(-dot(vEye, vEye) * g_FogColor_Distance.w * 0.75); + + float fog_sun_factor = pow(saturate(dot(nEye, g_SunDir.xyz)), 8.0); + FogData.xyz = mix(vec3(1.0, 1.0, 1.0), vec3(0.6, 0.6, 0.9), nEye.y * 0.5 + 0.5); + FogData.xyz = mix(FogData.xyz, vec3(0.95, 0.87, 0.78), fog_sun_factor); + return FogData; +} + +void ApplyFog(inout vec3 Color, vec4 FogData) +{ + Color = mix(FogData.xyz, Color, FogData.w); +} + +void ApplyLighting(inout mediump vec3 Color, mediump float DiffuseFactor) +{ + mediump vec3 DiffuseLight = g_SunColor.xyz * DiffuseFactor; + mediump vec3 AmbientLight = vec3(0.2, 0.35, 0.55) * 0.5; + mediump vec3 Lighting = DiffuseLight + AmbientLight; +#if DEBUG == DEBUG_LIGHTING + Color = Lighting; +#else + Color *= Lighting; +#endif +} + +void ApplySpecular(inout mediump vec3 Color, mediump vec3 EyeVec, mediump vec3 Normal, mediump vec3 SpecularColor, mediump float Shininess, mediump float FresnelAmount) +{ + mediump vec3 HalfAngle = normalize(-EyeVec + g_SunDir.xyz); + + mediump float v_dot_h = saturate(dot(HalfAngle, -EyeVec)); + mediump float n_dot_l = saturate(dot(Normal, g_SunDir.xyz)); + mediump float n_dot_h = saturate(dot(Normal, HalfAngle)); + mediump float n_dot_v = saturate(dot(-EyeVec, Normal)); + mediump float h_dot_l = saturate(dot(g_SunDir.xyz, HalfAngle)); + + const mediump float roughness_value = 0.25; + + mediump float r_sq = roughness_value * roughness_value; + mediump float n_dot_h_sq = n_dot_h * n_dot_h; + mediump float roughness_a = 1.0 / (4.0 * r_sq * n_dot_h_sq * n_dot_h_sq); + mediump float roughness_b = n_dot_h_sq - 1.0; + mediump float roughness_c = r_sq * n_dot_h_sq; + mediump float roughness = saturate(roughness_a * exp(roughness_b / roughness_c)); + + FresnelAmount = 0.5; + mediump float fresnel_term = pow(1.0 - n_dot_v, 5.0) * (1.0 - FresnelAmount) + FresnelAmount; + + mediump float geo_numerator = 2.0 * n_dot_h; + mediump float geo_denominator = 1.0 / v_dot_h; + mediump float geo_term = min(1.0, min(n_dot_v, n_dot_l) * geo_numerator * geo_denominator); + +#if SPECULAR || GLOSSMAP + Color += SpecularColor * g_SunColor.xyz * fresnel_term * roughness * n_dot_l * geo_term / (n_dot_v * n_dot_l + 0.0001); +#endif + + //Color = vec3(0.025 * 1.0 / (n_dot_v * n_dot_l)); +} + + +precision highp int; + +layout(binding = 0) uniform mediump sampler2D TexDisplacement; +layout(binding = 1) uniform mediump sampler2D TexLOD; + +layout(location = 0) in vec4 Position; +layout(location = 1) in vec4 LODWeights; + +layout(location = 0) out highp vec3 EyeVec; +layout(location = 1) out highp vec4 TexCoord; + +layout(std140, binding = 4) uniform GlobalOcean +{ + vec4 OceanScale; + vec4 OceanPosition; + vec4 InvOceanSize_PatchScale; + vec4 NormalTexCoordScale; +}; + +struct PatchData +{ + vec4 Position; + vec4 LODs; +}; + +layout(std140, binding = 0) uniform Offsets +{ + PatchData Patches[256]; +}; + +vec2 lod_factor(vec2 uv) +{ + float level = textureLod(TexLOD, uv, 0.0).x * (255.0 / 32.0); + float floor_level = floor(level); + float fract_level = level - floor_level; + return vec2(floor_level, fract_level); +} + +#ifdef VULKAN +#define INSTANCE_ID gl_InstanceIndex +#else +#define INSTANCE_ID gl_InstanceID +#endif + +vec2 warp_position() +{ + float vlod = dot(LODWeights, Patches[INSTANCE_ID].LODs); + vlod = mix(vlod, Patches[INSTANCE_ID].Position.w, all(equal(LODWeights, vec4(0.0)))); + + float floor_lod = floor(vlod); + float fract_lod = vlod - floor_lod; + uint ufloor_lod = uint(floor_lod); + + uvec4 uPosition = uvec4(Position); + uvec2 mask = (uvec2(1u) << 
uvec2(ufloor_lod, ufloor_lod + 1u)) - 1u; + + uvec4 rounding; + rounding.x = uPosition.x < 32u ? mask.x : 0u; + rounding.y = uPosition.y < 32u ? mask.x : 0u; + rounding.z = uPosition.x < 32u ? mask.y : 0u; + rounding.w = uPosition.y < 32u ? mask.y : 0u; + + //rounding = uPosition.xyxy * mask.xxyy; + vec4 lower_upper_snapped = vec4((uPosition.xyxy + rounding) & (~mask).xxyy); + return mix(lower_upper_snapped.xy, lower_upper_snapped.zw, fract_lod); +} + +void main() +{ + vec2 PatchPos = Patches[INSTANCE_ID].Position.xz * InvOceanSize_PatchScale.zw; + vec2 WarpedPos = warp_position(); + vec2 VertexPos = PatchPos + WarpedPos; + vec2 NormalizedPos = VertexPos * InvOceanSize_PatchScale.xy; + vec2 NormalizedTex = NormalizedPos * NormalTexCoordScale.zw; + vec2 lod = lod_factor(NormalizedPos); + vec2 Offset = exp2(lod.x) * InvOceanSize_PatchScale.xy * NormalTexCoordScale.zw; + + vec3 Displacement = + mix(textureLod(TexDisplacement, NormalizedTex + 0.5 * Offset, lod.x).yxz, + textureLod(TexDisplacement, NormalizedTex + 1.0 * Offset, lod.x + 1.0).yxz, + lod.y); + + vec3 WorldPos = vec3(NormalizedPos.x, 0.0, NormalizedPos.y) + Displacement; + WorldPos *= OceanScale.xyz; + WorldPos += OceanPosition.xyz; + + EyeVec = WorldPos - g_CamPos.xyz; + TexCoord = vec4(NormalizedTex, NormalizedTex * NormalTexCoordScale.xy) + 0.5 * InvOceanSize_PatchScale.xyxy * NormalTexCoordScale.zwzw; + + gl_Position = WorldPos.x * g_ViewProj_Row0 + WorldPos.y * g_ViewProj_Row1 + WorldPos.z * g_ViewProj_Row2 + g_ViewProj_Row3; +#if YFLIP + gl_Position *= vec4(1.0, -1.0, 1.0, 1.0); +#endif +} + diff --git a/shaders/vert/texture_buffer.vert b/shaders/vert/texture_buffer.vert new file mode 100644 index 0000000000..6bc7ddfae2 --- /dev/null +++ b/shaders/vert/texture_buffer.vert @@ -0,0 +1,10 @@ +#version 310 es +#extension GL_OES_texture_buffer : require + +layout(binding = 4) uniform highp samplerBuffer uSamp; +layout(rgba32f, binding = 5) uniform readonly highp imageBuffer uSampo; + +void main() +{ + gl_Position = texelFetch(uSamp, 10) + imageLoad(uSampo, 100); +} diff --git a/shaders/vert/ubo.vert b/shaders/vert/ubo.vert new file mode 100644 index 0000000000..f304c1e561 --- /dev/null +++ b/shaders/vert/ubo.vert @@ -0,0 +1,16 @@ +#version 310 es + +layout(binding = 0, std140) uniform UBO +{ + mat4 mvp; +}; + +in vec4 aVertex; +in vec3 aNormal; +out vec3 vNormal; + +void main() +{ + gl_Position = mvp * aVertex; + vNormal = aNormal; +} diff --git a/shaders/vulkan/frag/combined-texture-sampler.vk.frag b/shaders/vulkan/frag/combined-texture-sampler.vk.frag new file mode 100644 index 0000000000..b7de8d47e9 --- /dev/null +++ b/shaders/vulkan/frag/combined-texture-sampler.vk.frag @@ -0,0 +1,47 @@ +#version 310 es +precision mediump float; + +layout(set = 0, binding = 0) uniform mediump sampler uSampler0; +layout(set = 0, binding = 1) uniform mediump sampler uSampler1; +layout(set = 0, binding = 2) uniform mediump texture2D uTexture0; +layout(set = 0, binding = 3) uniform mediump texture2D uTexture1; + +layout(location = 0) out vec4 FragColor; +layout(location = 0) in vec2 vTex; + +vec4 sample_dual(mediump sampler samp, mediump texture2D tex) +{ + return texture(sampler2D(tex, samp), vTex); +} + +vec4 sample_global_tex(mediump sampler samp) +{ + vec4 a = texture(sampler2D(uTexture0, samp), vTex); + vec4 b = sample_dual(samp, uTexture1); + return a + b; +} + +vec4 sample_global_sampler(mediump texture2D tex) +{ + vec4 a = texture(sampler2D(tex, uSampler0), vTex); + vec4 b = sample_dual(uSampler1, tex); + return a + b; +} + +vec4 
sample_duals() +{ + vec4 a = sample_dual(uSampler0, uTexture0); + vec4 b = sample_dual(uSampler1, uTexture1); + return a + b; +} + +void main() +{ + vec4 c0 = sample_duals(); + vec4 c1 = sample_global_tex(uSampler0); + vec4 c2 = sample_global_tex(uSampler1); + vec4 c3 = sample_global_sampler(uTexture0); + vec4 c4 = sample_global_sampler(uTexture1); + + FragColor = c0 + c1 + c2 + c3 + c4; +} diff --git a/shaders/vulkan/frag/input-attachment-ms.vk.frag b/shaders/vulkan/frag/input-attachment-ms.vk.frag new file mode 100644 index 0000000000..206cbe48f2 --- /dev/null +++ b/shaders/vulkan/frag/input-attachment-ms.vk.frag @@ -0,0 +1,11 @@ +#version 310 es +precision mediump float; + +layout(input_attachment_index = 0, set = 0, binding = 0) uniform mediump subpassInputMS uSubpass0; +layout(input_attachment_index = 1, set = 0, binding = 1) uniform mediump subpassInputMS uSubpass1; +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = subpassLoad(uSubpass0, 1) + subpassLoad(uSubpass1, 2); +} diff --git a/shaders/vulkan/frag/input-attachment.vk.frag b/shaders/vulkan/frag/input-attachment.vk.frag new file mode 100644 index 0000000000..f082d15b2a --- /dev/null +++ b/shaders/vulkan/frag/input-attachment.vk.frag @@ -0,0 +1,11 @@ +#version 310 es +precision mediump float; + +layout(input_attachment_index = 0, set = 0, binding = 0) uniform mediump subpassInput uSubpass0; +layout(input_attachment_index = 1, set = 0, binding = 1) uniform mediump subpassInput uSubpass1; +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = subpassLoad(uSubpass0) + subpassLoad(uSubpass1); +} diff --git a/shaders/vulkan/frag/push-constant.vk.frag b/shaders/vulkan/frag/push-constant.vk.frag new file mode 100644 index 0000000000..6180faba31 --- /dev/null +++ b/shaders/vulkan/frag/push-constant.vk.frag @@ -0,0 +1,16 @@ +#version 310 es +precision mediump float; + +layout(push_constant, std430) uniform PushConstants +{ + vec4 value0; + vec4 value1; +} push; + +layout(location = 0) in vec4 vColor; +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vColor + push.value0 + push.value1; +} diff --git a/shaders/vulkan/frag/separate-sampler-texture.vk.frag b/shaders/vulkan/frag/separate-sampler-texture.vk.frag new file mode 100644 index 0000000000..cedf114ef8 --- /dev/null +++ b/shaders/vulkan/frag/separate-sampler-texture.vk.frag @@ -0,0 +1,36 @@ +#version 310 es +precision mediump float; + +layout(set = 0, binding = 0) uniform mediump sampler uSampler; +layout(set = 0, binding = 1) uniform mediump texture2D uTexture; +layout(set = 0, binding = 2) uniform mediump texture3D uTexture3D; +layout(set = 0, binding = 3) uniform mediump textureCube uTextureCube; +layout(set = 0, binding = 4) uniform mediump texture2DArray uTextureArray; + +layout(location = 0) out vec4 FragColor; +layout(location = 0) in vec2 vTex; +layout(location = 1) in vec3 vTex3; + +vec4 sample_func(mediump sampler samp, vec2 uv) +{ + return texture(sampler2D(uTexture, samp), uv); +} + +vec4 sample_func_dual(mediump sampler samp, mediump texture2D tex, vec2 uv) +{ + return texture(sampler2D(tex, samp), uv); +} + +void main() +{ + vec2 off = 1.0 / vec2(textureSize(sampler2D(uTexture, uSampler), 0)); + vec2 off2 = 1.0 / vec2(textureSize(sampler2D(uTexture, uSampler), 1)); + + vec4 c0 = sample_func(uSampler, vTex + off + off2); + vec4 c1 = sample_func_dual(uSampler, uTexture, vTex + off + off2); + vec4 c2 = texture(sampler2DArray(uTextureArray, uSampler), vTex3); + vec4 c3 = texture(samplerCube(uTextureCube, 
uSampler), vTex3); + vec4 c4 = texture(sampler3D(uTexture3D, uSampler), vTex3); + + FragColor = c0 + c1 + c2 + c3 + c4; +} diff --git a/shaders/vulkan/frag/spec-constant.vk.frag b/shaders/vulkan/frag/spec-constant.vk.frag new file mode 100644 index 0000000000..03b625bf58 --- /dev/null +++ b/shaders/vulkan/frag/spec-constant.vk.frag @@ -0,0 +1,77 @@ +#version 310 es +precision mediump float; + +layout(location = 0) out vec4 FragColor; +layout(constant_id = 1) const float a = 1.0; +layout(constant_id = 2) const float b = 2.0; +layout(constant_id = 3) const int c = 3; +layout(constant_id = 4) const int d = 4; +layout(constant_id = 5) const uint e = 5u; +layout(constant_id = 6) const uint f = 6u; +layout(constant_id = 7) const bool g = false; +layout(constant_id = 8) const bool h = true; +// glslang doesn't seem to support partial spec constants or composites yet, so only test the basics. + +struct Foo +{ + float elems[d + 2]; +}; + +void main() +{ + float t0 = a; + float t1 = b; + + uint c0 = uint(c); // OpIAdd with different types. + // FConvert, float-to-double. + int c1 = -c; // SNegate + int c2 = ~c; // OpNot + int c3 = c + d; // OpIAdd + int c4 = c - d; // OpISub + int c5 = c * d; // OpIMul + int c6 = c / d; // OpSDiv + uint c7 = e / f; // OpUDiv + int c8 = c % d; // OpSMod + uint c9 = e % f; // OpUMod + // TODO: OpSRem, any way to access this in GLSL? + int c10 = c >> d; // OpShiftRightArithmetic + uint c11 = e >> f; // OpShiftRightLogical + int c12 = c << d; // OpShiftLeftLogical + int c13 = c | d; // OpBitwiseOr + int c14 = c ^ d; // OpBitwiseXor + int c15 = c & d; // OpBitwiseAnd + // VectorShuffle, CompositeExtract, CompositeInsert, not testable atm. + bool c16 = g || h; // OpLogicalOr + bool c17 = g && h; // OpLogicalAnd + bool c18 = !g; // OpLogicalNot + bool c19 = g == h; // OpLogicalEqual + bool c20 = g != h; // OpLogicalNotEqual + // OpSelect not testable atm. + bool c21 = c == d; // OpIEqual + bool c22 = c != d; // OpINotEqual + bool c23 = c < d; // OpSLessThan + bool c24 = e < f; // OpULessThan + bool c25 = c > d; // OpSGreaterThan + bool c26 = e > f; // OpUGreaterThan + bool c27 = c <= d; // OpSLessThanEqual + bool c28 = e <= f; // OpULessThanEqual + bool c29 = c >= d; // OpSGreaterThanEqual + bool c30 = e >= f; // OpUGreaterThanEqual + // OpQuantizeToF16 not testable atm. + + int c31 = c8 + c3; + + int c32 = int(e); // OpIAdd with different types. + bool c33 = bool(c); // int -> bool + bool c34 = bool(e); // uint -> bool + int c35 = int(g); // bool -> int + uint c36 = uint(g); // bool -> uint + float c37 = float(g); // bool -> float + + // Flexible sized arrays with spec constants and spec constant ops. + float vec0[d][c + 3]; + float vec1[c + 2][d + 5]; + + Foo foo; + FragColor = vec4(t0 + t1) + vec0[0][0] + vec1[0][0] + foo.elems[c]; +} diff --git a/shaders/vulkan/vert/vulkan-vertex.vk.vert b/shaders/vulkan/vert/vulkan-vertex.vk.vert new file mode 100644 index 0000000000..4d0438ace6 --- /dev/null +++ b/shaders/vulkan/vert/vulkan-vertex.vk.vert @@ -0,0 +1,6 @@ +#version 310 es + +void main() +{ + gl_Position = float(gl_VertexIndex + gl_InstanceIndex) * vec4(1.0, 2.0, 3.0, 4.0); +} diff --git a/spirv.hpp b/spirv.hpp new file mode 100644 index 0000000000..987f3c1d67 --- /dev/null +++ b/spirv.hpp @@ -0,0 +1,881 @@ +// Copyright (c) 2014-2016 The Khronos Group Inc. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and/or associated documentation files (the "Materials"), +// to deal in the Materials without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Materials, and to permit persons to whom the +// Materials are furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Materials. +// +// MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS +// STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND +// HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +// +// THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS +// IN THE MATERIALS. + +// This header is automatically generated by the same tool that creates +// the Binary Section of the SPIR-V specification. + +// Enumeration tokens for SPIR-V, in various styles: +// C, C++, C++11, JSON, Lua, Python +// +// - C will have tokens with a "Spv" prefix, e.g.: SpvSourceLanguageGLSL +// - C++ will have tokens in the "spv" name space, e.g.: spv::SourceLanguageGLSL +// - C++11 will use enum classes in the spv namespace, e.g.: spv::SourceLanguage::GLSL +// - Lua will use tables, e.g.: spv.SourceLanguage.GLSL +// - Python will use dictionaries, e.g.: spv['SourceLanguage']['GLSL'] +// +// Some tokens act like mask values, which can be OR'd together, +// while others are mutually exclusive. The mask-like ones have +// "Mask" in their name, and a parallel enum that has the shift +// amount (1 << x) for each corresponding enumerant. 
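+//
+// Illustrative example (added for clarity; not part of the generated header): in the
+// C++ style,
+//     spv::ImageOperandsMask ops = spv::ImageOperandsBiasMask | spv::ImageOperandsLodMask;
+// combines two mask enumerants via the operator| overloads defined at the end of this
+// file, while the parallel shift enum gives the bit position, e.g.
+//     (1u << spv::ImageOperandsLodShift) == spv::ImageOperandsLodMask.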
+ +#ifndef spirv_HPP +#define spirv_HPP + +namespace spv { + +typedef unsigned int Id; + +#define SPV_VERSION 0x10000 +#define SPV_REVISION 5 + +static const unsigned int MagicNumber = 0x07230203; +static const unsigned int Version = 0x00010000; +static const unsigned int Revision = 5; +static const unsigned int OpCodeMask = 0xffff; +static const unsigned int WordCountShift = 16; + +enum SourceLanguage { + SourceLanguageUnknown = 0, + SourceLanguageESSL = 1, + SourceLanguageGLSL = 2, + SourceLanguageOpenCL_C = 3, + SourceLanguageOpenCL_CPP = 4, +}; + +enum ExecutionModel { + ExecutionModelVertex = 0, + ExecutionModelTessellationControl = 1, + ExecutionModelTessellationEvaluation = 2, + ExecutionModelGeometry = 3, + ExecutionModelFragment = 4, + ExecutionModelGLCompute = 5, + ExecutionModelKernel = 6, +}; + +enum AddressingModel { + AddressingModelLogical = 0, + AddressingModelPhysical32 = 1, + AddressingModelPhysical64 = 2, +}; + +enum MemoryModel { + MemoryModelSimple = 0, + MemoryModelGLSL450 = 1, + MemoryModelOpenCL = 2, +}; + +enum ExecutionMode { + ExecutionModeInvocations = 0, + ExecutionModeSpacingEqual = 1, + ExecutionModeSpacingFractionalEven = 2, + ExecutionModeSpacingFractionalOdd = 3, + ExecutionModeVertexOrderCw = 4, + ExecutionModeVertexOrderCcw = 5, + ExecutionModePixelCenterInteger = 6, + ExecutionModeOriginUpperLeft = 7, + ExecutionModeOriginLowerLeft = 8, + ExecutionModeEarlyFragmentTests = 9, + ExecutionModePointMode = 10, + ExecutionModeXfb = 11, + ExecutionModeDepthReplacing = 12, + ExecutionModeDepthGreater = 14, + ExecutionModeDepthLess = 15, + ExecutionModeDepthUnchanged = 16, + ExecutionModeLocalSize = 17, + ExecutionModeLocalSizeHint = 18, + ExecutionModeInputPoints = 19, + ExecutionModeInputLines = 20, + ExecutionModeInputLinesAdjacency = 21, + ExecutionModeTriangles = 22, + ExecutionModeInputTrianglesAdjacency = 23, + ExecutionModeQuads = 24, + ExecutionModeIsolines = 25, + ExecutionModeOutputVertices = 26, + ExecutionModeOutputPoints = 27, + ExecutionModeOutputLineStrip = 28, + ExecutionModeOutputTriangleStrip = 29, + ExecutionModeVecTypeHint = 30, + ExecutionModeContractionOff = 31, +}; + +enum StorageClass { + StorageClassUniformConstant = 0, + StorageClassInput = 1, + StorageClassUniform = 2, + StorageClassOutput = 3, + StorageClassWorkgroup = 4, + StorageClassCrossWorkgroup = 5, + StorageClassPrivate = 6, + StorageClassFunction = 7, + StorageClassGeneric = 8, + StorageClassPushConstant = 9, + StorageClassAtomicCounter = 10, + StorageClassImage = 11, +}; + +enum Dim { + Dim1D = 0, + Dim2D = 1, + Dim3D = 2, + DimCube = 3, + DimRect = 4, + DimBuffer = 5, + DimSubpassData = 6, +}; + +enum SamplerAddressingMode { + SamplerAddressingModeNone = 0, + SamplerAddressingModeClampToEdge = 1, + SamplerAddressingModeClamp = 2, + SamplerAddressingModeRepeat = 3, + SamplerAddressingModeRepeatMirrored = 4, +}; + +enum SamplerFilterMode { + SamplerFilterModeNearest = 0, + SamplerFilterModeLinear = 1, +}; + +enum ImageFormat { + ImageFormatUnknown = 0, + ImageFormatRgba32f = 1, + ImageFormatRgba16f = 2, + ImageFormatR32f = 3, + ImageFormatRgba8 = 4, + ImageFormatRgba8Snorm = 5, + ImageFormatRg32f = 6, + ImageFormatRg16f = 7, + ImageFormatR11fG11fB10f = 8, + ImageFormatR16f = 9, + ImageFormatRgba16 = 10, + ImageFormatRgb10A2 = 11, + ImageFormatRg16 = 12, + ImageFormatRg8 = 13, + ImageFormatR16 = 14, + ImageFormatR8 = 15, + ImageFormatRgba16Snorm = 16, + ImageFormatRg16Snorm = 17, + ImageFormatRg8Snorm = 18, + ImageFormatR16Snorm = 19, + ImageFormatR8Snorm = 20, + 
ImageFormatRgba32i = 21, + ImageFormatRgba16i = 22, + ImageFormatRgba8i = 23, + ImageFormatR32i = 24, + ImageFormatRg32i = 25, + ImageFormatRg16i = 26, + ImageFormatRg8i = 27, + ImageFormatR16i = 28, + ImageFormatR8i = 29, + ImageFormatRgba32ui = 30, + ImageFormatRgba16ui = 31, + ImageFormatRgba8ui = 32, + ImageFormatR32ui = 33, + ImageFormatRgb10a2ui = 34, + ImageFormatRg32ui = 35, + ImageFormatRg16ui = 36, + ImageFormatRg8ui = 37, + ImageFormatR16ui = 38, + ImageFormatR8ui = 39, +}; + +enum ImageChannelOrder { + ImageChannelOrderR = 0, + ImageChannelOrderA = 1, + ImageChannelOrderRG = 2, + ImageChannelOrderRA = 3, + ImageChannelOrderRGB = 4, + ImageChannelOrderRGBA = 5, + ImageChannelOrderBGRA = 6, + ImageChannelOrderARGB = 7, + ImageChannelOrderIntensity = 8, + ImageChannelOrderLuminance = 9, + ImageChannelOrderRx = 10, + ImageChannelOrderRGx = 11, + ImageChannelOrderRGBx = 12, + ImageChannelOrderDepth = 13, + ImageChannelOrderDepthStencil = 14, + ImageChannelOrdersRGB = 15, + ImageChannelOrdersRGBx = 16, + ImageChannelOrdersRGBA = 17, + ImageChannelOrdersBGRA = 18, + ImageChannelOrderABGR = 19, +}; + +enum ImageChannelDataType { + ImageChannelDataTypeSnormInt8 = 0, + ImageChannelDataTypeSnormInt16 = 1, + ImageChannelDataTypeUnormInt8 = 2, + ImageChannelDataTypeUnormInt16 = 3, + ImageChannelDataTypeUnormShort565 = 4, + ImageChannelDataTypeUnormShort555 = 5, + ImageChannelDataTypeUnormInt101010 = 6, + ImageChannelDataTypeSignedInt8 = 7, + ImageChannelDataTypeSignedInt16 = 8, + ImageChannelDataTypeSignedInt32 = 9, + ImageChannelDataTypeUnsignedInt8 = 10, + ImageChannelDataTypeUnsignedInt16 = 11, + ImageChannelDataTypeUnsignedInt32 = 12, + ImageChannelDataTypeHalfFloat = 13, + ImageChannelDataTypeFloat = 14, + ImageChannelDataTypeUnormInt24 = 15, + ImageChannelDataTypeUnormInt101010_2 = 16, +}; + +enum ImageOperandsShift { + ImageOperandsBiasShift = 0, + ImageOperandsLodShift = 1, + ImageOperandsGradShift = 2, + ImageOperandsConstOffsetShift = 3, + ImageOperandsOffsetShift = 4, + ImageOperandsConstOffsetsShift = 5, + ImageOperandsSampleShift = 6, + ImageOperandsMinLodShift = 7, +}; + +enum ImageOperandsMask { + ImageOperandsMaskNone = 0, + ImageOperandsBiasMask = 0x00000001, + ImageOperandsLodMask = 0x00000002, + ImageOperandsGradMask = 0x00000004, + ImageOperandsConstOffsetMask = 0x00000008, + ImageOperandsOffsetMask = 0x00000010, + ImageOperandsConstOffsetsMask = 0x00000020, + ImageOperandsSampleMask = 0x00000040, + ImageOperandsMinLodMask = 0x00000080, +}; + +enum FPFastMathModeShift { + FPFastMathModeNotNaNShift = 0, + FPFastMathModeNotInfShift = 1, + FPFastMathModeNSZShift = 2, + FPFastMathModeAllowRecipShift = 3, + FPFastMathModeFastShift = 4, +}; + +enum FPFastMathModeMask { + FPFastMathModeMaskNone = 0, + FPFastMathModeNotNaNMask = 0x00000001, + FPFastMathModeNotInfMask = 0x00000002, + FPFastMathModeNSZMask = 0x00000004, + FPFastMathModeAllowRecipMask = 0x00000008, + FPFastMathModeFastMask = 0x00000010, +}; + +enum FPRoundingMode { + FPRoundingModeRTE = 0, + FPRoundingModeRTZ = 1, + FPRoundingModeRTP = 2, + FPRoundingModeRTN = 3, +}; + +enum LinkageType { + LinkageTypeExport = 0, + LinkageTypeImport = 1, +}; + +enum AccessQualifier { + AccessQualifierReadOnly = 0, + AccessQualifierWriteOnly = 1, + AccessQualifierReadWrite = 2, +}; + +enum FunctionParameterAttribute { + FunctionParameterAttributeZext = 0, + FunctionParameterAttributeSext = 1, + FunctionParameterAttributeByVal = 2, + FunctionParameterAttributeSret = 3, + FunctionParameterAttributeNoAlias = 4, + 
FunctionParameterAttributeNoCapture = 5, + FunctionParameterAttributeNoWrite = 6, + FunctionParameterAttributeNoReadWrite = 7, +}; + +enum Decoration { + DecorationRelaxedPrecision = 0, + DecorationSpecId = 1, + DecorationBlock = 2, + DecorationBufferBlock = 3, + DecorationRowMajor = 4, + DecorationColMajor = 5, + DecorationArrayStride = 6, + DecorationMatrixStride = 7, + DecorationGLSLShared = 8, + DecorationGLSLPacked = 9, + DecorationCPacked = 10, + DecorationBuiltIn = 11, + DecorationNoPerspective = 13, + DecorationFlat = 14, + DecorationPatch = 15, + DecorationCentroid = 16, + DecorationSample = 17, + DecorationInvariant = 18, + DecorationRestrict = 19, + DecorationAliased = 20, + DecorationVolatile = 21, + DecorationConstant = 22, + DecorationCoherent = 23, + DecorationNonWritable = 24, + DecorationNonReadable = 25, + DecorationUniform = 26, + DecorationSaturatedConversion = 28, + DecorationStream = 29, + DecorationLocation = 30, + DecorationComponent = 31, + DecorationIndex = 32, + DecorationBinding = 33, + DecorationDescriptorSet = 34, + DecorationOffset = 35, + DecorationXfbBuffer = 36, + DecorationXfbStride = 37, + DecorationFuncParamAttr = 38, + DecorationFPRoundingMode = 39, + DecorationFPFastMathMode = 40, + DecorationLinkageAttributes = 41, + DecorationNoContraction = 42, + DecorationInputAttachmentIndex = 43, + DecorationAlignment = 44, +}; + +enum BuiltIn { + BuiltInPosition = 0, + BuiltInPointSize = 1, + BuiltInClipDistance = 3, + BuiltInCullDistance = 4, + BuiltInVertexId = 5, + BuiltInInstanceId = 6, + BuiltInPrimitiveId = 7, + BuiltInInvocationId = 8, + BuiltInLayer = 9, + BuiltInViewportIndex = 10, + BuiltInTessLevelOuter = 11, + BuiltInTessLevelInner = 12, + BuiltInTessCoord = 13, + BuiltInPatchVertices = 14, + BuiltInFragCoord = 15, + BuiltInPointCoord = 16, + BuiltInFrontFacing = 17, + BuiltInSampleId = 18, + BuiltInSamplePosition = 19, + BuiltInSampleMask = 20, + BuiltInFragDepth = 22, + BuiltInHelperInvocation = 23, + BuiltInNumWorkgroups = 24, + BuiltInWorkgroupSize = 25, + BuiltInWorkgroupId = 26, + BuiltInLocalInvocationId = 27, + BuiltInGlobalInvocationId = 28, + BuiltInLocalInvocationIndex = 29, + BuiltInWorkDim = 30, + BuiltInGlobalSize = 31, + BuiltInEnqueuedWorkgroupSize = 32, + BuiltInGlobalOffset = 33, + BuiltInGlobalLinearId = 34, + BuiltInSubgroupSize = 36, + BuiltInSubgroupMaxSize = 37, + BuiltInNumSubgroups = 38, + BuiltInNumEnqueuedSubgroups = 39, + BuiltInSubgroupId = 40, + BuiltInSubgroupLocalInvocationId = 41, + BuiltInVertexIndex = 42, + BuiltInInstanceIndex = 43, +}; + +enum SelectionControlShift { + SelectionControlFlattenShift = 0, + SelectionControlDontFlattenShift = 1, +}; + +enum SelectionControlMask { + SelectionControlMaskNone = 0, + SelectionControlFlattenMask = 0x00000001, + SelectionControlDontFlattenMask = 0x00000002, +}; + +enum LoopControlShift { + LoopControlUnrollShift = 0, + LoopControlDontUnrollShift = 1, +}; + +enum LoopControlMask { + LoopControlMaskNone = 0, + LoopControlUnrollMask = 0x00000001, + LoopControlDontUnrollMask = 0x00000002, +}; + +enum FunctionControlShift { + FunctionControlInlineShift = 0, + FunctionControlDontInlineShift = 1, + FunctionControlPureShift = 2, + FunctionControlConstShift = 3, +}; + +enum FunctionControlMask { + FunctionControlMaskNone = 0, + FunctionControlInlineMask = 0x00000001, + FunctionControlDontInlineMask = 0x00000002, + FunctionControlPureMask = 0x00000004, + FunctionControlConstMask = 0x00000008, +}; + +enum MemorySemanticsShift { + MemorySemanticsAcquireShift = 1, + 
MemorySemanticsReleaseShift = 2, + MemorySemanticsAcquireReleaseShift = 3, + MemorySemanticsSequentiallyConsistentShift = 4, + MemorySemanticsUniformMemoryShift = 6, + MemorySemanticsSubgroupMemoryShift = 7, + MemorySemanticsWorkgroupMemoryShift = 8, + MemorySemanticsCrossWorkgroupMemoryShift = 9, + MemorySemanticsAtomicCounterMemoryShift = 10, + MemorySemanticsImageMemoryShift = 11, +}; + +enum MemorySemanticsMask { + MemorySemanticsMaskNone = 0, + MemorySemanticsAcquireMask = 0x00000002, + MemorySemanticsReleaseMask = 0x00000004, + MemorySemanticsAcquireReleaseMask = 0x00000008, + MemorySemanticsSequentiallyConsistentMask = 0x00000010, + MemorySemanticsUniformMemoryMask = 0x00000040, + MemorySemanticsSubgroupMemoryMask = 0x00000080, + MemorySemanticsWorkgroupMemoryMask = 0x00000100, + MemorySemanticsCrossWorkgroupMemoryMask = 0x00000200, + MemorySemanticsAtomicCounterMemoryMask = 0x00000400, + MemorySemanticsImageMemoryMask = 0x00000800, +}; + +enum MemoryAccessShift { + MemoryAccessVolatileShift = 0, + MemoryAccessAlignedShift = 1, + MemoryAccessNontemporalShift = 2, +}; + +enum MemoryAccessMask { + MemoryAccessMaskNone = 0, + MemoryAccessVolatileMask = 0x00000001, + MemoryAccessAlignedMask = 0x00000002, + MemoryAccessNontemporalMask = 0x00000004, +}; + +enum Scope { + ScopeCrossDevice = 0, + ScopeDevice = 1, + ScopeWorkgroup = 2, + ScopeSubgroup = 3, + ScopeInvocation = 4, +}; + +enum GroupOperation { + GroupOperationReduce = 0, + GroupOperationInclusiveScan = 1, + GroupOperationExclusiveScan = 2, +}; + +enum KernelEnqueueFlags { + KernelEnqueueFlagsNoWait = 0, + KernelEnqueueFlagsWaitKernel = 1, + KernelEnqueueFlagsWaitWorkGroup = 2, +}; + +enum KernelProfilingInfoShift { + KernelProfilingInfoCmdExecTimeShift = 0, +}; + +enum KernelProfilingInfoMask { + KernelProfilingInfoMaskNone = 0, + KernelProfilingInfoCmdExecTimeMask = 0x00000001, +}; + +enum Capability { + CapabilityMatrix = 0, + CapabilityShader = 1, + CapabilityGeometry = 2, + CapabilityTessellation = 3, + CapabilityAddresses = 4, + CapabilityLinkage = 5, + CapabilityKernel = 6, + CapabilityVector16 = 7, + CapabilityFloat16Buffer = 8, + CapabilityFloat16 = 9, + CapabilityFloat64 = 10, + CapabilityInt64 = 11, + CapabilityInt64Atomics = 12, + CapabilityImageBasic = 13, + CapabilityImageReadWrite = 14, + CapabilityImageMipmap = 15, + CapabilityPipes = 17, + CapabilityGroups = 18, + CapabilityDeviceEnqueue = 19, + CapabilityLiteralSampler = 20, + CapabilityAtomicStorage = 21, + CapabilityInt16 = 22, + CapabilityTessellationPointSize = 23, + CapabilityGeometryPointSize = 24, + CapabilityImageGatherExtended = 25, + CapabilityStorageImageMultisample = 27, + CapabilityUniformBufferArrayDynamicIndexing = 28, + CapabilitySampledImageArrayDynamicIndexing = 29, + CapabilityStorageBufferArrayDynamicIndexing = 30, + CapabilityStorageImageArrayDynamicIndexing = 31, + CapabilityClipDistance = 32, + CapabilityCullDistance = 33, + CapabilityImageCubeArray = 34, + CapabilitySampleRateShading = 35, + CapabilityImageRect = 36, + CapabilitySampledRect = 37, + CapabilityGenericPointer = 38, + CapabilityInt8 = 39, + CapabilityInputAttachment = 40, + CapabilitySparseResidency = 41, + CapabilityMinLod = 42, + CapabilitySampled1D = 43, + CapabilityImage1D = 44, + CapabilitySampledCubeArray = 45, + CapabilitySampledBuffer = 46, + CapabilityImageBuffer = 47, + CapabilityImageMSArray = 48, + CapabilityStorageImageExtendedFormats = 49, + CapabilityImageQuery = 50, + CapabilityDerivativeControl = 51, + CapabilityInterpolationFunction = 52, + 
CapabilityTransformFeedback = 53, + CapabilityGeometryStreams = 54, + CapabilityStorageImageReadWithoutFormat = 55, + CapabilityStorageImageWriteWithoutFormat = 56, + CapabilityMultiViewport = 57, +}; + +enum Op { + OpNop = 0, + OpUndef = 1, + OpSourceContinued = 2, + OpSource = 3, + OpSourceExtension = 4, + OpName = 5, + OpMemberName = 6, + OpString = 7, + OpLine = 8, + OpExtension = 10, + OpExtInstImport = 11, + OpExtInst = 12, + OpMemoryModel = 14, + OpEntryPoint = 15, + OpExecutionMode = 16, + OpCapability = 17, + OpTypeVoid = 19, + OpTypeBool = 20, + OpTypeInt = 21, + OpTypeFloat = 22, + OpTypeVector = 23, + OpTypeMatrix = 24, + OpTypeImage = 25, + OpTypeSampler = 26, + OpTypeSampledImage = 27, + OpTypeArray = 28, + OpTypeRuntimeArray = 29, + OpTypeStruct = 30, + OpTypeOpaque = 31, + OpTypePointer = 32, + OpTypeFunction = 33, + OpTypeEvent = 34, + OpTypeDeviceEvent = 35, + OpTypeReserveId = 36, + OpTypeQueue = 37, + OpTypePipe = 38, + OpTypeForwardPointer = 39, + OpConstantTrue = 41, + OpConstantFalse = 42, + OpConstant = 43, + OpConstantComposite = 44, + OpConstantSampler = 45, + OpConstantNull = 46, + OpSpecConstantTrue = 48, + OpSpecConstantFalse = 49, + OpSpecConstant = 50, + OpSpecConstantComposite = 51, + OpSpecConstantOp = 52, + OpFunction = 54, + OpFunctionParameter = 55, + OpFunctionEnd = 56, + OpFunctionCall = 57, + OpVariable = 59, + OpImageTexelPointer = 60, + OpLoad = 61, + OpStore = 62, + OpCopyMemory = 63, + OpCopyMemorySized = 64, + OpAccessChain = 65, + OpInBoundsAccessChain = 66, + OpPtrAccessChain = 67, + OpArrayLength = 68, + OpGenericPtrMemSemantics = 69, + OpInBoundsPtrAccessChain = 70, + OpDecorate = 71, + OpMemberDecorate = 72, + OpDecorationGroup = 73, + OpGroupDecorate = 74, + OpGroupMemberDecorate = 75, + OpVectorExtractDynamic = 77, + OpVectorInsertDynamic = 78, + OpVectorShuffle = 79, + OpCompositeConstruct = 80, + OpCompositeExtract = 81, + OpCompositeInsert = 82, + OpCopyObject = 83, + OpTranspose = 84, + OpSampledImage = 86, + OpImageSampleImplicitLod = 87, + OpImageSampleExplicitLod = 88, + OpImageSampleDrefImplicitLod = 89, + OpImageSampleDrefExplicitLod = 90, + OpImageSampleProjImplicitLod = 91, + OpImageSampleProjExplicitLod = 92, + OpImageSampleProjDrefImplicitLod = 93, + OpImageSampleProjDrefExplicitLod = 94, + OpImageFetch = 95, + OpImageGather = 96, + OpImageDrefGather = 97, + OpImageRead = 98, + OpImageWrite = 99, + OpImage = 100, + OpImageQueryFormat = 101, + OpImageQueryOrder = 102, + OpImageQuerySizeLod = 103, + OpImageQuerySize = 104, + OpImageQueryLod = 105, + OpImageQueryLevels = 106, + OpImageQuerySamples = 107, + OpConvertFToU = 109, + OpConvertFToS = 110, + OpConvertSToF = 111, + OpConvertUToF = 112, + OpUConvert = 113, + OpSConvert = 114, + OpFConvert = 115, + OpQuantizeToF16 = 116, + OpConvertPtrToU = 117, + OpSatConvertSToU = 118, + OpSatConvertUToS = 119, + OpConvertUToPtr = 120, + OpPtrCastToGeneric = 121, + OpGenericCastToPtr = 122, + OpGenericCastToPtrExplicit = 123, + OpBitcast = 124, + OpSNegate = 126, + OpFNegate = 127, + OpIAdd = 128, + OpFAdd = 129, + OpISub = 130, + OpFSub = 131, + OpIMul = 132, + OpFMul = 133, + OpUDiv = 134, + OpSDiv = 135, + OpFDiv = 136, + OpUMod = 137, + OpSRem = 138, + OpSMod = 139, + OpFRem = 140, + OpFMod = 141, + OpVectorTimesScalar = 142, + OpMatrixTimesScalar = 143, + OpVectorTimesMatrix = 144, + OpMatrixTimesVector = 145, + OpMatrixTimesMatrix = 146, + OpOuterProduct = 147, + OpDot = 148, + OpIAddCarry = 149, + OpISubBorrow = 150, + OpUMulExtended = 151, + OpSMulExtended = 152, + OpAny = 154, + 
OpAll = 155, + OpIsNan = 156, + OpIsInf = 157, + OpIsFinite = 158, + OpIsNormal = 159, + OpSignBitSet = 160, + OpLessOrGreater = 161, + OpOrdered = 162, + OpUnordered = 163, + OpLogicalEqual = 164, + OpLogicalNotEqual = 165, + OpLogicalOr = 166, + OpLogicalAnd = 167, + OpLogicalNot = 168, + OpSelect = 169, + OpIEqual = 170, + OpINotEqual = 171, + OpUGreaterThan = 172, + OpSGreaterThan = 173, + OpUGreaterThanEqual = 174, + OpSGreaterThanEqual = 175, + OpULessThan = 176, + OpSLessThan = 177, + OpULessThanEqual = 178, + OpSLessThanEqual = 179, + OpFOrdEqual = 180, + OpFUnordEqual = 181, + OpFOrdNotEqual = 182, + OpFUnordNotEqual = 183, + OpFOrdLessThan = 184, + OpFUnordLessThan = 185, + OpFOrdGreaterThan = 186, + OpFUnordGreaterThan = 187, + OpFOrdLessThanEqual = 188, + OpFUnordLessThanEqual = 189, + OpFOrdGreaterThanEqual = 190, + OpFUnordGreaterThanEqual = 191, + OpShiftRightLogical = 194, + OpShiftRightArithmetic = 195, + OpShiftLeftLogical = 196, + OpBitwiseOr = 197, + OpBitwiseXor = 198, + OpBitwiseAnd = 199, + OpNot = 200, + OpBitFieldInsert = 201, + OpBitFieldSExtract = 202, + OpBitFieldUExtract = 203, + OpBitReverse = 204, + OpBitCount = 205, + OpDPdx = 207, + OpDPdy = 208, + OpFwidth = 209, + OpDPdxFine = 210, + OpDPdyFine = 211, + OpFwidthFine = 212, + OpDPdxCoarse = 213, + OpDPdyCoarse = 214, + OpFwidthCoarse = 215, + OpEmitVertex = 218, + OpEndPrimitive = 219, + OpEmitStreamVertex = 220, + OpEndStreamPrimitive = 221, + OpControlBarrier = 224, + OpMemoryBarrier = 225, + OpAtomicLoad = 227, + OpAtomicStore = 228, + OpAtomicExchange = 229, + OpAtomicCompareExchange = 230, + OpAtomicCompareExchangeWeak = 231, + OpAtomicIIncrement = 232, + OpAtomicIDecrement = 233, + OpAtomicIAdd = 234, + OpAtomicISub = 235, + OpAtomicSMin = 236, + OpAtomicUMin = 237, + OpAtomicSMax = 238, + OpAtomicUMax = 239, + OpAtomicAnd = 240, + OpAtomicOr = 241, + OpAtomicXor = 242, + OpPhi = 245, + OpLoopMerge = 246, + OpSelectionMerge = 247, + OpLabel = 248, + OpBranch = 249, + OpBranchConditional = 250, + OpSwitch = 251, + OpKill = 252, + OpReturn = 253, + OpReturnValue = 254, + OpUnreachable = 255, + OpLifetimeStart = 256, + OpLifetimeStop = 257, + OpGroupAsyncCopy = 259, + OpGroupWaitEvents = 260, + OpGroupAll = 261, + OpGroupAny = 262, + OpGroupBroadcast = 263, + OpGroupIAdd = 264, + OpGroupFAdd = 265, + OpGroupFMin = 266, + OpGroupUMin = 267, + OpGroupSMin = 268, + OpGroupFMax = 269, + OpGroupUMax = 270, + OpGroupSMax = 271, + OpReadPipe = 274, + OpWritePipe = 275, + OpReservedReadPipe = 276, + OpReservedWritePipe = 277, + OpReserveReadPipePackets = 278, + OpReserveWritePipePackets = 279, + OpCommitReadPipe = 280, + OpCommitWritePipe = 281, + OpIsValidReserveId = 282, + OpGetNumPipePackets = 283, + OpGetMaxPipePackets = 284, + OpGroupReserveReadPipePackets = 285, + OpGroupReserveWritePipePackets = 286, + OpGroupCommitReadPipe = 287, + OpGroupCommitWritePipe = 288, + OpEnqueueMarker = 291, + OpEnqueueKernel = 292, + OpGetKernelNDrangeSubGroupCount = 293, + OpGetKernelNDrangeMaxSubGroupSize = 294, + OpGetKernelWorkGroupSize = 295, + OpGetKernelPreferredWorkGroupSizeMultiple = 296, + OpRetainEvent = 297, + OpReleaseEvent = 298, + OpCreateUserEvent = 299, + OpIsValidEvent = 300, + OpSetUserEventStatus = 301, + OpCaptureEventProfilingInfo = 302, + OpGetDefaultQueue = 303, + OpBuildNDRange = 304, + OpImageSparseSampleImplicitLod = 305, + OpImageSparseSampleExplicitLod = 306, + OpImageSparseSampleDrefImplicitLod = 307, + OpImageSparseSampleDrefExplicitLod = 308, + OpImageSparseSampleProjImplicitLod = 309, + 
OpImageSparseSampleProjExplicitLod = 310,
+    OpImageSparseSampleProjDrefImplicitLod = 311,
+    OpImageSparseSampleProjDrefExplicitLod = 312,
+    OpImageSparseFetch = 313,
+    OpImageSparseGather = 314,
+    OpImageSparseDrefGather = 315,
+    OpImageSparseTexelsResident = 316,
+    OpNoLine = 317,
+    OpAtomicFlagTestAndSet = 318,
+    OpAtomicFlagClear = 319,
+    OpImageSparseRead = 320,
+};
+
+// Overload operator| for mask bit combining
+
+inline ImageOperandsMask operator|(ImageOperandsMask a, ImageOperandsMask b) { return ImageOperandsMask(unsigned(a) | unsigned(b)); }
+inline FPFastMathModeMask operator|(FPFastMathModeMask a, FPFastMathModeMask b) { return FPFastMathModeMask(unsigned(a) | unsigned(b)); }
+inline SelectionControlMask operator|(SelectionControlMask a, SelectionControlMask b) { return SelectionControlMask(unsigned(a) | unsigned(b)); }
+inline LoopControlMask operator|(LoopControlMask a, LoopControlMask b) { return LoopControlMask(unsigned(a) | unsigned(b)); }
+inline FunctionControlMask operator|(FunctionControlMask a, FunctionControlMask b) { return FunctionControlMask(unsigned(a) | unsigned(b)); }
+inline MemorySemanticsMask operator|(MemorySemanticsMask a, MemorySemanticsMask b) { return MemorySemanticsMask(unsigned(a) | unsigned(b)); }
+inline MemoryAccessMask operator|(MemoryAccessMask a, MemoryAccessMask b) { return MemoryAccessMask(unsigned(a) | unsigned(b)); }
+inline KernelProfilingInfoMask operator|(KernelProfilingInfoMask a, KernelProfilingInfoMask b) { return KernelProfilingInfoMask(unsigned(a) | unsigned(b)); }
+
+} // end namespace spv
+
+#endif // #ifndef spirv_HPP
+
diff --git a/spirv_cfg.cpp b/spirv_cfg.cpp
new file mode 100644
index 0000000000..db6b548289
--- /dev/null
+++ b/spirv_cfg.cpp
@@ -0,0 +1,229 @@
+/*
+ * Copyright 2016 ARM Limited
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "spirv_cfg.hpp"
+#include <algorithm>
+#include <assert.h>
+
+using namespace std;
+
+namespace spirv_cross
+{
+CFG::CFG(Compiler &compiler_, const SPIRFunction &func_)
+    : compiler(compiler_)
+    , func(func_)
+{
+    preceding_edges.resize(compiler.get_current_id_bound());
+    succeeding_edges.resize(compiler.get_current_id_bound());
+    visit_order.resize(compiler.get_current_id_bound());
+    immediate_dominators.resize(compiler.get_current_id_bound());
+
+    build_post_order_visit_order();
+    build_immediate_dominators();
+}
+
+uint32_t CFG::find_common_dominator(uint32_t a, uint32_t b) const
+{
+    while (a != b)
+    {
+        if (visit_order[a] < visit_order[b])
+            a = immediate_dominators[a];
+        else
+            b = immediate_dominators[b];
+    }
+    return a;
+}
+
+uint32_t CFG::update_common_dominator(uint32_t a, uint32_t b)
+{
+    auto dominator = find_common_dominator(immediate_dominators[a], immediate_dominators[b]);
+    immediate_dominators[a] = dominator;
+    immediate_dominators[b] = dominator;
+    return dominator;
+}
+
+void CFG::build_immediate_dominators()
+{
+    // Traverse the post-order in reverse and build up the immediate dominator tree.
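+    // Worked example (added for illustration, not from the original source):
+    // for a diamond CFG  A -> {B, C} -> D, reverse post-order processes A first,
+    // so idom(B) = idom(C) = A. When D is processed, its first predecessor edge
+    // seeds idom(D) = B; the second calls update_common_dominator(D, C), which
+    // intersects idom(D) = B with idom(C) = A by walking both candidates up the
+    // tree in visit order until they meet, yielding idom(D) = A. This is
+    // essentially the Cooper/Harvey/Kennedy two-finger dominance algorithm.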
+    fill(begin(immediate_dominators), end(immediate_dominators), 0);
+    immediate_dominators[func.entry_block] = func.entry_block;
+
+    for (auto i = post_order.size(); i; i--)
+    {
+        uint32_t block = post_order[i - 1];
+        auto &pred = preceding_edges[block];
+        if (pred.empty()) // This is for the entry block, but we've already set up the dominators.
+            continue;
+
+        for (auto &edge : pred)
+        {
+            if (immediate_dominators[block])
+            {
+                assert(immediate_dominators[edge]);
+                immediate_dominators[block] = update_common_dominator(block, edge);
+            }
+            else
+                immediate_dominators[block] = edge;
+        }
+    }
+}
+
+bool CFG::is_back_edge(uint32_t to) const
+{
+    // We have a back edge if the visit order is set with the temporary magic value 0.
+    // Crossing edges will have already been recorded with a visit order.
+    return visit_order[to] == 0;
+}
+
+bool CFG::post_order_visit(uint32_t block_id)
+{
+    // If we have already branched to this block (back edge), stop recursion.
+    // If our branches are back-edges, we do not record them.
+    // We have to record crossing edges however.
+    if (visit_order[block_id] >= 0)
+        return !is_back_edge(block_id);
+
+    // Block back-edges from recursively revisiting ourselves.
+    visit_order[block_id] = 0;
+
+    // First visit our branch targets.
+    auto &block = compiler.get<SPIRBlock>(block_id);
+    switch (block.terminator)
+    {
+    case SPIRBlock::Direct:
+        if (post_order_visit(block.next_block))
+            add_branch(block_id, block.next_block);
+        break;
+
+    case SPIRBlock::Select:
+        if (post_order_visit(block.true_block))
+            add_branch(block_id, block.true_block);
+        if (post_order_visit(block.false_block))
+            add_branch(block_id, block.false_block);
+        break;
+
+    case SPIRBlock::MultiSelect:
+        for (auto &target : block.cases)
+        {
+            if (post_order_visit(target.block))
+                add_branch(block_id, target.block);
+        }
+        if (block.default_block && post_order_visit(block.default_block))
+            add_branch(block_id, block.default_block);
+        break;
+
+    default:
+        break;
+    }
+
+    // Then visit ourselves. Start counting at one, to let 0 be a magic value for testing back vs. crossing edges.
+    visit_order[block_id] = ++visit_count;
+    post_order.push_back(block_id);
+    return true;
+}
+
+void CFG::build_post_order_visit_order()
+{
+    uint32_t block = func.entry_block;
+    visit_count = 0;
+    fill(begin(visit_order), end(visit_order), -1);
+    post_order.clear();
+    post_order_visit(block);
+}
+
+void CFG::add_branch(uint32_t from, uint32_t to)
+{
+    const auto add_unique = [](vector<uint32_t> &l, uint32_t value) {
+        auto itr = find(begin(l), end(l), value);
+        if (itr == end(l))
+            l.push_back(value);
+    };
+    add_unique(preceding_edges[to], from);
+    add_unique(succeeding_edges[from], to);
+}
+
+DominatorBuilder::DominatorBuilder(const CFG &cfg_)
+    : cfg(cfg_)
+{
+}
+
+void DominatorBuilder::add_block(uint32_t block)
+{
+    if (!cfg.get_immediate_dominator(block))
+    {
+        // Unreachable block via the CFG; we will never emit this code anyway.
+        return;
+    }
+
+    if (!dominator)
+    {
+        dominator = block;
+        return;
+    }
+
+    if (block != dominator)
+        dominator = cfg.find_common_dominator(block, dominator);
+}
+
+void DominatorBuilder::lift_continue_block_dominator()
+{
+    // It is possible for a continue block to be the dominator if a variable is only accessed inside the while block of a do-while loop.
+    // We cannot safely declare variables inside a continue block, so move any variable declared
+    // in a continue block to the entry block to simplify.
+    // It makes very little sense for a continue block to ever be a dominator, so fall back to the simplest
+    // solution.
+
+    if (!dominator)
+        return;
+
+    auto &block = cfg.get_compiler().get<SPIRBlock>(dominator);
+    auto post_order = cfg.get_visit_order(dominator);
+
+    // If we are branching to a block with a higher post-order traversal index (continue blocks), we have a problem
+    // since we cannot create sensible GLSL code for this; fall back to the entry block.
+    bool back_edge_dominator = false;
+    switch (block.terminator)
+    {
+    case SPIRBlock::Direct:
+        if (cfg.get_visit_order(block.next_block) > post_order)
+            back_edge_dominator = true;
+        break;
+
+    case SPIRBlock::Select:
+        if (cfg.get_visit_order(block.true_block) > post_order)
+            back_edge_dominator = true;
+        if (cfg.get_visit_order(block.false_block) > post_order)
+            back_edge_dominator = true;
+        break;
+
+    case SPIRBlock::MultiSelect:
+        for (auto &target : block.cases)
+        {
+            if (cfg.get_visit_order(target.block) > post_order)
+                back_edge_dominator = true;
+        }
+        if (block.default_block && cfg.get_visit_order(block.default_block) > post_order)
+            back_edge_dominator = true;
+        break;
+
+    default:
+        break;
+    }
+
+    if (back_edge_dominator)
+        dominator = cfg.get_function().entry_block;
+}
+}
diff --git a/spirv_cfg.hpp b/spirv_cfg.hpp
new file mode 100644
index 0000000000..e6450a7123
--- /dev/null
+++ b/spirv_cfg.hpp
@@ -0,0 +1,115 @@
+/*
+ * Copyright 2016 ARM Limited
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SPIRV_CROSS_CFG_HPP
+#define SPIRV_CROSS_CFG_HPP
+
+#include "spirv_cross.hpp"
+#include <assert.h>
+
+namespace spirv_cross
+{
+class CFG
+{
+public:
+    CFG(Compiler &compiler, const SPIRFunction &function);
+
+    Compiler &get_compiler()
+    {
+        return compiler;
+    }
+
+    const Compiler &get_compiler() const
+    {
+        return compiler;
+    }
+
+    const SPIRFunction &get_function() const
+    {
+        return func;
+    }
+
+    uint32_t get_immediate_dominator(uint32_t block) const
+    {
+        return immediate_dominators[block];
+    }
+
+    uint32_t get_visit_order(uint32_t block) const
+    {
+        int v = visit_order[block];
+        assert(v > 0);
+        return uint32_t(v);
+    }
+
+    uint32_t find_common_dominator(uint32_t a, uint32_t b) const;
+
+    const std::vector<uint32_t> &get_preceding_edges(uint32_t block) const
+    {
+        return preceding_edges[block];
+    }
+
+    const std::vector<uint32_t> &get_succeeding_edges(uint32_t block) const
+    {
+        return succeeding_edges[block];
+    }
+
+    template <typename Op>
+    void walk_from(uint32_t block, const Op &op) const
+    {
+        op(block);
+        for (auto b : succeeding_edges[block])
+            walk_from(b, op);
+    }
+
+private:
+    Compiler &compiler;
+    const SPIRFunction &func;
+    std::vector<std::vector<uint32_t>> preceding_edges;
+    std::vector<std::vector<uint32_t>> succeeding_edges;
+    std::vector<uint32_t> immediate_dominators;
+    std::vector<int> visit_order;
+    std::vector<uint32_t> post_order;
+
+    void add_branch(uint32_t from, uint32_t to);
+    void build_post_order_visit_order();
+    void build_immediate_dominators();
+    bool post_order_visit(uint32_t block);
+    uint32_t visit_count = 0;
+
+    uint32_t update_common_dominator(uint32_t a, uint32_t b);
+    bool is_back_edge(uint32_t to) const;
+};
+
+class DominatorBuilder
+{
+public:
+    DominatorBuilder(const CFG &cfg);
+
+    void add_block(uint32_t block);
+    uint32_t get_dominator() const
+    {
+        return dominator;
+    }
+
+    void lift_continue_block_dominator();
+
+private:
+    const CFG &cfg;
+    uint32_t dominator = 0;
+};
+}
+
+#endif
diff --git a/spirv_common.hpp b/spirv_common.hpp
new file mode 100644
index 0000000000..84257d5a21
--- /dev/null
+++ b/spirv_common.hpp
@@ -0,0 +1,911 @@
+/*
+ * Copyright 2015-2016 ARM Limited
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SPIRV_CROSS_COMMON_HPP
+#define SPIRV_CROSS_COMMON_HPP
+
+#include "spirv.hpp"
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <functional>
+#include <memory>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+namespace spirv_cross
+{
+
+#ifdef SPIRV_CROSS_EXCEPTIONS_TO_ASSERTIONS
+#ifndef _MSC_VER
+[[noreturn]]
+#endif
+inline void
+report_and_abort(const std::string &msg)
+{
+#ifdef NDEBUG
+    (void)msg;
+#else
+    fprintf(stderr, "There was a compiler error: %s\n", msg.c_str());
+#endif
+    abort();
+}
+
+#define SPIRV_CROSS_THROW(x) report_and_abort(x)
+#else
+class CompilerError : public std::runtime_error
+{
+public:
+    CompilerError(const std::string &str)
+        : std::runtime_error(str)
+    {
+    }
+};
+
+#define SPIRV_CROSS_THROW(x) throw CompilerError(x)
+#endif
+
+namespace inner
+{
+template <typename T>
+void join_helper(std::ostringstream &stream, T &&t)
+{
+    stream << std::forward<T>(t);
+}
+
+template <typename T, typename... Ts>
+void join_helper(std::ostringstream &stream, T &&t, Ts &&... ts)
+{
+    stream << std::forward<T>(t);
+    join_helper(stream, std::forward<Ts>(ts)...);
+}
+}
+
+// Helper template to avoid lots of nasty string temporary munging.
+template <typename... Ts>
+std::string join(Ts &&... ts)
+{
+    std::ostringstream stream;
+    inner::join_helper(stream, std::forward<Ts>(ts)...);
+    return stream.str();
+}
+
+inline std::string merge(const std::vector<std::string> &list)
+{
+    std::string s;
+    for (auto &elem : list)
+    {
+        s += elem;
+        if (&elem != &list.back())
+            s += ", ";
+    }
+    return s;
+}
+
+template <typename T>
+inline std::string convert_to_string(T &&t)
+{
+    return std::to_string(std::forward<T>(t));
+}
+
+// Allow implementations to set a convenient standard precision
+#ifndef SPIRV_CROSS_FLT_FMT
+#define SPIRV_CROSS_FLT_FMT "%.32g"
+#endif
+
+inline std::string convert_to_string(float t)
+{
+    // std::to_string for floating point values is broken.
+    // Fallback to something more sane.
+    char buf[64];
+    sprintf(buf, SPIRV_CROSS_FLT_FMT, t);
+    // Ensure that the literal is float.
+    if (!strchr(buf, '.') && !strchr(buf, 'e'))
+        strcat(buf, ".0");
+    return buf;
+}
+
+inline std::string convert_to_string(double t)
+{
+    // std::to_string for floating point values is broken.
+    // Fallback to something more sane.
+    char buf[64];
+    sprintf(buf, SPIRV_CROSS_FLT_FMT, t);
+    // Ensure that the literal is float.
+    if (!strchr(buf, '.') && !strchr(buf, 'e'))
+        strcat(buf, ".0");
+    return buf;
+}
+
+struct Instruction
+{
+    Instruction(const std::vector<uint32_t> &spirv, uint32_t &index);
+
+    uint16_t op;
+    uint16_t count;
+    uint32_t offset;
+    uint32_t length;
+};
+
+// Helper for Variant interface.
+struct IVariant
+{
+    virtual ~IVariant() = default;
+    uint32_t self = 0;
+};
+
+enum Types
+{
+    TypeNone,
+    TypeType,
+    TypeVariable,
+    TypeConstant,
+    TypeFunction,
+    TypeFunctionPrototype,
+    TypePointer,
+    TypeBlock,
+    TypeExtension,
+    TypeExpression,
+    TypeConstantOp,
+    TypeUndef
+};
+
+struct SPIRUndef : IVariant
+{
+    enum
+    {
+        type = TypeUndef
+    };
+    SPIRUndef(uint32_t basetype_)
+        : basetype(basetype_)
+    {
+    }
+    uint32_t basetype;
+};
+
+struct SPIRConstantOp : IVariant
+{
+    enum
+    {
+        type = TypeConstantOp
+    };
+
+    SPIRConstantOp(uint32_t result_type, spv::Op op, const uint32_t *args, uint32_t length)
+        : opcode(op)
+        , arguments(args, args + length)
+        , basetype(result_type)
+    {
+    }
+
+    spv::Op opcode;
+    std::vector<uint32_t> arguments;
+    uint32_t basetype;
+};
+
+struct SPIRType : IVariant
+{
+    enum
+    {
+        type = TypeType
+    };
+
+    enum BaseType
+    {
+        Unknown,
+        Void,
+        Boolean,
+        Char,
+        Int,
+        UInt,
+        Int64,
+        UInt64,
+        AtomicCounter,
+        Float,
+        Double,
+        Struct,
+        Image,
+        SampledImage,
+        Sampler
+    };
+
+    // Scalar/vector/matrix support.
+    BaseType basetype = Unknown;
+    uint32_t width = 0;
+    uint32_t vecsize = 1;
+    uint32_t columns = 1;
+
+    // Arrays, support array of arrays by having a vector of array sizes.
+    std::vector<uint32_t> array;
+
+    // Array elements can be either specialization constants or specialization ops.
+    // This array determines how to interpret the array size.
+    // If an element is true, the element is a literal,
+    // otherwise, it's an expression, which must be resolved on demand.
+    // The actual size is not really known until runtime.
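+    // Illustrative example (added, not from the original source): for `float v[4]`,
+    // array = { 4 } and array_size_literal = { true }; for `float v[N]` where N is
+    // a specialization constant, array stores the ID of N and
+    // array_size_literal = { false }, so the size must be resolved on demand.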
+    std::vector<bool> array_size_literal;
+
+    // Pointers
+    bool pointer = false;
+    spv::StorageClass storage = spv::StorageClassGeneric;
+
+    std::vector<uint32_t> member_types;
+
+    struct Image
+    {
+        uint32_t type;
+        spv::Dim dim;
+        bool depth;
+        bool arrayed;
+        bool ms;
+        uint32_t sampled;
+        spv::ImageFormat format;
+    } image;
+
+    // Structs can be declared multiple times if they are used as part of interface blocks.
+    // We want to detect this so that we only emit the struct definition once.
+    // Since we cannot rely on OpName to be equal, we need to figure out aliases.
+    uint32_t type_alias = 0;
+
+    // Used in backends to avoid emitting members with conflicting names.
+    std::unordered_set<std::string> member_name_cache;
+};
+
+struct SPIRExtension : IVariant
+{
+    enum
+    {
+        type = TypeExtension
+    };
+
+    enum Extension
+    {
+        GLSL
+    };
+
+    SPIRExtension(Extension ext_)
+        : ext(ext_)
+    {
+    }
+
+    Extension ext;
+};
+
+// SPIREntryPoint is not a variant since its IDs are used to decorate OpFunction,
+// so in order to avoid conflicts, we can't stick them in the ids array.
+struct SPIREntryPoint
+{
+    SPIREntryPoint(uint32_t self_, spv::ExecutionModel execution_model, std::string entry_name)
+        : self(self_)
+        , name(std::move(entry_name))
+        , model(execution_model)
+    {
+    }
+    SPIREntryPoint() = default;
+
+    uint32_t self = 0;
+    std::string name;
+    std::vector<uint32_t> interface_variables;
+
+    uint64_t flags = 0;
+    struct
+    {
+        uint32_t x = 0, y = 0, z = 0;
+    } workgroup_size;
+    uint32_t invocations = 0;
+    uint32_t output_vertices = 0;
+    spv::ExecutionModel model;
+};
+
+struct SPIRExpression : IVariant
+{
+    enum
+    {
+        type = TypeExpression
+    };
+
+    // Only created by the backend target to avoid creating tons of temporaries.
+    SPIRExpression(std::string expr, uint32_t expression_type_, bool immutable_)
+        : expression(move(expr))
+        , expression_type(expression_type_)
+        , immutable(immutable_)
+    {
+    }
+
+    // If non-zero, prepend expression with to_expression(base_expression).
+    // Used in amortizing multiple calls to to_expression()
+    // where in certain cases that would quickly force a temporary when not needed.
+    uint32_t base_expression = 0;
+
+    std::string expression;
+    uint32_t expression_type = 0;
+
+    // If this expression is a forwarded load,
+    // allow us to reference the original variable.
+    uint32_t loaded_from = 0;
+
+    // If this expression will never change, we can avoid lots of temporaries
+    // in high level source.
+    // An expression being immutable can be speculative;
+    // it is assumed that this is true almost always.
+    bool immutable = false;
+
+    // If this expression has been used while invalidated.
+    bool used_while_invalidated = false;
+
+    // A list of expressions which this expression depends on.
+    std::vector<uint32_t> expression_dependencies;
+};
+
+struct SPIRFunctionPrototype : IVariant
+{
+    enum
+    {
+        type = TypeFunctionPrototype
+    };
+
+    SPIRFunctionPrototype(uint32_t return_type_)
+        : return_type(return_type_)
+    {
+    }
+
+    uint32_t return_type;
+    std::vector<uint32_t> parameter_types;
+};
+
+struct SPIRBlock : IVariant
+{
+    enum
+    {
+        type = TypeBlock
+    };
+
+    enum Terminator
+    {
+        Unknown,
+        Direct, // Emit next block directly without a particular condition.
+
+        Select, // Block ends with an if/else block.
+        MultiSelect, // Block ends with switch statement.
+
+        Return, // Block ends with return.
+        Unreachable, // Noop
+        Kill // Discard
+    };
+
+    enum Merge
+    {
+        MergeNone,
+        MergeLoop,
+        MergeSelection
+    };
+
+    enum Method
+    {
+        MergeToSelectForLoop,
+        MergeToDirectForLoop
+    };
+
+    enum ContinueBlockType
+    {
+        ContinueNone,
+
+        // Continue block is branchless and has at least one instruction.
+        ForLoop,
+
+        // Noop continue block.
+        WhileLoop,
+
+        // Continue block is conditional.
+        DoWhileLoop,
+
+        // Highly unlikely that anything will use this,
+        // since it is really awkward/impossible to express in GLSL.
+        ComplexLoop
+    };
+
+    enum
+    {
+        NoDominator = 0xffffffffu
+    };
+
+    Terminator terminator = Unknown;
+    Merge merge = MergeNone;
+    uint32_t next_block = 0;
+    uint32_t merge_block = 0;
+    uint32_t continue_block = 0;
+
+    uint32_t return_value = 0; // If 0, return nothing (void).
+    uint32_t condition = 0;
+    uint32_t true_block = 0;
+    uint32_t false_block = 0;
+    uint32_t default_block = 0;
+
+    std::vector<Instruction> ops;
+
+    struct Phi
+    {
+        uint32_t local_variable; // flush local variable ...
+        uint32_t parent; // If we're in from_block and want to branch into this block ...
+        uint32_t function_variable; // to this function-global "phi" variable first.
+    };
+
+    // Before entering this block flush out local variables to magical "phi" variables.
+    std::vector<Phi> phi_variables;
+
+    // Declare these temporaries before beginning the block.
+    // Used for handling complex continue blocks which have side effects.
+    std::vector<std::pair<uint32_t, uint32_t>> declare_temporary;
+
+    struct Case
+    {
+        uint32_t value;
+        uint32_t block;
+    };
+    std::vector<Case> cases;
+
+    // If we have tried to optimize code for this block but failed,
+    // keep track of this.
+    bool disable_block_optimization = false;
+
+    // If the continue block is complex, fall back to "dumb" for loops.
+    bool complex_continue = false;
+
+    // The dominating block which this block might be within.
+    // Used in continue; blocks to determine if we really need to write continue.
+    uint32_t loop_dominator = 0;
+
+    // All access to these variables is dominated by this block,
+    // so before branching anywhere we need to make sure that we declare these variables.
+    std::vector<uint32_t> dominated_variables;
+
+    // These are variables which should be declared in a for loop header; if we
+    // fail to use a classic for-loop,
+    // we remove these variables, and fall back to regular variables outside the loop.
+    std::vector<uint32_t> loop_variables;
+};
+
+struct SPIRFunction : IVariant
+{
+    enum
+    {
+        type = TypeFunction
+    };
+
+    SPIRFunction(uint32_t return_type_, uint32_t function_type_)
+        : return_type(return_type_)
+        , function_type(function_type_)
+    {
+    }
+
+    struct Parameter
+    {
+        uint32_t type;
+        uint32_t id;
+        uint32_t read_count;
+        uint32_t write_count;
+    };
+
+    // When calling a function, and we're remapping separate image samplers,
+    // resolve these arguments into combined image samplers and pass them
+    // as additional arguments in this order.
+    // It gets more complicated as functions can pull in their own globals
+    // and combine them with parameters,
+    // so we need to distinguish if something is a local parameter index
+    // or a global ID.
+    struct CombinedImageSamplerParameter
+    {
+        uint32_t id;
+        uint32_t image_id;
+        uint32_t sampler_id;
+        bool global_image;
+        bool global_sampler;
+    };
+
+    uint32_t return_type;
+    uint32_t function_type;
+    std::vector<Parameter> arguments;
+
+    // Can be used by backends to add magic arguments.
+    // Currently used by combined image/sampler implementation.
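+    // Illustrative example (added, not from the original source): given
+    //     vec4 f(texture2D t) { return texture(sampler2D(t, gSampler), uv); }
+    // a combined parameter { id, image_id = t, sampler_id = gSampler,
+    // global_image = false, global_sampler = true } can be appended, so targets
+    // without separate samplers receive a pre-combined sampler2D argument instead.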
+
+    std::vector<Parameter> shadow_arguments;
+    std::vector<uint32_t> local_variables;
+    uint32_t entry_block = 0;
+    std::vector<uint32_t> blocks;
+    std::vector<CombinedImageSamplerParameter> combined_parameters;
+
+    void add_local_variable(uint32_t id)
+    {
+        local_variables.push_back(id);
+    }
+
+    void add_parameter(uint32_t parameter_type, uint32_t id)
+    {
+        // Arguments are read-only until proven otherwise.
+        arguments.push_back({ parameter_type, id, 0u, 0u });
+    }
+
+    bool active = false;
+    bool flush_undeclared = true;
+    bool do_combined_parameters = true;
+    bool analyzed_variable_scope = false;
+};
+
+struct SPIRVariable : IVariant
+{
+    enum
+    {
+        type = TypeVariable
+    };
+
+    SPIRVariable() = default;
+    SPIRVariable(uint32_t basetype_, spv::StorageClass storage_, uint32_t initializer_ = 0)
+        : basetype(basetype_)
+        , storage(storage_)
+        , initializer(initializer_)
+    {
+    }
+
+    uint32_t basetype = 0;
+    spv::StorageClass storage = spv::StorageClassGeneric;
+    uint32_t decoration = 0;
+    uint32_t initializer = 0;
+
+    std::vector<uint32_t> dereference_chain;
+    bool compat_builtin = false;
+
+    // If a variable is shadowed, we only statically assign to it
+    // and never actually emit a statement for it.
+    // When we read the variable as an expression, just forward
+    // shadowed_id as the expression.
+    bool statically_assigned = false;
+    uint32_t static_expression = 0;
+
+    // Temporaries which can remain forwarded as long as this variable is not modified.
+    std::vector<uint32_t> dependees;
+    bool forwardable = true;
+
+    bool deferred_declaration = false;
+    bool phi_variable = false;
+    bool remapped_variable = false;
+    uint32_t remapped_components = 0;
+
+    // The block which dominates all access to this variable.
+    uint32_t dominator = 0;
+    // If true, this variable is a loop variable; when accessing the variable
+    // outside a loop, we should statically forward it.
+    bool loop_variable = false;
+    // Set to true while we're inside the for loop.
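+    // Illustrative sketch (added, not from the original source): in
+    //     for (int i = 0; i < 4; i++) { sum += i; }
+    // `i` can be flagged as a loop variable and declared in the for-loop header;
+    // the flag below tracks whether that header declaration is currently active
+    // while the loop body is being emitted.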
+    bool loop_variable_enable = false;
+
+    SPIRFunction::Parameter *parameter = nullptr;
+};
+
+struct SPIRConstant : IVariant
+{
+    enum
+    {
+        type = TypeConstant
+    };
+
+    union Constant {
+        uint32_t u32;
+        int32_t i32;
+        float f32;
+
+        uint64_t u64;
+        int64_t i64;
+        double f64;
+    };
+
+    struct ConstantVector
+    {
+        Constant r[4];
+        uint32_t vecsize;
+    };
+
+    struct ConstantMatrix
+    {
+        ConstantVector c[4];
+        uint32_t columns;
+    };
+
+    inline uint32_t scalar(uint32_t col = 0, uint32_t row = 0) const
+    {
+        return m.c[col].r[row].u32;
+    }
+
+    inline float scalar_f32(uint32_t col = 0, uint32_t row = 0) const
+    {
+        return m.c[col].r[row].f32;
+    }
+
+    inline int32_t scalar_i32(uint32_t col = 0, uint32_t row = 0) const
+    {
+        return m.c[col].r[row].i32;
+    }
+
+    inline double scalar_f64(uint32_t col = 0, uint32_t row = 0) const
+    {
+        return m.c[col].r[row].f64;
+    }
+
+    inline int64_t scalar_i64(uint32_t col = 0, uint32_t row = 0) const
+    {
+        return m.c[col].r[row].i64;
+    }
+
+    inline uint64_t scalar_u64(uint32_t col = 0, uint32_t row = 0) const
+    {
+        return m.c[col].r[row].u64;
+    }
+
+    inline const ConstantVector &vector() const
+    {
+        return m.c[0];
+    }
+    inline uint32_t vector_size() const
+    {
+        return m.c[0].vecsize;
+    }
+    inline uint32_t columns() const
+    {
+        return m.columns;
+    }
+
+    SPIRConstant(uint32_t constant_type_, const uint32_t *elements, uint32_t num_elements)
+        : constant_type(constant_type_)
+    {
+        subconstants.insert(end(subconstants), elements, elements + num_elements);
+    }
+
+    SPIRConstant(uint32_t constant_type_, uint32_t v0)
+        : constant_type(constant_type_)
+    {
+        m.c[0].r[0].u32 = v0;
+        m.c[0].vecsize = 1;
+        m.columns = 1;
+    }
+
+    SPIRConstant(uint32_t constant_type_, uint32_t v0, uint32_t v1)
+        : constant_type(constant_type_)
+    {
+        m.c[0].r[0].u32 = v0;
+        m.c[0].r[1].u32 = v1;
+        m.c[0].vecsize = 2;
+        m.columns = 1;
+    }
+
+    SPIRConstant(uint32_t constant_type_, uint32_t v0, uint32_t v1, uint32_t v2)
+        : constant_type(constant_type_)
+    {
+        m.c[0].r[0].u32 = v0;
+        m.c[0].r[1].u32 = v1;
+        m.c[0].r[2].u32 = v2;
+        m.c[0].vecsize = 3;
+        m.columns = 1;
+    }
+
+    SPIRConstant(uint32_t constant_type_, uint32_t v0, uint32_t v1, uint32_t v2, uint32_t v3)
+        : constant_type(constant_type_)
+    {
+        m.c[0].r[0].u32 = v0;
+        m.c[0].r[1].u32 = v1;
+        m.c[0].r[2].u32 = v2;
+        m.c[0].r[3].u32 = v3;
+        m.c[0].vecsize = 4;
+        m.columns = 1;
+    }
+
+    SPIRConstant(uint32_t constant_type_, uint64_t v0)
+        : constant_type(constant_type_)
+    {
+        m.c[0].r[0].u64 = v0;
+        m.c[0].vecsize = 1;
+        m.columns = 1;
+    }
+
+    SPIRConstant(uint32_t constant_type_, uint64_t v0, uint64_t v1)
+        : constant_type(constant_type_)
+    {
+        m.c[0].r[0].u64 = v0;
+        m.c[0].r[1].u64 = v1;
+        m.c[0].vecsize = 2;
+        m.columns = 1;
+    }
+
+    SPIRConstant(uint32_t constant_type_, uint64_t v0, uint64_t v1, uint64_t v2)
+        : constant_type(constant_type_)
+    {
+        m.c[0].r[0].u64 = v0;
+        m.c[0].r[1].u64 = v1;
+        m.c[0].r[2].u64 = v2;
+        m.c[0].vecsize = 3;
+        m.columns = 1;
+    }
+
+    SPIRConstant(uint32_t constant_type_, uint64_t v0, uint64_t v1, uint64_t v2, uint64_t v3)
+        : constant_type(constant_type_)
+    {
+        m.c[0].r[0].u64 = v0;
+        m.c[0].r[1].u64 = v1;
+        m.c[0].r[2].u64 = v2;
+        m.c[0].r[3].u64 = v3;
+        m.c[0].vecsize = 4;
+        m.columns = 1;
+    }
+
+    SPIRConstant(uint32_t constant_type_, const ConstantVector &vec0)
+        : constant_type(constant_type_)
+    {
+        m.columns = 1;
+        m.c[0] = vec0;
+    }
+
+    SPIRConstant(uint32_t constant_type_, const ConstantVector &vec0, const ConstantVector &vec1)
+        : constant_type(constant_type_)
+    {
+        m.columns = 2;
+        m.c[0] = vec0;
+        m.c[1] = vec1;
+    }
+
+    SPIRConstant(uint32_t constant_type_, const ConstantVector &vec0, const ConstantVector &vec1,
+                 const ConstantVector &vec2)
+        : constant_type(constant_type_)
+    {
+        m.columns = 3;
+        m.c[0] = vec0;
+        m.c[1] = vec1;
+        m.c[2] = vec2;
+    }
+
+    SPIRConstant(uint32_t constant_type_, const ConstantVector &vec0, const ConstantVector &vec1,
+                 const ConstantVector &vec2, const ConstantVector &vec3)
+        : constant_type(constant_type_)
+    {
+        m.columns = 4;
+        m.c[0] = vec0;
+        m.c[1] = vec1;
+        m.c[2] = vec2;
+        m.c[3] = vec3;
+    }
+
+    uint32_t constant_type;
+    ConstantMatrix m;
+    bool specialization = false; // If the constant is a specialization constant.
+
+    // For composites which are constant arrays, etc.
+    std::vector<uint32_t> subconstants;
+};
+
+class Variant
+{
+public:
+    // MSVC 2013 workaround, we shouldn't need these constructors.
+    Variant() = default;
+    Variant(Variant &&other)
+    {
+        *this = std::move(other);
+    }
+    Variant &operator=(Variant &&other)
+    {
+        if (this != &other)
+        {
+            holder = move(other.holder);
+            type = other.type;
+            other.type = TypeNone;
+        }
+        return *this;
+    }
+
+    void set(std::unique_ptr<IVariant> val, uint32_t new_type)
+    {
+        holder = std::move(val);
+        if (type != TypeNone && type != new_type)
+            SPIRV_CROSS_THROW("Overwriting a variant with new type.");
+        type = new_type;
+    }
+
+    template <typename T>
+    T &get()
+    {
+        if (!holder)
+            SPIRV_CROSS_THROW("nullptr");
+        if (T::type != type)
+            SPIRV_CROSS_THROW("Bad cast");
+        return *static_cast<T *>(holder.get());
+    }
+
+    template <typename T>
+    const T &get() const
+    {
+        if (!holder)
+            SPIRV_CROSS_THROW("nullptr");
+        if (T::type != type)
+            SPIRV_CROSS_THROW("Bad cast");
+        return *static_cast<const T *>(holder.get());
+    }
+
+    uint32_t get_type() const
+    {
+        return type;
+    }
+    bool empty() const
+    {
+        return !holder;
+    }
+    void reset()
+    {
+        holder.reset();
+        type = TypeNone;
+    }
+
+private:
+    std::unique_ptr<IVariant> holder;
+    uint32_t type = TypeNone;
+};
+
+template <typename T>
+T &variant_get(Variant &var)
+{
+    return var.get<T>();
+}
+
+template <typename T>
+const T &variant_get(const Variant &var)
+{
+    return var.get<T>();
+}
+
+template <typename T, typename... P>
+T &variant_set(Variant &var, P &&... args)
+{
+    auto uptr = std::unique_ptr<T>(new T(std::forward<P>(args)...));
+    auto ptr = uptr.get();
+    var.set(std::move(uptr), T::type);
+    return *ptr;
+}
+
+struct Meta
+{
+    struct Decoration
+    {
+        std::string alias;
+        std::string qualified_alias;
+        uint64_t decoration_flags = 0;
+        spv::BuiltIn builtin_type;
+        uint32_t location = 0;
+        uint32_t set = 0;
+        uint32_t binding = 0;
+        uint32_t offset = 0;
+        uint32_t array_stride = 0;
+        uint32_t input_attachment = 0;
+        uint32_t spec_id = 0;
+        bool builtin = false;
+        bool per_instance = false;
+    };
+
+    Decoration decoration;
+    std::vector<Decoration> members;
+    uint32_t sampler = 0;
+};
+
+// A user callback that remaps the type of any variable.
+// var_name is the declared name of the variable.
+// name_of_type is the textual name of the type which will be used in the code unless written to by the callback.
+using VariableTypeRemapCallback =
+    std::function<void(const SPIRType &type, const std::string &var_name, std::string &name_of_type)>;
+}
+
+#endif
diff --git a/spirv_cpp.cpp b/spirv_cpp.cpp
new file mode 100644
index 0000000000..223a954131
--- /dev/null
+++ b/spirv_cpp.cpp
@@ -0,0 +1,511 @@
+/*
+ * Copyright 2015-2016 ARM Limited
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "spirv_cpp.hpp"
+
+using namespace spv;
+using namespace spirv_cross;
+using namespace std;
+
+void CompilerCPP::emit_buffer_block(const SPIRVariable &var)
+{
+    add_resource_name(var.self);
+
+    auto &type = get<SPIRType>(var.basetype);
+    auto instance_name = to_name(var.self);
+
+    uint32_t descriptor_set = meta[var.self].decoration.set;
+    uint32_t binding = meta[var.self].decoration.binding;
+
+    emit_block_struct(type);
+    auto buffer_name = to_name(type.self);
+
+    statement("internal::Resource<", buffer_name, type_to_array_glsl(type), "> ", instance_name, "__;");
+    statement_no_indent("#define ", instance_name, " __res->", instance_name, "__.get()");
+    resource_registrations.push_back(
+        join("s.register_resource(", instance_name, "__", ", ", descriptor_set, ", ", binding, ");"));
+    statement("");
+}
+
+void CompilerCPP::emit_interface_block(const SPIRVariable &var)
+{
+    add_resource_name(var.self);
+
+    auto &type = get<SPIRType>(var.basetype);
+
+    const char *qual = var.storage == StorageClassInput ? "StageInput" : "StageOutput";
+    const char *lowerqual = var.storage == StorageClassInput ? "stage_input" : "stage_output";
"stage_input" : "stage_output"; + auto instance_name = to_name(var.self); + uint32_t location = meta[var.self].decoration.location; + + string buffer_name; + auto flags = meta[type.self].decoration.decoration_flags; + if (flags & (1ull << DecorationBlock)) + { + emit_block_struct(type); + buffer_name = to_name(type.self); + } + else + buffer_name = type_to_glsl(type); + + statement("internal::", qual, "<", buffer_name, type_to_array_glsl(type), "> ", instance_name, "__;"); + statement_no_indent("#define ", instance_name, " __res->", instance_name, "__.get()"); + resource_registrations.push_back(join("s.register_", lowerqual, "(", instance_name, "__", ", ", location, ");")); + statement(""); +} + +void CompilerCPP::emit_shared(const SPIRVariable &var) +{ + add_resource_name(var.self); + + auto instance_name = to_name(var.self); + statement(CompilerGLSL::variable_decl(var), ";"); + statement_no_indent("#define ", instance_name, " __res->", instance_name); +} + +void CompilerCPP::emit_uniform(const SPIRVariable &var) +{ + add_resource_name(var.self); + + auto &type = get(var.basetype); + auto instance_name = to_name(var.self); + + uint32_t descriptor_set = meta[var.self].decoration.set; + uint32_t binding = meta[var.self].decoration.binding; + uint32_t location = meta[var.self].decoration.location; + + string type_name = type_to_glsl(type); + remap_variable_type_name(type, instance_name, type_name); + + if (type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage || + type.basetype == SPIRType::AtomicCounter) + { + statement("internal::Resource<", type_name, type_to_array_glsl(type), "> ", instance_name, "__;"); + statement_no_indent("#define ", instance_name, " __res->", instance_name, "__.get()"); + resource_registrations.push_back( + join("s.register_resource(", instance_name, "__", ", ", descriptor_set, ", ", binding, ");")); + } + else + { + statement("internal::UniformConstant<", type_name, type_to_array_glsl(type), "> ", instance_name, "__;"); + statement_no_indent("#define ", instance_name, " __res->", instance_name, "__.get()"); + resource_registrations.push_back( + join("s.register_uniform_constant(", instance_name, "__", ", ", location, ");")); + } + + statement(""); +} + +void CompilerCPP::emit_push_constant_block(const SPIRVariable &var) +{ + add_resource_name(var.self); + + auto &type = get(var.basetype); + auto &flags = meta[var.self].decoration.decoration_flags; + if ((flags & (1ull << DecorationBinding)) || (flags & (1ull << DecorationDescriptorSet))) + SPIRV_CROSS_THROW("Push constant blocks cannot be compiled to GLSL with Binding or Set syntax. " + "Remap to location with reflection API first or disable these decorations."); + + emit_block_struct(type); + auto buffer_name = to_name(type.self); + auto instance_name = to_name(var.self); + + statement("internal::PushConstant<", buffer_name, type_to_array_glsl(type), "> ", instance_name, ";"); + statement_no_indent("#define ", instance_name, " __res->", instance_name, ".get()"); + resource_registrations.push_back(join("s.register_push_constant(", instance_name, "__", ");")); + statement(""); +} + +void CompilerCPP::emit_block_struct(SPIRType &type) +{ + // C++ can't do interface blocks, so we fake it by emitting a separate struct. + // However, these structs are not allowed to alias anything, so remove it before + // emitting the struct. + // + // The type we have here needs to be resolved to the non-pointer type so we can remove aliases. 
+ auto &self = get<SPIRType>(type.self); + self.type_alias = 0; + emit_struct(self); +} + +void CompilerCPP::emit_resources() +{ + // Output all basic struct types which are not Block or BufferBlock as these are declared inplace + // when such variables are instantiated. + for (auto &id : ids) + { + if (id.get_type() == TypeType) + { + auto &type = id.get<SPIRType>(); + if (type.basetype == SPIRType::Struct && type.array.empty() && !type.pointer && + (meta[type.self].decoration.decoration_flags & + ((1ull << DecorationBlock) | (1ull << DecorationBufferBlock))) == 0) + { + emit_struct(type); + } + } + } + + statement("struct Resources : ", resource_type); + begin_scope(); + + // Output UBOs and SSBOs + for (auto &id : ids) + { + if (id.get_type() == TypeVariable) + { + auto &var = id.get<SPIRVariable>(); + auto &type = get<SPIRType>(var.basetype); + + if (var.storage != StorageClassFunction && type.pointer && type.storage == StorageClassUniform && + !is_hidden_variable(var) && (meta[type.self].decoration.decoration_flags & + ((1ull << DecorationBlock) | (1ull << DecorationBufferBlock)))) + { + emit_buffer_block(var); + } + } + } + + // Output push constant blocks + for (auto &id : ids) + { + if (id.get_type() == TypeVariable) + { + auto &var = id.get<SPIRVariable>(); + auto &type = get<SPIRType>(var.basetype); + if (!is_hidden_variable(var) && var.storage != StorageClassFunction && type.pointer && + type.storage == StorageClassPushConstant) + { + emit_push_constant_block(var); + } + } + } + + // Output in/out interfaces. + for (auto &id : ids) + { + if (id.get_type() == TypeVariable) + { + auto &var = id.get<SPIRVariable>(); + auto &type = get<SPIRType>(var.basetype); + + if (var.storage != StorageClassFunction && !is_hidden_variable(var) && type.pointer && + (var.storage == StorageClassInput || var.storage == StorageClassOutput) && + interface_variable_exists_in_entry_point(var.self)) + { + emit_interface_block(var); + } + } + } + + // Output Uniform Constants (values, samplers, images, etc). + for (auto &id : ids) + { + if (id.get_type() == TypeVariable) + { + auto &var = id.get<SPIRVariable>(); + auto &type = get<SPIRType>(var.basetype); + + if (var.storage != StorageClassFunction && !is_hidden_variable(var) && type.pointer && + (type.storage == StorageClassUniformConstant || type.storage == StorageClassAtomicCounter)) + { + emit_uniform(var); + } + } + } + + // Global variables. + bool emitted = false; + for (auto global : global_variables) + { + auto &var = get<SPIRVariable>(global); + if (var.storage == StorageClassWorkgroup) + { + emit_shared(var); + emitted = true; + } + } + + if (emitted) + statement(""); + + statement("inline void init(spirv_cross_shader& s)"); + begin_scope(); + statement(resource_type, "::init(s);"); + for (auto &reg : resource_registrations) + statement(reg); + end_scope(); + resource_registrations.clear(); + + end_scope_decl(); + + statement(""); + statement("Resources* __res;"); + if (get_entry_point().model == ExecutionModelGLCompute) + statement("ComputePrivateResources __priv_res;"); + statement(""); + + // Emit regular globals which are allocated per invocation. + emitted = false; + for (auto global : global_variables) + { + auto &var = get<SPIRVariable>(global); + if (var.storage == StorageClassPrivate) + { + if (var.storage == StorageClassWorkgroup) + emit_shared(var); + else + statement(CompilerGLSL::variable_decl(var), ";"); + emitted = true; + } + } + + if (emitted) + statement(""); +} + +string CompilerCPP::compile() +{ + // Do not deal with ES-isms like precision, older extensions and such.
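// Concretely (as the flag names suggest), the backend flags set below shape the
// generated C++: with swizzle_is_function, a swizzle GLSL would write as "v.xy" is
// emitted as the member call "v.xy()"; explicit_struct_type prefixes block
// declarations with "struct"; and float_literal_suffix turns 1.0 into 1.0f.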
+ options.es = false; + options.version = 450; + backend.float_literal_suffix = true; + backend.double_literal_suffix = false; + backend.long_long_literal_suffix = true; + backend.uint32_t_literal_suffix = true; + backend.basic_int_type = "int32_t"; + backend.basic_uint_type = "uint32_t"; + backend.swizzle_is_function = true; + backend.shared_is_implied = true; + backend.flexible_member_array_supported = false; + backend.explicit_struct_type = true; + backend.use_initializer_list = true; + + uint32_t pass_count = 0; + do + { + if (pass_count >= 3) + SPIRV_CROSS_THROW("Over 3 compilation loops detected. Must be a bug!"); + + resource_registrations.clear(); + reset(); + + // Move constructor for this type is broken on GCC 4.9 ... + buffer = unique_ptr(new ostringstream()); + + emit_header(); + emit_resources(); + + emit_function(get(entry_point), 0); + + pass_count++; + } while (force_recompile); + + // Match opening scope of emit_header(). + end_scope_decl(); + // namespace + end_scope(); + + // Emit C entry points + emit_c_linkage(); + + return buffer->str(); +} + +void CompilerCPP::emit_c_linkage() +{ + statement(""); + + statement("spirv_cross_shader_t *spirv_cross_construct(void)"); + begin_scope(); + statement("return new ", impl_type, "();"); + end_scope(); + + statement(""); + statement("void spirv_cross_destruct(spirv_cross_shader_t *shader)"); + begin_scope(); + statement("delete static_cast<", impl_type, "*>(shader);"); + end_scope(); + + statement(""); + statement("void spirv_cross_invoke(spirv_cross_shader_t *shader)"); + begin_scope(); + statement("static_cast<", impl_type, "*>(shader)->invoke();"); + end_scope(); + + statement(""); + statement("static const struct spirv_cross_interface vtable ="); + begin_scope(); + statement("spirv_cross_construct,"); + statement("spirv_cross_destruct,"); + statement("spirv_cross_invoke,"); + end_scope_decl(); + + statement(""); + statement("const struct spirv_cross_interface *", + interface_name.empty() ? string("spirv_cross_get_interface") : interface_name, "(void)"); + begin_scope(); + statement("return &vtable;"); + end_scope(); +} + +void CompilerCPP::emit_function_prototype(SPIRFunction &func, uint64_t) +{ + local_variable_names = resource_names; + string decl; + + auto &type = get(func.return_type); + decl += "inline "; + decl += type_to_glsl(type); + decl += " "; + + if (func.self == entry_point) + { + decl += "main"; + processing_entry_point = true; + } + else + decl += to_name(func.self); + + decl += "("; + for (auto &arg : func.arguments) + { + add_local_variable_name(arg.id); + + decl += argument_decl(arg); + if (&arg != &func.arguments.back()) + decl += ", "; + + // Hold a pointer to the parameter so we can invalidate the readonly field if needed. + auto *var = maybe_get(arg.id); + if (var) + var->parameter = &arg; + } + + decl += ")"; + statement(decl); +} + +string CompilerCPP::argument_decl(const SPIRFunction::Parameter &arg) +{ + auto &type = expression_type(arg.id); + bool constref = !type.pointer || arg.write_count == 0; + + auto &var = get(arg.id); + + string base = type_to_glsl(type); + string variable_name = to_name(var.self); + remap_variable_type_name(type, variable_name, base); + + for (uint32_t i = 0; i < type.array.size(); i++) + base = join("std::array<", base, ", ", to_array_size(type, i), ">"); + + return join(constref ? 
"const " : "", base, " &", variable_name); +} + +string CompilerCPP::variable_decl(const SPIRType &type, const string &name) +{ + string base = type_to_glsl(type); + remap_variable_type_name(type, name, base); + bool runtime = false; + + for (uint32_t i = 0; i < type.array.size(); i++) + { + auto &array = type.array[i]; + if (!array && type.array_size_literal[i]) + { + // Avoid using runtime arrays with std::array since this is undefined. + // Runtime arrays cannot be passed around as values, so this is fine. + runtime = true; + } + else + base = join("std::array<", base, ", ", to_array_size(type, i), ">"); + } + base += ' '; + return base + name + (runtime ? "[1]" : ""); +} + +void CompilerCPP::emit_header() +{ + auto &execution = get_entry_point(); + + statement("// This C++ shader is autogenerated by spirv-cross."); + statement("#include \"spirv_cross/internal_interface.hpp\""); + statement("#include \"spirv_cross/external_interface.h\""); + // Needed to properly implement GLSL-style arrays. + statement("#include "); + statement("#include "); + statement(""); + statement("using namespace spirv_cross;"); + statement("using namespace glm;"); + statement(""); + + statement("namespace Impl"); + begin_scope(); + + switch (execution.model) + { + case ExecutionModelGeometry: + case ExecutionModelTessellationControl: + case ExecutionModelTessellationEvaluation: + case ExecutionModelGLCompute: + case ExecutionModelFragment: + case ExecutionModelVertex: + statement("struct Shader"); + begin_scope(); + break; + + default: + SPIRV_CROSS_THROW("Unsupported execution model."); + } + + switch (execution.model) + { + case ExecutionModelGeometry: + impl_type = "GeometryShader"; + resource_type = "GeometryResources"; + break; + + case ExecutionModelVertex: + impl_type = "VertexShader"; + resource_type = "VertexResources"; + break; + + case ExecutionModelFragment: + impl_type = "FragmentShader"; + resource_type = "FragmentResources"; + break; + + case ExecutionModelGLCompute: + impl_type = join("ComputeShader"); + resource_type = "ComputeResources"; + break; + + case ExecutionModelTessellationControl: + impl_type = "TessControlShader"; + resource_type = "TessControlResources"; + break; + + case ExecutionModelTessellationEvaluation: + impl_type = "TessEvaluationShader"; + resource_type = "TessEvaluationResources"; + break; + + default: + SPIRV_CROSS_THROW("Unsupported execution model."); + } +} diff --git a/spirv_cpp.hpp b/spirv_cpp.hpp new file mode 100644 index 0000000000..eb77c0b10e --- /dev/null +++ b/spirv_cpp.hpp @@ -0,0 +1,71 @@ +/* + * Copyright 2015-2016 ARM Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef SPIRV_CROSS_CPP_HPP +#define SPIRV_CROSS_CPP_HPP + +#include "spirv_glsl.hpp" +#include +#include + +namespace spirv_cross +{ +class CompilerCPP : public CompilerGLSL +{ +public: + CompilerCPP(std::vector spirv_) + : CompilerGLSL(move(spirv_)) + { + } + std::string compile() override; + + // Sets a custom symbol name that can override + // spirv_cross_get_interface. + // + // Useful when several shader interfaces are linked + // statically into the same binary. + void set_interface_name(std::string name) + { + interface_name = std::move(name); + } + +private: + void emit_header() override; + void emit_c_linkage(); + void emit_function_prototype(SPIRFunction &func, uint64_t return_flags) override; + + void emit_resources(); + void emit_buffer_block(const SPIRVariable &type); + void emit_push_constant_block(const SPIRVariable &var); + void emit_interface_block(const SPIRVariable &type); + void emit_block_chain(SPIRBlock &block); + void emit_uniform(const SPIRVariable &var); + void emit_shared(const SPIRVariable &var); + void emit_block_struct(SPIRType &type); + std::string variable_decl(const SPIRType &type, const std::string &name) override; + + std::string argument_decl(const SPIRFunction::Parameter &arg); + + std::vector resource_registrations; + std::string impl_type; + std::string resource_type; + uint32_t shared_counter = 0; + + std::string interface_name; +}; +} + +#endif diff --git a/spirv_cross.cpp b/spirv_cross.cpp new file mode 100644 index 0000000000..7b2d9836a6 --- /dev/null +++ b/spirv_cross.cpp @@ -0,0 +1,3033 @@ +/* + * Copyright 2015-2016 ARM Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "spirv_cross.hpp" +#include "GLSL.std.450.h" +#include "spirv_cfg.hpp" +#include +#include +#include + +using namespace std; +using namespace spv; +using namespace spirv_cross; + +#define log(...) fprintf(stderr, __VA_ARGS__) + +Instruction::Instruction(const vector &spirv, uint32_t &index) +{ + op = spirv[index] & 0xffff; + count = (spirv[index] >> 16) & 0xffff; + + if (count == 0) + SPIRV_CROSS_THROW("SPIR-V instructions cannot consume 0 words. 
Invalid SPIR-V file."); + + offset = index + 1; + length = count - 1; + + index += count; + + if (index > spirv.size()) + SPIRV_CROSS_THROW("SPIR-V instruction goes out of bounds."); +} + +Compiler::Compiler(vector ir) + : spirv(move(ir)) +{ + parse(); +} + +string Compiler::compile() +{ + return ""; +} + +bool Compiler::variable_storage_is_aliased(const SPIRVariable &v) +{ + auto &type = get(v.basetype); + bool ssbo = (meta[type.self].decoration.decoration_flags & (1ull << DecorationBufferBlock)) != 0; + bool image = type.basetype == SPIRType::Image; + bool counter = type.basetype == SPIRType::AtomicCounter; + bool is_restrict = (meta[v.self].decoration.decoration_flags & (1ull << DecorationRestrict)) != 0; + return !is_restrict && (ssbo || image || counter); +} + +bool Compiler::block_is_pure(const SPIRBlock &block) +{ + for (auto &i : block.ops) + { + auto ops = stream(i); + auto op = static_cast(i.op); + + switch (op) + { + case OpFunctionCall: + { + uint32_t func = ops[2]; + if (!function_is_pure(get(func))) + return false; + break; + } + + case OpCopyMemory: + case OpStore: + { + auto &type = expression_type(ops[0]); + if (type.storage != StorageClassFunction) + return false; + break; + } + + case OpImageWrite: + return false; + + // Atomics are impure. + case OpAtomicLoad: + case OpAtomicStore: + case OpAtomicExchange: + case OpAtomicCompareExchange: + case OpAtomicIIncrement: + case OpAtomicIDecrement: + case OpAtomicIAdd: + case OpAtomicISub: + case OpAtomicSMin: + case OpAtomicUMin: + case OpAtomicSMax: + case OpAtomicUMax: + case OpAtomicAnd: + case OpAtomicOr: + case OpAtomicXor: + return false; + + // Geometry shader builtins modify global state. + case OpEndPrimitive: + case OpEmitStreamVertex: + case OpEndStreamPrimitive: + case OpEmitVertex: + return false; + + // Barriers disallow any reordering, so we should treat blocks with barrier as writing. + case OpControlBarrier: + case OpMemoryBarrier: + return false; + + // OpExtInst is potentially impure depending on extension, but GLSL builtins are at least pure. + + default: + break; + } + } + + return true; +} + +string Compiler::to_name(uint32_t id, bool allow_alias) +{ + if (allow_alias && ids.at(id).get_type() == TypeType) + { + // If this type is a simple alias, emit the + // name of the original type instead. + // We don't want to override the meta alias + // as that can be overridden by the reflection APIs after parse. + auto &type = get(id); + if (type.type_alias) + return to_name(type.type_alias); + } + + if (meta[id].decoration.alias.empty()) + return join("_", id); + else + return meta.at(id).decoration.alias; +} + +bool Compiler::function_is_pure(const SPIRFunction &func) +{ + for (auto block : func.blocks) + { + if (!block_is_pure(get(block))) + { + //fprintf(stderr, "Function %s is impure!\n", to_name(func.self).c_str()); + return false; + } + } + + //fprintf(stderr, "Function %s is pure!\n", to_name(func.self).c_str()); + return true; +} + +void Compiler::register_global_read_dependencies(const SPIRBlock &block, uint32_t id) +{ + for (auto &i : block.ops) + { + auto ops = stream(i); + auto op = static_cast(i.op); + + switch (op) + { + case OpFunctionCall: + { + uint32_t func = ops[2]; + register_global_read_dependencies(get(func), id); + break; + } + + case OpLoad: + case OpImageRead: + { + // If we're in a storage class which does not get invalidated, adding dependencies here is no big deal. 
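// Worked example of the word layout decoded in Instruction::Instruction above: each
// instruction's first word packs the total word count in its high 16 bits and the
// opcode in its low 16 bits, so 0x0004003B decodes to count = 4, op = 0x3B
// (OpVariable): one header word followed by length = 3 operand words.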
+ auto *var = maybe_get_backing_variable(ops[2]); + if (var && var->storage != StorageClassFunction) + { + auto &type = get(var->basetype); + + // InputTargets are immutable. + if (type.basetype != SPIRType::Image && type.image.dim != DimSubpassData) + var->dependees.push_back(id); + } + break; + } + + default: + break; + } + } +} + +void Compiler::register_global_read_dependencies(const SPIRFunction &func, uint32_t id) +{ + for (auto block : func.blocks) + register_global_read_dependencies(get(block), id); +} + +SPIRVariable *Compiler::maybe_get_backing_variable(uint32_t chain) +{ + auto *var = maybe_get(chain); + if (!var) + { + auto *cexpr = maybe_get(chain); + if (cexpr) + var = maybe_get(cexpr->loaded_from); + } + + return var; +} + +void Compiler::register_read(uint32_t expr, uint32_t chain, bool forwarded) +{ + auto &e = get(expr); + auto *var = maybe_get_backing_variable(chain); + + if (var) + { + e.loaded_from = var->self; + + // If the backing variable is immutable, we do not need to depend on the variable. + if (forwarded && !is_immutable(var->self)) + var->dependees.push_back(e.self); + + // If we load from a parameter, make sure we create "inout" if we also write to the parameter. + // The default is "in" however, so we never invalidate our compilation by reading. + if (var && var->parameter) + var->parameter->read_count++; + } +} + +void Compiler::register_write(uint32_t chain) +{ + auto *var = maybe_get(chain); + if (!var) + { + // If we're storing through an access chain, invalidate the backing variable instead. + auto *expr = maybe_get(chain); + if (expr && expr->loaded_from) + var = maybe_get(expr->loaded_from); + } + + if (var) + { + // If our variable is in a storage class which can alias with other buffers, + // invalidate all variables which depend on aliased variables. + if (variable_storage_is_aliased(*var)) + flush_all_aliased_variables(); + else if (var) + flush_dependees(*var); + + // We tried to write to a parameter which is not marked with out qualifier, force a recompile. + if (var->parameter && var->parameter->write_count == 0) + { + var->parameter->write_count++; + force_recompile = true; + } + } +} + +void Compiler::flush_dependees(SPIRVariable &var) +{ + for (auto expr : var.dependees) + invalid_expressions.insert(expr); + var.dependees.clear(); +} + +void Compiler::flush_all_aliased_variables() +{ + for (auto aliased : aliased_variables) + flush_dependees(get(aliased)); +} + +void Compiler::flush_all_atomic_capable_variables() +{ + for (auto global : global_variables) + flush_dependees(get(global)); + flush_all_aliased_variables(); +} + +void Compiler::flush_all_active_variables() +{ + // Invalidate all temporaries we read from variables in this block since they were forwarded. + // Invalidate all temporaries we read from globals. 
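// Sketch of the scheme with hypothetical IDs: a load of variable %v forwarded as
// expression %e puts %e in v.dependees via register_read(); a later
// register_write(%v) calls flush_dependees(v), which moves %e into
// invalid_expressions so the backend knows the forwarded value is stale and must be
// re-read from the variable on next use.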
+ for (auto &v : current_function->local_variables) + flush_dependees(get(v)); + for (auto &arg : current_function->arguments) + flush_dependees(get(arg.id)); + for (auto global : global_variables) + flush_dependees(get(global)); + + flush_all_aliased_variables(); +} + +const SPIRType &Compiler::expression_type(uint32_t id) const +{ + switch (ids[id].get_type()) + { + case TypeVariable: + return get(get(id).basetype); + + case TypeExpression: + return get(get(id).expression_type); + + case TypeConstant: + return get(get(id).constant_type); + + case TypeConstantOp: + return get(get(id).basetype); + + case TypeUndef: + return get(get(id).basetype); + + default: + SPIRV_CROSS_THROW("Cannot resolve expression type."); + } +} + +bool Compiler::expression_is_lvalue(uint32_t id) const +{ + auto &type = expression_type(id); + switch (type.basetype) + { + case SPIRType::SampledImage: + case SPIRType::Image: + case SPIRType::Sampler: + return false; + + default: + return true; + } +} + +bool Compiler::is_immutable(uint32_t id) const +{ + if (ids[id].get_type() == TypeVariable) + { + auto &var = get(id); + + // Anything we load from the UniformConstant address space is guaranteed to be immutable. + bool pointer_to_const = var.storage == StorageClassUniformConstant; + return pointer_to_const || var.phi_variable || !expression_is_lvalue(id); + } + else if (ids[id].get_type() == TypeExpression) + return get(id).immutable; + else if (ids[id].get_type() == TypeConstant || ids[id].get_type() == TypeConstantOp || + ids[id].get_type() == TypeUndef) + return true; + else + return false; +} + +static inline bool storage_class_is_interface(spv::StorageClass storage) +{ + switch (storage) + { + case StorageClassInput: + case StorageClassOutput: + case StorageClassUniform: + case StorageClassUniformConstant: + case StorageClassAtomicCounter: + case StorageClassPushConstant: + return true; + + default: + return false; + } +} + +bool Compiler::is_hidden_variable(const SPIRVariable &var, bool include_builtins) const +{ + if ((is_builtin_variable(var) && !include_builtins) || var.remapped_variable) + return true; + + // Combined image samplers are always considered active as they are "magic" variables. + if (find_if(begin(combined_image_samplers), end(combined_image_samplers), [&var](const CombinedImageSampler &samp) { + return samp.combined_id == var.self; + }) != end(combined_image_samplers)) + { + return false; + } + + bool hidden = false; + if (check_active_interface_variables && storage_class_is_interface(var.storage)) + hidden = active_interface_variables.find(var.self) == end(active_interface_variables); + return hidden; +} + +bool Compiler::is_builtin_variable(const SPIRVariable &var) const +{ + if (var.compat_builtin || meta[var.self].decoration.builtin) + return true; + + // We can have builtin structs as well. If one member of a struct is builtin, the struct must also be builtin. 
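// (Classic case: a vertex shader's gl_PerVertex block. The variable itself carries
// no BuiltIn decoration, but its members are decorated BuiltIn Position / PointSize,
// so the loop below treats the whole variable as builtin.)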
+ for (auto &m : meta[get(var.basetype).self].members) + if (m.builtin) + return true; + + return false; +} + +bool Compiler::is_member_builtin(const SPIRType &type, uint32_t index, BuiltIn *builtin) const +{ + auto &memb = meta[type.self].members; + if (index < memb.size() && memb[index].builtin) + { + if (builtin) + *builtin = memb[index].builtin_type; + return true; + } + + return false; +} + +bool Compiler::is_scalar(const SPIRType &type) const +{ + return type.vecsize == 1 && type.columns == 1; +} + +bool Compiler::is_vector(const SPIRType &type) const +{ + return type.vecsize > 1 && type.columns == 1; +} + +bool Compiler::is_matrix(const SPIRType &type) const +{ + return type.vecsize > 1 && type.columns > 1; +} + +ShaderResources Compiler::get_shader_resources() const +{ + return get_shader_resources(nullptr); +} + +ShaderResources Compiler::get_shader_resources(const unordered_set &active_variables) const +{ + return get_shader_resources(&active_variables); +} + +bool Compiler::InterfaceVariableAccessHandler::handle(Op opcode, const uint32_t *args, uint32_t length) +{ + uint32_t variable = 0; + switch (opcode) + { + // Need this first, otherwise, GCC complains about unhandled switch statements. + default: + break; + + case OpFunctionCall: + { + // Invalid SPIR-V. + if (length < 3) + return false; + + uint32_t count = length - 3; + args += 3; + for (uint32_t i = 0; i < count; i++) + { + auto *var = compiler.maybe_get(args[i]); + if (var && storage_class_is_interface(var->storage)) + variables.insert(args[i]); + } + break; + } + + case OpAtomicStore: + case OpStore: + // Invalid SPIR-V. + if (length < 1) + return false; + variable = args[0]; + break; + + case OpCopyMemory: + { + if (length < 3) + return false; + + auto *var = compiler.maybe_get(args[0]); + if (var && storage_class_is_interface(var->storage)) + variables.insert(variable); + + var = compiler.maybe_get(args[1]); + if (var && storage_class_is_interface(var->storage)) + variables.insert(variable); + break; + } + + case OpAccessChain: + case OpInBoundsAccessChain: + case OpLoad: + case OpCopyObject: + case OpImageTexelPointer: + case OpAtomicLoad: + case OpAtomicExchange: + case OpAtomicCompareExchange: + case OpAtomicIIncrement: + case OpAtomicIDecrement: + case OpAtomicIAdd: + case OpAtomicISub: + case OpAtomicSMin: + case OpAtomicUMin: + case OpAtomicSMax: + case OpAtomicUMax: + case OpAtomicAnd: + case OpAtomicOr: + case OpAtomicXor: + // Invalid SPIR-V. + if (length < 3) + return false; + variable = args[2]; + break; + } + + if (variable) + { + auto *var = compiler.maybe_get(variable); + if (var && storage_class_is_interface(var->storage)) + variables.insert(variable); + } + return true; +} + +unordered_set Compiler::get_active_interface_variables() const +{ + // Traverse the call graph and find all interface variables which are in use. 
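// The traversal machinery is generic: an analysis is phrased as an OpcodeHandler
// whose handle() returns false to abort the walk. A hypothetical handler that counts
// loads would look roughly like this (sketch only; OpcodeHandler is internal to
// Compiler, so external code cannot actually subclass it):
//
//     struct LoadCounter : OpcodeHandler
//     {
//         uint32_t loads = 0;
//         bool handle(spv::Op op, const uint32_t *, uint32_t) override
//         {
//             if (op == spv::OpLoad)
//                 loads++;
//             return true; // keep walking
//         }
//     };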
+ unordered_set variables; + InterfaceVariableAccessHandler handler(*this, variables); + traverse_all_reachable_opcodes(get(entry_point), handler); + return variables; +} + +void Compiler::set_enabled_interface_variables(std::unordered_set active_variables) +{ + active_interface_variables = move(active_variables); + check_active_interface_variables = true; +} + +ShaderResources Compiler::get_shader_resources(const unordered_set *active_variables) const +{ + ShaderResources res; + + for (auto &id : ids) + { + if (id.get_type() != TypeVariable) + continue; + + auto &var = id.get(); + auto &type = get(var.basetype); + + // It is possible for uniform storage classes to be passed as function parameters, so detect + // that. To detect function parameters, check of StorageClass of variable is function scope. + if (var.storage == StorageClassFunction || !type.pointer || is_builtin_variable(var)) + continue; + + if (active_variables && active_variables->find(var.self) == end(*active_variables)) + continue; + + // Input + if (var.storage == StorageClassInput && interface_variable_exists_in_entry_point(var.self)) + { + if (meta[type.self].decoration.decoration_flags & (1ull << DecorationBlock)) + res.stage_inputs.push_back({ var.self, var.basetype, type.self, meta[type.self].decoration.alias }); + else + res.stage_inputs.push_back({ var.self, var.basetype, type.self, meta[var.self].decoration.alias }); + } + // Subpass inputs + else if (var.storage == StorageClassUniformConstant && type.image.dim == DimSubpassData) + { + res.subpass_inputs.push_back({ var.self, var.basetype, type.self, meta[var.self].decoration.alias }); + } + // Outputs + else if (var.storage == StorageClassOutput && interface_variable_exists_in_entry_point(var.self)) + { + if (meta[type.self].decoration.decoration_flags & (1ull << DecorationBlock)) + res.stage_outputs.push_back({ var.self, var.basetype, type.self, meta[type.self].decoration.alias }); + else + res.stage_outputs.push_back({ var.self, var.basetype, type.self, meta[var.self].decoration.alias }); + } + // UBOs + else if (type.storage == StorageClassUniform && + (meta[type.self].decoration.decoration_flags & (1ull << DecorationBlock))) + { + res.uniform_buffers.push_back({ var.self, var.basetype, type.self, meta[type.self].decoration.alias }); + } + // SSBOs + else if (type.storage == StorageClassUniform && + (meta[type.self].decoration.decoration_flags & (1ull << DecorationBufferBlock))) + { + res.storage_buffers.push_back({ var.self, var.basetype, type.self, meta[type.self].decoration.alias }); + } + // Push constant blocks + else if (type.storage == StorageClassPushConstant) + { + // There can only be one push constant block, but keep the vector in case this restriction is lifted + // in the future. 
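// How a consumer typically reads these lists back out through the reflection API
// (sketch; "compiler" is any Compiler instance):
//
//     ShaderResources res = compiler.get_shader_resources();
//     for (auto &ubo : res.uniform_buffers)
//     {
//         uint32_t set = compiler.get_decoration(ubo.id, spv::DecorationDescriptorSet);
//         uint32_t binding = compiler.get_decoration(ubo.id, spv::DecorationBinding);
//         // Remap as needed, then write back with compiler.set_decoration(ubo.id, ...).
//     }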
+ res.push_constant_buffers.push_back({ var.self, var.basetype, type.self, meta[var.self].decoration.alias }); + } + // Images + else if (type.storage == StorageClassUniformConstant && type.basetype == SPIRType::Image && + type.image.sampled == 2) + { + res.storage_images.push_back({ var.self, var.basetype, type.self, meta[var.self].decoration.alias }); + } + // Separate images + else if (type.storage == StorageClassUniformConstant && type.basetype == SPIRType::Image && + type.image.sampled == 1) + { + res.separate_images.push_back({ var.self, var.basetype, type.self, meta[var.self].decoration.alias }); + } + // Separate samplers + else if (type.storage == StorageClassUniformConstant && type.basetype == SPIRType::Sampler) + { + res.separate_samplers.push_back({ var.self, var.basetype, type.self, meta[var.self].decoration.alias }); + } + // Textures + else if (type.storage == StorageClassUniformConstant && type.basetype == SPIRType::SampledImage) + { + res.sampled_images.push_back({ var.self, var.basetype, type.self, meta[var.self].decoration.alias }); + } + // Atomic counters + else if (type.storage == StorageClassAtomicCounter) + { + res.atomic_counters.push_back({ var.self, var.basetype, type.self, meta[var.self].decoration.alias }); + } + } + + return res; +} + +static inline uint32_t swap_endian(uint32_t v) +{ + return ((v >> 24) & 0x000000ffu) | ((v >> 8) & 0x0000ff00u) | ((v << 8) & 0x00ff0000u) | ((v << 24) & 0xff000000u); +} + +static string extract_string(const vector &spirv, uint32_t offset) +{ + string ret; + for (uint32_t i = offset; i < spirv.size(); i++) + { + uint32_t w = spirv[i]; + + for (uint32_t j = 0; j < 4; j++, w >>= 8) + { + char c = w & 0xff; + if (c == '\0') + return ret; + ret += c; + } + } + + SPIRV_CROSS_THROW("String was not terminated before EOF"); +} + +static bool is_valid_spirv_version(uint32_t version) +{ + switch (version) + { + // Allow v99 since it tends to just work. + case 99: + case 0x10000: // SPIR-V 1.0 + case 0x10100: // SPIR-V 1.1 + return true; + + default: + return false; + } +} + +void Compiler::parse() +{ + auto len = spirv.size(); + if (len < 5) + SPIRV_CROSS_THROW("SPIRV file too small."); + + auto s = spirv.data(); + + // Endian-swap if we need to. 
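// SPIR-V's magic number is 0x07230203. A module produced on an opposite-endian host
// presents it as swap_endian(0x07230203) == 0x03022307, which the check below catches
// before byte-swapping every word in the binary.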
+ if (s[0] == swap_endian(MagicNumber)) + transform(begin(spirv), end(spirv), begin(spirv), [](uint32_t c) { return swap_endian(c); }); + + if (s[0] != MagicNumber || !is_valid_spirv_version(s[1])) + SPIRV_CROSS_THROW("Invalid SPIRV format."); + + uint32_t bound = s[3]; + ids.resize(bound); + meta.resize(bound); + + uint32_t offset = 5; + while (offset < len) + inst.emplace_back(spirv, offset); + + for (auto &i : inst) + parse(i); + + if (current_function) + SPIRV_CROSS_THROW("Function was not terminated."); + if (current_block) + SPIRV_CROSS_THROW("Block was not terminated."); +} + +void Compiler::flatten_interface_block(uint32_t id) +{ + auto &var = get(id); + auto &type = get(var.basetype); + auto flags = meta.at(type.self).decoration.decoration_flags; + + if (!type.array.empty()) + SPIRV_CROSS_THROW("Type is array of UBOs."); + if (type.basetype != SPIRType::Struct) + SPIRV_CROSS_THROW("Type is not a struct."); + if ((flags & (1ull << DecorationBlock)) == 0) + SPIRV_CROSS_THROW("Type is not a block."); + if (type.member_types.empty()) + SPIRV_CROSS_THROW("Member list of struct is empty."); + + uint32_t t = type.member_types[0]; + for (auto &m : type.member_types) + if (t != m) + SPIRV_CROSS_THROW("Types in block differ."); + + auto &mtype = get(t); + if (!mtype.array.empty()) + SPIRV_CROSS_THROW("Member type cannot be arrays."); + if (mtype.basetype == SPIRType::Struct) + SPIRV_CROSS_THROW("Member type cannot be struct."); + + // Inherit variable name from interface block name. + meta.at(var.self).decoration.alias = meta.at(type.self).decoration.alias; + + auto storage = var.storage; + if (storage == StorageClassUniform) + storage = StorageClassUniformConstant; + + // Change type definition in-place into an array instead. + // Access chains will still work as-is. + uint32_t array_size = uint32_t(type.member_types.size()); + type = mtype; + type.array.push_back(array_size); + type.pointer = true; + type.storage = storage; + var.storage = storage; +} + +void Compiler::update_name_cache(unordered_set &cache, string &name) +{ + if (name.empty()) + return; + + if (cache.find(name) == end(cache)) + { + cache.insert(name); + return; + } + + uint32_t counter = 0; + auto tmpname = name; + + // If there is a collision (very rare), + // keep tacking on extra identifier until it's unique. + do + { + counter++; + name = tmpname + "_" + convert_to_string(counter); + } while (cache.find(name) != end(cache)); + cache.insert(name); +} + +void Compiler::set_name(uint32_t id, const std::string &name) +{ + auto &str = meta.at(id).decoration.alias; + str.clear(); + + if (name.empty()) + return; + // Reserved for temporaries. + if (name[0] == '_' && name.size() >= 2 && isdigit(name[1])) + return; + + // Functions in glslangValidator are mangled with name( stuff. + // Normally, we would never see '(' in any legal indentifiers, so just strip them out. + str = name.substr(0, name.find('(')); + + for (uint32_t i = 0; i < str.size(); i++) + { + auto &c = str[i]; + + // _ variables are reserved by the internal implementation, + // otherwise, make sure the name is a valid identifier. + if (i == 0 || (str[0] == '_' && i == 1)) + c = isalpha(c) ? c : '_'; + else + c = isalnum(c) ? 
c : '_'; + } +} + +const SPIRType &Compiler::get_type(uint32_t id) const +{ + return get(id); +} + +void Compiler::set_member_decoration(uint32_t id, uint32_t index, Decoration decoration, uint32_t argument) +{ + meta.at(id).members.resize(max(meta[id].members.size(), size_t(index) + 1)); + auto &dec = meta.at(id).members[index]; + dec.decoration_flags |= 1ull << decoration; + + switch (decoration) + { + case DecorationBuiltIn: + dec.builtin = true; + dec.builtin_type = static_cast(argument); + break; + + case DecorationLocation: + dec.location = argument; + break; + + case DecorationOffset: + dec.offset = argument; + break; + + case DecorationSpecId: + dec.spec_id = argument; + break; + + default: + break; + } +} + +void Compiler::set_member_name(uint32_t id, uint32_t index, const std::string &name) +{ + meta.at(id).members.resize(max(meta[id].members.size(), size_t(index) + 1)); + meta.at(id).members[index].alias = name; +} + +const std::string &Compiler::get_member_name(uint32_t id, uint32_t index) const +{ + auto &m = meta.at(id); + if (index >= m.members.size()) + { + static string empty; + return empty; + } + + return m.members[index].alias; +} + +void Compiler::set_member_qualified_name(uint32_t id, uint32_t index, const std::string &name) +{ + meta.at(id).members.resize(max(meta[id].members.size(), size_t(index) + 1)); + meta.at(id).members[index].qualified_alias = name; +} + +uint32_t Compiler::get_member_decoration(uint32_t id, uint32_t index, Decoration decoration) const +{ + auto &m = meta.at(id); + if (index >= m.members.size()) + return 0; + + auto &dec = m.members[index]; + if (!(dec.decoration_flags & (1ull << decoration))) + return 0; + + switch (decoration) + { + case DecorationBuiltIn: + return dec.builtin_type; + case DecorationLocation: + return dec.location; + case DecorationOffset: + return dec.offset; + case DecorationSpecId: + return dec.spec_id; + default: + return 0; + } +} + +uint64_t Compiler::get_member_decoration_mask(uint32_t id, uint32_t index) const +{ + auto &m = meta.at(id); + if (index >= m.members.size()) + return 0; + + return m.members[index].decoration_flags; +} + +void Compiler::unset_member_decoration(uint32_t id, uint32_t index, Decoration decoration) +{ + auto &m = meta.at(id); + if (index >= m.members.size()) + return; + + auto &dec = m.members[index]; + + dec.decoration_flags &= ~(1ull << decoration); + switch (decoration) + { + case DecorationBuiltIn: + dec.builtin = false; + break; + + case DecorationLocation: + dec.location = 0; + break; + + case DecorationOffset: + dec.offset = 0; + break; + + case DecorationSpecId: + dec.spec_id = 0; + break; + + default: + break; + } +} + +void Compiler::set_decoration(uint32_t id, Decoration decoration, uint32_t argument) +{ + auto &dec = meta.at(id).decoration; + dec.decoration_flags |= 1ull << decoration; + + switch (decoration) + { + case DecorationBuiltIn: + dec.builtin = true; + dec.builtin_type = static_cast(argument); + break; + + case DecorationLocation: + dec.location = argument; + break; + + case DecorationOffset: + dec.offset = argument; + break; + + case DecorationArrayStride: + dec.array_stride = argument; + break; + + case DecorationBinding: + dec.binding = argument; + break; + + case DecorationDescriptorSet: + dec.set = argument; + break; + + case DecorationInputAttachmentIndex: + dec.input_attachment = argument; + break; + + case DecorationSpecId: + dec.spec_id = argument; + break; + + default: + break; + } +} + +StorageClass Compiler::get_storage_class(uint32_t id) const +{ + return 
get(id).storage; +} + +const std::string &Compiler::get_name(uint32_t id) const +{ + return meta.at(id).decoration.alias; +} + +uint64_t Compiler::get_decoration_mask(uint32_t id) const +{ + auto &dec = meta.at(id).decoration; + return dec.decoration_flags; +} + +uint32_t Compiler::get_decoration(uint32_t id, Decoration decoration) const +{ + auto &dec = meta.at(id).decoration; + if (!(dec.decoration_flags & (1ull << decoration))) + return 0; + + switch (decoration) + { + case DecorationBuiltIn: + return dec.builtin_type; + case DecorationLocation: + return dec.location; + case DecorationOffset: + return dec.offset; + case DecorationBinding: + return dec.binding; + case DecorationDescriptorSet: + return dec.set; + case DecorationInputAttachmentIndex: + return dec.input_attachment; + case DecorationSpecId: + return dec.spec_id; + default: + return 0; + } +} + +void Compiler::unset_decoration(uint32_t id, Decoration decoration) +{ + auto &dec = meta.at(id).decoration; + dec.decoration_flags &= ~(1ull << decoration); + switch (decoration) + { + case DecorationBuiltIn: + dec.builtin = false; + break; + + case DecorationLocation: + dec.location = 0; + break; + + case DecorationOffset: + dec.offset = 0; + break; + + case DecorationBinding: + dec.binding = 0; + break; + + case DecorationDescriptorSet: + dec.set = 0; + break; + + case DecorationInputAttachmentIndex: + dec.input_attachment = 0; + break; + + case DecorationSpecId: + dec.spec_id = 0; + break; + + default: + break; + } +} + +void Compiler::parse(const Instruction &instruction) +{ + auto ops = stream(instruction); + auto op = static_cast(instruction.op); + uint32_t length = instruction.length; + + switch (op) + { + case OpMemoryModel: + case OpSourceExtension: + case OpNop: + case OpLine: + break; + + case OpSource: + { + auto lang = static_cast(ops[0]); + switch (lang) + { + case SourceLanguageESSL: + source.es = true; + source.version = ops[1]; + source.known = true; + break; + + case SourceLanguageGLSL: + source.es = false; + source.version = ops[1]; + source.known = true; + break; + + default: + source.known = false; + break; + } + break; + } + + case OpUndef: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + set(id, result_type); + break; + } + + case OpCapability: + { + uint32_t cap = ops[0]; + if (cap == CapabilityKernel) + SPIRV_CROSS_THROW("Kernel capability not supported."); + break; + } + + case OpExtInstImport: + { + uint32_t id = ops[0]; + auto ext = extract_string(spirv, instruction.offset + 1); + if (ext == "GLSL.std.450") + set(id, SPIRExtension::GLSL); + else + SPIRV_CROSS_THROW("Only GLSL.std.450 extension interface supported."); + + break; + } + + case OpEntryPoint: + { + auto itr = + entry_points.insert(make_pair(ops[1], SPIREntryPoint(ops[1], static_cast(ops[0]), + extract_string(spirv, instruction.offset + 2)))); + auto &e = itr.first->second; + + // Strings need nul-terminator and consume the whole word. + uint32_t strlen_words = uint32_t((e.name.size() + 1 + 3) >> 2); + e.interface_variables.insert(end(e.interface_variables), ops + strlen_words + 2, ops + instruction.length); + + // Set the name of the entry point in case OpName is not provided later + set_name(ops[1], e.name); + + // If we don't have an entry, make the first one our "default". 
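// Worked example for the offset computed above: an entry point named "main" has
// e.name.size() == 4, so strlen_words == (4 + 1 + 3) >> 2 == 2 (the string plus its
// nul terminator, rounded up to whole words), and the interface-variable IDs begin at
// ops + 2 + 2.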
+ if (!entry_point) + entry_point = ops[1]; + break; + } + + case OpExecutionMode: + { + auto &execution = entry_points[ops[0]]; + auto mode = static_cast(ops[1]); + execution.flags |= 1ull << mode; + + switch (mode) + { + case ExecutionModeInvocations: + execution.invocations = ops[2]; + break; + + case ExecutionModeLocalSize: + execution.workgroup_size.x = ops[2]; + execution.workgroup_size.y = ops[3]; + execution.workgroup_size.z = ops[4]; + break; + + case ExecutionModeOutputVertices: + execution.output_vertices = ops[2]; + break; + + default: + break; + } + break; + } + + case OpName: + { + uint32_t id = ops[0]; + set_name(id, extract_string(spirv, instruction.offset + 1)); + break; + } + + case OpMemberName: + { + uint32_t id = ops[0]; + uint32_t member = ops[1]; + set_member_name(id, member, extract_string(spirv, instruction.offset + 2)); + break; + } + + case OpDecorate: + { + uint32_t id = ops[0]; + + auto decoration = static_cast(ops[1]); + if (length >= 3) + set_decoration(id, decoration, ops[2]); + else + set_decoration(id, decoration); + + break; + } + + case OpMemberDecorate: + { + uint32_t id = ops[0]; + uint32_t member = ops[1]; + auto decoration = static_cast(ops[2]); + if (length >= 4) + set_member_decoration(id, member, decoration, ops[3]); + else + set_member_decoration(id, member, decoration); + break; + } + + // Build up basic types. + case OpTypeVoid: + { + uint32_t id = ops[0]; + auto &type = set(id); + type.basetype = SPIRType::Void; + break; + } + + case OpTypeBool: + { + uint32_t id = ops[0]; + auto &type = set(id); + type.basetype = SPIRType::Boolean; + type.width = 1; + break; + } + + case OpTypeFloat: + { + uint32_t id = ops[0]; + uint32_t width = ops[1]; + auto &type = set(id); + type.basetype = width > 32 ? SPIRType::Double : SPIRType::Float; + type.width = width; + break; + } + + case OpTypeInt: + { + uint32_t id = ops[0]; + uint32_t width = ops[1]; + auto &type = set(id); + type.basetype = + ops[2] ? (width > 32 ? SPIRType::Int64 : SPIRType::Int) : (width > 32 ? SPIRType::UInt64 : SPIRType::UInt); + type.width = width; + break; + } + + // Build composite types by "inheriting". + // NOTE: The self member is also copied! For pointers and array modifiers this is a good thing + // since we can refer to decorations on pointee classes which is needed for UBO/SSBO, I/O blocks in geometry/tess etc. + case OpTypeVector: + { + uint32_t id = ops[0]; + uint32_t vecsize = ops[2]; + + auto &base = get(ops[1]); + auto &vecbase = set(id); + + vecbase = base; + vecbase.vecsize = vecsize; + vecbase.self = id; + break; + } + + case OpTypeMatrix: + { + uint32_t id = ops[0]; + uint32_t colcount = ops[2]; + + auto &base = get(ops[1]); + auto &matrixbase = set(id); + + matrixbase = base; + matrixbase.columns = colcount; + matrixbase.self = id; + break; + } + + case OpTypeArray: + { + uint32_t id = ops[0]; + + auto &base = get(ops[1]); + auto &arraybase = set(id); + + arraybase = base; + + auto *c = maybe_get(ops[2]); + bool literal = c && !c->specialization; + + arraybase.array_size_literal.push_back(literal); + arraybase.array.push_back(literal ? c->scalar() : ops[2]); + // Do NOT set arraybase.self! + break; + } + + case OpTypeRuntimeArray: + { + uint32_t id = ops[0]; + + auto &base = get(ops[1]); + auto &arraybase = set(id); + + arraybase = base; + arraybase.array.push_back(0); + arraybase.array_size_literal.push_back(true); + // Do NOT set arraybase.self! 
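// Representation recap for the two array cases above: a sized array like float[4]
// stores array = { 4 } with array_size_literal = { true }; a runtime array stores a
// 0 placeholder (also marked literal); and a specialization-constant size stores the
// constant's ID with array_size_literal = false, so consumers must check the flag
// before interpreting the value.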
+ break; + } + + case OpTypeImage: + { + uint32_t id = ops[0]; + auto &type = set(id); + type.basetype = SPIRType::Image; + type.image.type = ops[1]; + type.image.dim = static_cast(ops[2]); + type.image.depth = ops[3] != 0; + type.image.arrayed = ops[4] != 0; + type.image.ms = ops[5] != 0; + type.image.sampled = ops[6]; + type.image.format = static_cast(ops[7]); + break; + } + + case OpTypeSampledImage: + { + uint32_t id = ops[0]; + uint32_t imagetype = ops[1]; + auto &type = set(id); + type = get(imagetype); + type.basetype = SPIRType::SampledImage; + type.self = id; + break; + } + + // Not really used. + case OpTypeSampler: + { + uint32_t id = ops[0]; + auto &type = set(id); + type.basetype = SPIRType::Sampler; + break; + } + + case OpTypePointer: + { + uint32_t id = ops[0]; + + auto &base = get(ops[2]); + auto &ptrbase = set(id); + + ptrbase = base; + if (ptrbase.pointer) + SPIRV_CROSS_THROW("Cannot make pointer-to-pointer type."); + ptrbase.pointer = true; + ptrbase.storage = static_cast(ops[1]); + + if (ptrbase.storage == StorageClassAtomicCounter) + ptrbase.basetype = SPIRType::AtomicCounter; + + // Do NOT set ptrbase.self! + break; + } + + case OpTypeStruct: + { + uint32_t id = ops[0]; + auto &type = set(id); + type.basetype = SPIRType::Struct; + for (uint32_t i = 1; i < length; i++) + type.member_types.push_back(ops[i]); + + // Check if we have seen this struct type before, with just different + // decorations. + // + // Add workaround for issue #17 as well by looking at OpName for the struct + // types, which we shouldn't normally do. + // We should not normally have to consider type aliases like this to begin with + // however ... glslang issues #304, #307 cover this. + for (auto &other : global_struct_cache) + { + if (get_name(type.self) == get_name(other) && types_are_logically_equivalent(type, get(other))) + { + type.type_alias = other; + break; + } + } + + if (type.type_alias == 0) + global_struct_cache.push_back(id); + break; + } + + case OpTypeFunction: + { + uint32_t id = ops[0]; + uint32_t ret = ops[1]; + + auto &func = set(id, ret); + for (uint32_t i = 2; i < length; i++) + func.parameter_types.push_back(ops[i]); + break; + } + + // Variable declaration + // All variables are essentially pointers with a storage qualifier. + case OpVariable: + { + uint32_t type = ops[0]; + uint32_t id = ops[1]; + auto storage = static_cast(ops[2]); + uint32_t initializer = length == 4 ? ops[3] : 0; + + if (storage == StorageClassFunction) + { + if (!current_function) + SPIRV_CROSS_THROW("No function currently in scope"); + current_function->add_local_variable(id); + } + else if (storage == StorageClassPrivate || storage == StorageClassWorkgroup || storage == StorageClassOutput) + { + global_variables.push_back(id); + } + + auto &var = set(id, type, storage, initializer); + + if (variable_storage_is_aliased(var)) + aliased_variables.push_back(var.self); + + // glslangValidator does not emit required qualifiers here. + // Solve this by making the image access as restricted as possible + // and loosen up if we need to. + auto &vartype = expression_type(id); + if (vartype.basetype == SPIRType::Image) + { + auto &flags = meta.at(id).decoration.decoration_flags; + flags |= 1ull << DecorationNonWritable; + flags |= 1ull << DecorationNonReadable; + } + + break; + } + + // OpPhi + // OpPhi is a fairly magical opcode. + // It selects temporary variables based on which parent block we *came from*. 
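// (For example, "%x = OpPhi %int %a %blockA %b %blockB" yields %a when control
// arrived from %blockA and %b when it arrived from %blockB.)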
+ // In high-level languages we can "de-SSA" by creating a function local, and flush out temporaries to this function-local + // variable to emulate SSA Phi. + case OpPhi: + { + if (!current_function) + SPIRV_CROSS_THROW("No function currently in scope"); + if (!current_block) + SPIRV_CROSS_THROW("No block currently in scope"); + + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + // Instead of a temporary, create a new function-wide temporary with this ID instead. + auto &var = set(id, result_type, spv::StorageClassFunction); + var.phi_variable = true; + + current_function->add_local_variable(id); + + for (uint32_t i = 2; i + 2 <= length; i += 2) + current_block->phi_variables.push_back({ ops[i], ops[i + 1], id }); + break; + } + + // Constants + case OpSpecConstant: + case OpConstant: + { + uint32_t id = ops[1]; + auto &type = get(ops[0]); + if (type.width > 32) + set(id, ops[0], ops[2] | (uint64_t(ops[3]) << 32)).specialization = op == OpSpecConstant; + else + set(id, ops[0], ops[2]).specialization = op == OpSpecConstant; + break; + } + + case OpSpecConstantFalse: + case OpConstantFalse: + { + uint32_t id = ops[1]; + set(id, ops[0], uint32_t(0)).specialization = op == OpSpecConstantFalse; + break; + } + + case OpSpecConstantTrue: + case OpConstantTrue: + { + uint32_t id = ops[1]; + set(id, ops[0], uint32_t(1)).specialization = op == OpSpecConstantTrue; + break; + } + + case OpSpecConstantComposite: + case OpConstantComposite: + { + uint32_t id = ops[1]; + uint32_t type = ops[0]; + + auto &ctype = get(type); + SPIRConstant *constant = nullptr; + + // We can have constants which are structs and arrays. + // In this case, our SPIRConstant will be a list of other SPIRConstant ids which we + // can refer to. + if (ctype.basetype == SPIRType::Struct || !ctype.array.empty()) + { + constant = &set(id, type, ops + 2, length - 2); + constant->specialization = op == OpSpecConstantComposite; + break; + } + + bool type_64bit = ctype.width > 32; + bool matrix = ctype.columns > 1; + + if (matrix) + { + switch (length - 2) + { + case 1: + constant = &set(id, type, get(ops[2]).vector()); + break; + + case 2: + constant = &set(id, type, get(ops[2]).vector(), + get(ops[3]).vector()); + break; + + case 3: + constant = &set(id, type, get(ops[2]).vector(), + get(ops[3]).vector(), get(ops[4]).vector()); + break; + + case 4: + constant = + &set(id, type, get(ops[2]).vector(), get(ops[3]).vector(), + get(ops[4]).vector(), get(ops[5]).vector()); + break; + + default: + SPIRV_CROSS_THROW("OpConstantComposite only supports 1, 2, 3 and 4 columns."); + } + } + else + { + switch (length - 2) + { + case 1: + if (type_64bit) + constant = &set(id, type, get(ops[2]).scalar_u64()); + else + constant = &set(id, type, get(ops[2]).scalar()); + break; + + case 2: + if (type_64bit) + { + constant = &set(id, type, get(ops[2]).scalar_u64(), + get(ops[3]).scalar_u64()); + } + else + { + constant = &set(id, type, get(ops[2]).scalar(), + get(ops[3]).scalar()); + } + break; + + case 3: + if (type_64bit) + { + constant = &set(id, type, get(ops[2]).scalar_u64(), + get(ops[3]).scalar_u64(), + get(ops[4]).scalar_u64()); + } + else + { + constant = + &set(id, type, get(ops[2]).scalar(), + get(ops[3]).scalar(), get(ops[4]).scalar()); + } + break; + + case 4: + if (type_64bit) + { + constant = &set( + id, type, get(ops[2]).scalar_u64(), get(ops[3]).scalar_u64(), + get(ops[4]).scalar_u64(), get(ops[5]).scalar_u64()); + } + else + { + constant = &set( + id, type, get(ops[2]).scalar(), get(ops[3]).scalar(), + get(ops[4]).scalar(), 
get(ops[5]).scalar()); + } + break; + + default: + SPIRV_CROSS_THROW("OpConstantComposite only supports 1, 2, 3 and 4 components."); + } + } + + constant->specialization = op == OpSpecConstantComposite; + break; + } + + // Functions + case OpFunction: + { + uint32_t res = ops[0]; + uint32_t id = ops[1]; + // Control + uint32_t type = ops[3]; + + if (current_function) + SPIRV_CROSS_THROW("Must end a function before starting a new one!"); + + current_function = &set(id, res, type); + break; + } + + case OpFunctionParameter: + { + uint32_t type = ops[0]; + uint32_t id = ops[1]; + + if (!current_function) + SPIRV_CROSS_THROW("Must be in a function!"); + + current_function->add_parameter(type, id); + set(id, type, StorageClassFunction); + break; + } + + case OpFunctionEnd: + { + if (current_block) + { + // Very specific error message, but seems to come up quite often. + SPIRV_CROSS_THROW( + "Cannot end a function before ending the current block.\n" + "Likely cause: If this SPIR-V was created from glslang HLSL, make sure the entry point is valid."); + } + current_function = nullptr; + break; + } + + // Blocks + case OpLabel: + { + // OpLabel always starts a block. + if (!current_function) + SPIRV_CROSS_THROW("Blocks cannot exist outside functions!"); + + uint32_t id = ops[0]; + + current_function->blocks.push_back(id); + if (!current_function->entry_block) + current_function->entry_block = id; + + if (current_block) + SPIRV_CROSS_THROW("Cannot start a block before ending the current block."); + + current_block = &set(id); + break; + } + + // Branch instructions end blocks. + case OpBranch: + { + if (!current_block) + SPIRV_CROSS_THROW("Trying to end a non-existing block."); + + uint32_t target = ops[0]; + current_block->terminator = SPIRBlock::Direct; + current_block->next_block = target; + current_block = nullptr; + break; + } + + case OpBranchConditional: + { + if (!current_block) + SPIRV_CROSS_THROW("Trying to end a non-existing block."); + + current_block->condition = ops[0]; + current_block->true_block = ops[1]; + current_block->false_block = ops[2]; + + current_block->terminator = SPIRBlock::Select; + current_block = nullptr; + break; + } + + case OpSwitch: + { + if (!current_block) + SPIRV_CROSS_THROW("Trying to end a non-existing block."); + + if (current_block->merge == SPIRBlock::MergeNone) + SPIRV_CROSS_THROW("Switch statement is not structured"); + + current_block->terminator = SPIRBlock::MultiSelect; + + current_block->condition = ops[0]; + current_block->default_block = ops[1]; + + for (uint32_t i = 2; i + 2 <= length; i += 2) + current_block->cases.push_back({ ops[i], ops[i + 1] }); + + // If we jump to next block, make it break instead since we're inside a switch case block at that point. 
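// (Operand layout feeding the loop above: after the selector and default label,
// OpSwitch carries flat (literal, label) pairs, so operands
// [%sel, %default, 0, %A, 1, %B] parse into cases { {0, %A}, {1, %B} }.)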
+ multiselect_merge_targets.insert(current_block->next_block); + + current_block = nullptr; + break; + } + + case OpKill: + { + if (!current_block) + SPIRV_CROSS_THROW("Trying to end a non-existing block."); + current_block->terminator = SPIRBlock::Kill; + current_block = nullptr; + break; + } + + case OpReturn: + { + if (!current_block) + SPIRV_CROSS_THROW("Trying to end a non-existing block."); + current_block->terminator = SPIRBlock::Return; + current_block = nullptr; + break; + } + + case OpReturnValue: + { + if (!current_block) + SPIRV_CROSS_THROW("Trying to end a non-existing block."); + current_block->terminator = SPIRBlock::Return; + current_block->return_value = ops[0]; + current_block = nullptr; + break; + } + + case OpUnreachable: + { + if (!current_block) + SPIRV_CROSS_THROW("Trying to end a non-existing block."); + current_block->terminator = SPIRBlock::Unreachable; + current_block = nullptr; + break; + } + + case OpSelectionMerge: + { + if (!current_block) + SPIRV_CROSS_THROW("Trying to modify a non-existing block."); + + current_block->next_block = ops[0]; + current_block->merge = SPIRBlock::MergeSelection; + selection_merge_targets.insert(current_block->next_block); + break; + } + + case OpLoopMerge: + { + if (!current_block) + SPIRV_CROSS_THROW("Trying to modify a non-existing block."); + + current_block->merge_block = ops[0]; + current_block->continue_block = ops[1]; + current_block->merge = SPIRBlock::MergeLoop; + + loop_blocks.insert(current_block->self); + loop_merge_targets.insert(current_block->merge_block); + + // Don't add loop headers to continue blocks, + // which would make it impossible branch into the loop header since + // they are treated as continues. + if (current_block->continue_block != current_block->self) + continue_blocks.insert(current_block->continue_block); + break; + } + + case OpSpecConstantOp: + { + if (length < 3) + SPIRV_CROSS_THROW("OpSpecConstantOp not enough arguments."); + + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + auto spec_op = static_cast(ops[2]); + + set(id, result_type, spec_op, ops + 3, length - 3); + break; + } + + // Actual opcodes. + default: + { + if (!current_block) + SPIRV_CROSS_THROW("Currently no block to insert opcode."); + + current_block->ops.push_back(instruction); + break; + } + } +} + +bool Compiler::block_is_loop_candidate(const SPIRBlock &block, SPIRBlock::Method method) const +{ + // Tried and failed. + if (block.disable_block_optimization || block.complex_continue) + return false; + + if (method == SPIRBlock::MergeToSelectForLoop) + { + // Try to detect common for loop pattern + // which the code backend can use to create cleaner code. + // for(;;) { if (cond) { some_body; } else { break; } } + // is the pattern we're looking for. + bool ret = block.terminator == SPIRBlock::Select && block.merge == SPIRBlock::MergeLoop && + block.true_block != block.merge_block && block.true_block != block.self && + block.false_block == block.merge_block; + + // If we have OpPhi which depends on branches which came from our own block, + // we need to flush phi variables in else block instead of a trivial break, + // so we cannot assume this is a for loop candidate. 
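// When the pattern does match, the backend can rewrite, in GLSL terms,
//
//     for (;;) { if (cond) { body; } else { break; } }
//
// into the equivalent "while (cond) { body; }", which is the cleaner form this
// detection enables.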
+ if (ret) + { + for (auto &phi : block.phi_variables) + if (phi.parent == block.self) + return false; + + auto *merge = maybe_get(block.merge_block); + if (merge) + for (auto &phi : merge->phi_variables) + if (phi.parent == block.self) + return false; + } + return ret; + } + else if (method == SPIRBlock::MergeToDirectForLoop) + { + // Empty loop header that just sets up merge target + // and branches to loop body. + bool ret = block.terminator == SPIRBlock::Direct && block.merge == SPIRBlock::MergeLoop && block.ops.empty(); + + if (!ret) + return false; + + auto &child = get(block.next_block); + ret = child.terminator == SPIRBlock::Select && child.merge == SPIRBlock::MergeNone && + child.false_block == block.merge_block && child.true_block != block.merge_block && + child.true_block != block.self; + + // If we have OpPhi which depends on branches which came from our own block, + // we need to flush phi variables in else block instead of a trivial break, + // so we cannot assume this is a for loop candidate. + if (ret) + { + for (auto &phi : block.phi_variables) + if (phi.parent == block.self || phi.parent == child.self) + return false; + + for (auto &phi : child.phi_variables) + if (phi.parent == block.self) + return false; + + auto *merge = maybe_get(block.merge_block); + if (merge) + for (auto &phi : merge->phi_variables) + if (phi.parent == block.self || phi.parent == child.false_block) + return false; + } + + return ret; + } + else + return false; +} + +bool Compiler::block_is_outside_flow_control_from_block(const SPIRBlock &from, const SPIRBlock &to) +{ + auto *start = &from; + + if (start->self == to.self) + return true; + + // Break cycles. + if (is_continue(start->self)) + return false; + + // If our select block doesn't merge, we must break or continue in these blocks, + // so if continues occur branchless within these blocks, consider them branchless as well. + // This is typically used for loop control. + if (start->terminator == SPIRBlock::Select && start->merge == SPIRBlock::MergeNone && + (block_is_outside_flow_control_from_block(get(start->true_block), to) || + block_is_outside_flow_control_from_block(get(start->false_block), to))) + { + return true; + } + else if (start->merge_block && block_is_outside_flow_control_from_block(get(start->merge_block), to)) + { + return true; + } + else if (start->next_block && block_is_outside_flow_control_from_block(get(start->next_block), to)) + { + return true; + } + else + return false; +} + +bool Compiler::execution_is_noop(const SPIRBlock &from, const SPIRBlock &to) const +{ + if (!execution_is_branchless(from, to)) + return false; + + auto *start = &from; + for (;;) + { + if (start->self == to.self) + return true; + + if (!start->ops.empty()) + return false; + + start = &get(start->next_block); + } +} + +bool Compiler::execution_is_branchless(const SPIRBlock &from, const SPIRBlock &to) const +{ + auto *start = &from; + for (;;) + { + if (start->self == to.self) + return true; + + if (start->terminator == SPIRBlock::Direct && start->merge == SPIRBlock::MergeNone) + start = &get(start->next_block); + else + return false; + } +} + +SPIRBlock::ContinueBlockType Compiler::continue_block_type(const SPIRBlock &block) const +{ + // The block was deemed too complex during code emit, pick conservative fallback paths. + if (block.complex_continue) + return SPIRBlock::ComplexLoop; + + // In older glslang output continue block can be equal to the loop header. 
+ // In this case, execution is clearly branchless, so just assume a while loop header here. + if (block.merge == SPIRBlock::MergeLoop) + return SPIRBlock::WhileLoop; + + auto &dominator = get(block.loop_dominator); + + if (execution_is_noop(block, dominator)) + return SPIRBlock::WhileLoop; + else if (execution_is_branchless(block, dominator)) + return SPIRBlock::ForLoop; + else + { + if (block.merge == SPIRBlock::MergeNone && block.terminator == SPIRBlock::Select && + block.true_block == dominator.self && block.false_block == dominator.merge_block) + { + return SPIRBlock::DoWhileLoop; + } + else + return SPIRBlock::ComplexLoop; + } +} + +bool Compiler::traverse_all_reachable_opcodes(const SPIRBlock &block, OpcodeHandler &handler) const +{ + handler.set_current_block(block); + + // Ideally, perhaps traverse the CFG instead of all blocks in order to eliminate dead blocks, + // but this shouldn't be a problem in practice unless the SPIR-V is doing insane things like recursing + // inside dead blocks ... + for (auto &i : block.ops) + { + auto ops = stream(i); + auto op = static_cast(i.op); + + if (!handler.handle(op, ops, i.length)) + return false; + + if (op == OpFunctionCall) + { + auto &func = get(ops[2]); + if (handler.follow_function_call(func)) + { + if (!handler.begin_function_scope(ops, i.length)) + return false; + if (!traverse_all_reachable_opcodes(get(ops[2]), handler)) + return false; + if (!handler.end_function_scope(ops, i.length)) + return false; + } + } + } + + return true; +} + +bool Compiler::traverse_all_reachable_opcodes(const SPIRFunction &func, OpcodeHandler &handler) const +{ + for (auto block : func.blocks) + if (!traverse_all_reachable_opcodes(get(block), handler)) + return false; + + return true; +} + +uint32_t Compiler::type_struct_member_offset(const SPIRType &type, uint32_t index) const +{ + // Decoration must be set in valid SPIR-V, otherwise throw. + auto &dec = meta[type.self].members.at(index); + if (dec.decoration_flags & (1ull << DecorationOffset)) + return dec.offset; + else + SPIRV_CROSS_THROW("Struct member does not have Offset set."); +} + +uint32_t Compiler::type_struct_member_array_stride(const SPIRType &type, uint32_t index) const +{ + // Decoration must be set in valid SPIR-V, otherwise throw. + // ArrayStride is part of the array type not OpMemberDecorate. + auto &dec = meta[type.member_types[index]].decoration; + if (dec.decoration_flags & (1ull << DecorationArrayStride)) + return dec.array_stride; + else + SPIRV_CROSS_THROW("Struct member does not have ArrayStride set."); +} + +size_t Compiler::get_declared_struct_size(const SPIRType &type) const +{ + uint32_t last = uint32_t(type.member_types.size() - 1); + size_t offset = type_struct_member_offset(type, last); + size_t size = get_declared_struct_member_size(type, last); + return offset + size; +} + +size_t Compiler::get_declared_struct_member_size(const SPIRType &struct_type, uint32_t index) const +{ + auto flags = get_member_decoration_mask(struct_type.self, index); + auto &type = get(struct_type.member_types[index]); + + if (type.basetype != SPIRType::Struct) + { + switch (type.basetype) + { + case SPIRType::Unknown: + case SPIRType::Void: + case SPIRType::Boolean: // Bools are purely logical, and cannot be used for externally visible types. 
+ case SPIRType::AtomicCounter: + case SPIRType::Image: + case SPIRType::SampledImage: + case SPIRType::Sampler: + SPIRV_CROSS_THROW("Querying size for object with opaque size.\n"); + + default: + break; + } + + size_t component_size = type.width / 8; + unsigned vecsize = type.vecsize; + unsigned columns = type.columns; + + if (type.array.empty()) + { + // Vectors. + if (columns == 1) + return vecsize * component_size; + else + { + // Per SPIR-V spec, matrices must be tightly packed and aligned up for vec3 accesses. + if ((flags & (1ull << DecorationRowMajor)) && columns == 3) + columns = 4; + else if ((flags & (1ull << DecorationColMajor)) && vecsize == 3) + vecsize = 4; + + return vecsize * columns * component_size; + } + } + else + { + // For arrays, we can use ArrayStride to get an easy check. + return type_struct_member_array_stride(struct_type, index) * type.array.back(); + } + } + else + { + // Recurse. + uint32_t last = uint32_t(struct_type.member_types.size() - 1); + uint32_t offset = type_struct_member_offset(struct_type, last); + size_t size; + + // If we have an array of structs inside our struct, handle that with array strides instead. + auto &last_type = get(struct_type.member_types.back()); + if (last_type.array.empty()) + size = get_declared_struct_size(last_type); + else + size = type_struct_member_array_stride(struct_type, last) * last_type.array.back(); + return offset + size; + } +} + +bool Compiler::BufferAccessHandler::handle(Op opcode, const uint32_t *args, uint32_t length) +{ + if (opcode != OpAccessChain && opcode != OpInBoundsAccessChain) + return true; + + // Invalid SPIR-V. + if (length < 4) + return false; + + if (args[2] != id) + return true; + + // Don't bother traversing the entire access chain tree yet. + // If we access a struct member, assume we access the entire member. + uint32_t index = compiler.get(args[3]).scalar(); + + // Seen this index already. + if (seen.find(index) != end(seen)) + return true; + seen.insert(index); + + auto &type = compiler.expression_type(id); + uint32_t offset = compiler.type_struct_member_offset(type, index); + + size_t range; + // If we have another member in the struct, deduce the range by looking at the next member. + // This is okay since structs in SPIR-V can have padding, but Offset decoration must be + // monotonically increasing. + // Of course, this doesn't take into account if the SPIR-V for some reason decided to add + // very large amounts of padding, but that's not really a big deal. + if (index + 1 < type.member_types.size()) + { + range = compiler.type_struct_member_offset(type, index + 1) - offset; + } + else + { + // No padding, so just deduce it from the size of the member directly. + range = compiler.get_declared_struct_member_size(type, index); + } + + ranges.push_back({ index, offset, range }); + return true; +} + +std::vector Compiler::get_active_buffer_ranges(uint32_t id) const +{ + std::vector ranges; + BufferAccessHandler handler(*this, ranges, id); + traverse_all_reachable_opcodes(get(entry_point), handler); + return ranges; +} + +// Increase the number of IDs by the specified incremental amount. +// Returns the value of the first ID available for use in the expanded bound. 
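+// For example (illustrative sketch), allocating three consecutive IDs for a new
+// type, pointer type and variable, as register_combined_image_sampler() does:
+//
+//   uint32_t base = increase_bound_by(3);
+//   uint32_t type_id = base + 0, ptr_type_id = base + 1, combined_id = base + 2;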
+uint32_t Compiler::increase_bound_by(uint32_t incr_amount) +{ + auto curr_bound = ids.size(); + auto new_bound = curr_bound + incr_amount; + ids.resize(new_bound); + meta.resize(new_bound); + return uint32_t(curr_bound); +} + +bool Compiler::types_are_logically_equivalent(const SPIRType &a, const SPIRType &b) const +{ + if (a.basetype != b.basetype) + return false; + if (a.width != b.width) + return false; + if (a.vecsize != b.vecsize) + return false; + if (a.columns != b.columns) + return false; + if (a.array.size() != b.array.size()) + return false; + + size_t array_count = a.array.size(); + if (array_count && memcmp(a.array.data(), b.array.data(), array_count * sizeof(uint32_t)) != 0) + return false; + + if (a.basetype == SPIRType::Image || a.basetype == SPIRType::SampledImage) + { + if (memcmp(&a.image, &b.image, sizeof(SPIRType::Image)) != 0) + return false; + } + + if (a.member_types.size() != b.member_types.size()) + return false; + + size_t member_types = a.member_types.size(); + for (size_t i = 0; i < member_types; i++) + { + if (!types_are_logically_equivalent(get(a.member_types[i]), get(b.member_types[i]))) + return false; + } + + return true; +} + +uint64_t Compiler::get_execution_mode_mask() const +{ + return get_entry_point().flags; +} + +void Compiler::set_execution_mode(ExecutionMode mode, uint32_t arg0, uint32_t arg1, uint32_t arg2) +{ + auto &execution = get_entry_point(); + + execution.flags |= 1ull << mode; + switch (mode) + { + case ExecutionModeLocalSize: + execution.workgroup_size.x = arg0; + execution.workgroup_size.y = arg1; + execution.workgroup_size.z = arg2; + break; + + case ExecutionModeInvocations: + execution.invocations = arg0; + break; + + case ExecutionModeOutputVertices: + execution.output_vertices = arg0; + break; + + default: + break; + } +} + +void Compiler::unset_execution_mode(ExecutionMode mode) +{ + auto &execution = get_entry_point(); + execution.flags &= ~(1ull << mode); +} + +uint32_t Compiler::get_execution_mode_argument(spv::ExecutionMode mode, uint32_t index) const +{ + auto &execution = get_entry_point(); + switch (mode) + { + case ExecutionModeLocalSize: + switch (index) + { + case 0: + return execution.workgroup_size.x; + case 1: + return execution.workgroup_size.y; + case 2: + return execution.workgroup_size.z; + default: + return 0; + } + + case ExecutionModeInvocations: + return execution.invocations; + + case ExecutionModeOutputVertices: + return execution.output_vertices; + + default: + return 0; + } +} + +ExecutionModel Compiler::get_execution_model() const +{ + auto &execution = get_entry_point(); + return execution.model; +} + +void Compiler::set_remapped_variable_state(uint32_t id, bool remap_enable) +{ + get(id).remapped_variable = remap_enable; +} + +bool Compiler::get_remapped_variable_state(uint32_t id) const +{ + return get(id).remapped_variable; +} + +void Compiler::set_subpass_input_remapped_components(uint32_t id, uint32_t components) +{ + get(id).remapped_components = components; +} + +uint32_t Compiler::get_subpass_input_remapped_components(uint32_t id) const +{ + return get(id).remapped_components; +} + +void Compiler::inherit_expression_dependencies(uint32_t dst, uint32_t source_expression) +{ + auto &e = get(dst); + auto *s = maybe_get(source_expression); + if (!s) + return; + + auto &e_deps = e.expression_dependencies; + auto &s_deps = s->expression_dependencies; + + // If we depend on a expression, we also depend on all sub-dependencies from source. 
+ e_deps.push_back(source_expression); + e_deps.insert(end(e_deps), begin(s_deps), end(s_deps)); + + // Eliminate duplicated dependencies. + e_deps.erase(unique(begin(e_deps), end(e_deps)), end(e_deps)); +} + +vector Compiler::get_entry_points() const +{ + vector entries; + for (auto &entry : entry_points) + entries.push_back(entry.second.name); + return entries; +} + +void Compiler::set_entry_point(const std::string &name) +{ + auto &entry = get_entry_point(name); + entry_point = entry.self; +} + +SPIREntryPoint &Compiler::get_entry_point(const std::string &name) +{ + auto itr = + find_if(begin(entry_points), end(entry_points), + [&](const std::pair &entry) -> bool { return entry.second.name == name; }); + + if (itr == end(entry_points)) + SPIRV_CROSS_THROW("Entry point does not exist."); + + return itr->second; +} + +const SPIREntryPoint &Compiler::get_entry_point(const std::string &name) const +{ + auto itr = + find_if(begin(entry_points), end(entry_points), + [&](const std::pair &entry) -> bool { return entry.second.name == name; }); + + if (itr == end(entry_points)) + SPIRV_CROSS_THROW("Entry point does not exist."); + + return itr->second; +} + +const SPIREntryPoint &Compiler::get_entry_point() const +{ + return entry_points.find(entry_point)->second; +} + +SPIREntryPoint &Compiler::get_entry_point() +{ + return entry_points.find(entry_point)->second; +} + +bool Compiler::interface_variable_exists_in_entry_point(uint32_t id) const +{ + auto &var = get(id); + if (var.storage != StorageClassInput && var.storage != StorageClassOutput) + SPIRV_CROSS_THROW("Only Input and Output variables are part of a shader linking interface."); + + // This is to avoid potential problems with very old glslang versions which did + // not emit input/output interfaces properly. + // We can assume they only had a single entry point, and single entry point + // shaders could easily be assumed to use every interface variable anyways. + if (entry_points.size() <= 1) + return true; + + auto &execution = get_entry_point(); + return find(begin(execution.interface_variables), end(execution.interface_variables), id) != + end(execution.interface_variables); +} + +void Compiler::CombinedImageSamplerHandler::push_remap_parameters(const SPIRFunction &func, const uint32_t *args, + uint32_t length) +{ + // If possible, pipe through a remapping table so that parameters know + // which variables they actually bind to in this scope. 
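+	// For instance, if the caller passes a global sampler for a sampler parameter,
+	// the parameter's ID maps back to that global variable's ID in this table.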
+ unordered_map remapping; + for (uint32_t i = 0; i < length; i++) + remapping[func.arguments[i].id] = remap_parameter(args[i]); + parameter_remapping.push(move(remapping)); +} + +void Compiler::CombinedImageSamplerHandler::pop_remap_parameters() +{ + parameter_remapping.pop(); +} + +uint32_t Compiler::CombinedImageSamplerHandler::remap_parameter(uint32_t id) +{ + auto *var = compiler.maybe_get_backing_variable(id); + if (var) + id = var->self; + + if (parameter_remapping.empty()) + return id; + + auto &remapping = parameter_remapping.top(); + auto itr = remapping.find(id); + if (itr != end(remapping)) + return itr->second; + else + return id; +} + +bool Compiler::CombinedImageSamplerHandler::begin_function_scope(const uint32_t *args, uint32_t length) +{ + if (length < 3) + return false; + + auto &callee = compiler.get(args[2]); + args += 3; + length -= 3; + push_remap_parameters(callee, args, length); + functions.push(&callee); + return true; +} + +bool Compiler::CombinedImageSamplerHandler::end_function_scope(const uint32_t *args, uint32_t length) +{ + if (length < 3) + return false; + + auto &callee = compiler.get(args[2]); + args += 3; + length -= 3; + + // There are two types of cases we have to handle, + // a callee might call sampler2D(texture2D, sampler) directly where + // one or more parameters originate from parameters. + // Alternatively, we need to provide combined image samplers to our callees, + // and in this case we need to add those as well. + + pop_remap_parameters(); + + // Our callee has now been processed at least once. + // No point in doing it again. + callee.do_combined_parameters = false; + + auto ¶ms = functions.top()->combined_parameters; + functions.pop(); + if (functions.empty()) + return true; + + auto &caller = *functions.top(); + if (caller.do_combined_parameters) + { + for (auto ¶m : params) + { + uint32_t image_id = param.global_image ? param.image_id : args[param.image_id]; + uint32_t sampler_id = param.global_sampler ? param.sampler_id : args[param.sampler_id]; + + auto *i = compiler.maybe_get_backing_variable(image_id); + auto *s = compiler.maybe_get_backing_variable(sampler_id); + if (i) + image_id = i->self; + if (s) + sampler_id = s->self; + + register_combined_image_sampler(caller, image_id, sampler_id); + } + } + + return true; +} + +void Compiler::CombinedImageSamplerHandler::register_combined_image_sampler(SPIRFunction &caller, uint32_t image_id, + uint32_t sampler_id) +{ + // We now have a texture ID and a sampler ID which will either be found as a global + // or a parameter in our own function. If both are global, they will not need a parameter, + // otherwise, add it to our list. 
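+	// Note that when global_image/global_sampler end up false below, image_id and
+	// sampler_id hold argument indices into the caller, not variable IDs.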
+ SPIRFunction::CombinedImageSamplerParameter param = { + 0u, image_id, sampler_id, true, true, + }; + + auto texture_itr = find_if(begin(caller.arguments), end(caller.arguments), + [image_id](const SPIRFunction::Parameter &p) { return p.id == image_id; }); + auto sampler_itr = find_if(begin(caller.arguments), end(caller.arguments), + [sampler_id](const SPIRFunction::Parameter &p) { return p.id == sampler_id; }); + + if (texture_itr != end(caller.arguments)) + { + param.global_image = false; + param.image_id = uint32_t(texture_itr - begin(caller.arguments)); + } + + if (sampler_itr != end(caller.arguments)) + { + param.global_sampler = false; + param.sampler_id = uint32_t(sampler_itr - begin(caller.arguments)); + } + + if (param.global_image && param.global_sampler) + return; + + auto itr = find_if(begin(caller.combined_parameters), end(caller.combined_parameters), + [¶m](const SPIRFunction::CombinedImageSamplerParameter &p) { + return param.image_id == p.image_id && param.sampler_id == p.sampler_id && + param.global_image == p.global_image && param.global_sampler == p.global_sampler; + }); + + if (itr == end(caller.combined_parameters)) + { + uint32_t id = compiler.increase_bound_by(3); + auto type_id = id + 0; + auto ptr_type_id = id + 1; + auto combined_id = id + 2; + auto &base = compiler.expression_type(image_id); + auto &type = compiler.set(type_id); + auto &ptr_type = compiler.set(ptr_type_id); + + type = base; + type.self = type_id; + type.basetype = SPIRType::SampledImage; + type.pointer = false; + type.storage = StorageClassGeneric; + + ptr_type = type; + ptr_type.pointer = true; + ptr_type.storage = StorageClassUniformConstant; + + // Build new variable. + compiler.set(combined_id, ptr_type_id, StorageClassFunction, 0); + + // Inherit RelaxedPrecision (and potentially other useful flags if deemed relevant). + auto &new_flags = compiler.meta[combined_id].decoration.decoration_flags; + auto old_flags = compiler.meta[sampler_id].decoration.decoration_flags; + new_flags = old_flags & (1ull << DecorationRelaxedPrecision); + + param.id = combined_id; + + compiler.set_name(combined_id, + join("SPIRV_Cross_Combined", compiler.to_name(image_id), compiler.to_name(sampler_id))); + + caller.combined_parameters.push_back(param); + caller.shadow_arguments.push_back({ ptr_type_id, combined_id, 0u, 0u }); + } +} + +bool Compiler::CombinedImageSamplerHandler::handle(Op opcode, const uint32_t *args, uint32_t length) +{ + // We need to figure out where samplers and images are loaded from, so do only the bare bones compilation we need. + switch (opcode) + { + case OpLoad: + { + if (length < 3) + return false; + + uint32_t result_type = args[0]; + + auto &type = compiler.get(result_type); + bool separate_image = type.basetype == SPIRType::Image && type.image.sampled == 1; + bool separate_sampler = type.basetype == SPIRType::Sampler; + + // If not separate image or sampler, don't bother. + if (!separate_image && !separate_sampler) + return true; + + uint32_t id = args[1]; + uint32_t ptr = args[2]; + compiler.set(id, "", result_type, true); + compiler.register_read(id, ptr, true); + return true; + } + + case OpInBoundsAccessChain: + case OpAccessChain: + { + if (length < 3) + return false; + + // Technically, it is possible to have arrays of textures and arrays of samplers and combine them, but this becomes essentially + // impossible to implement, since we don't know which concrete sampler we are accessing. 
+ // One potential way is to create a combinatorial explosion where N textures and M samplers are combined into N * M sampler2Ds, + // but this seems ridiculously complicated for a problem which is easy to work around. + // Checking access chains like this assumes we don't have samplers or textures inside uniform structs, but this makes no sense. + + auto &type = compiler.get(args[0]); + bool separate_image = type.basetype == SPIRType::Image && type.image.sampled == 1; + bool separate_sampler = type.basetype == SPIRType::Sampler; + if (separate_image) + SPIRV_CROSS_THROW( + "Attempting to use arrays of separate images. This is not possible to statically remap to plain GLSL."); + if (separate_sampler) + SPIRV_CROSS_THROW("Attempting to use arrays of separate samplers. This is not possible to statically " + "remap to plain GLSL."); + return true; + } + + case OpSampledImage: + // Do it outside. + break; + + default: + return true; + } + + if (length < 4) + return false; + + // Registers sampler2D calls used in case they are parameters so + // that their callees know which combined image samplers to propagate down the call stack. + if (!functions.empty()) + { + auto &callee = *functions.top(); + if (callee.do_combined_parameters) + { + uint32_t image_id = args[2]; + + auto *image = compiler.maybe_get_backing_variable(image_id); + if (image) + image_id = image->self; + + uint32_t sampler_id = args[3]; + auto *sampler = compiler.maybe_get_backing_variable(sampler_id); + if (sampler) + sampler_id = sampler->self; + + register_combined_image_sampler(callee, image_id, sampler_id); + } + } + + // For function calls, we need to remap IDs which are function parameters into global variables. + // This information is statically known from the current place in the call stack. + // Function parameters are not necessarily pointers, so if we don't have a backing variable, remapping will know + // which backing variable the image/sample came from. + uint32_t image_id = remap_parameter(args[2]); + uint32_t sampler_id = remap_parameter(args[3]); + + auto itr = find_if(begin(compiler.combined_image_samplers), end(compiler.combined_image_samplers), + [image_id, sampler_id](const CombinedImageSampler &combined) { + return combined.image_id == image_id && combined.sampler_id == sampler_id; + }); + + if (itr == end(compiler.combined_image_samplers)) + { + auto id = compiler.increase_bound_by(2); + auto type_id = id + 0; + auto combined_id = id + 1; + auto sampled_type = args[0]; + + // Make a new type, pointer to OpTypeSampledImage, so we can make a variable of this type. + // We will probably have this type lying around, but it doesn't hurt to make duplicates for internal purposes. + auto &type = compiler.set(type_id); + auto &base = compiler.get(sampled_type); + type = base; + type.pointer = true; + type.storage = StorageClassUniformConstant; + + // Build new variable. + compiler.set(combined_id, type_id, StorageClassUniformConstant, 0); + + // Inherit RelaxedPrecision (and potentially other useful flags if deemed relevant). 
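+		// Only RelaxedPrecision is carried over here; other decorations on the
+		// source sampler are not copied onto the synthesized combined variable.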
+ auto &new_flags = compiler.meta[combined_id].decoration.decoration_flags; + auto old_flags = compiler.meta[sampler_id].decoration.decoration_flags; + new_flags = old_flags & (1ull << DecorationRelaxedPrecision); + + compiler.combined_image_samplers.push_back({ combined_id, image_id, sampler_id }); + } + + return true; +} + +void Compiler::build_combined_image_samplers() +{ + for (auto &id : ids) + { + if (id.get_type() == TypeFunction) + { + auto &func = id.get(); + func.combined_parameters.clear(); + func.shadow_arguments.clear(); + func.do_combined_parameters = true; + } + } + + combined_image_samplers.clear(); + CombinedImageSamplerHandler handler(*this); + traverse_all_reachable_opcodes(get(entry_point), handler); +} + +vector Compiler::get_specialization_constants() const +{ + vector spec_consts; + for (auto &id : ids) + { + if (id.get_type() == TypeConstant) + { + auto &c = id.get(); + if (c.specialization) + { + spec_consts.push_back({ c.self, get_decoration(c.self, DecorationSpecId) }); + } + } + } + return spec_consts; +} + +SPIRConstant &Compiler::get_constant(uint32_t id) +{ + return get(id); +} + +const SPIRConstant &Compiler::get_constant(uint32_t id) const +{ + return get(id); +} + +void Compiler::analyze_variable_scope(SPIRFunction &entry) +{ + struct AccessHandler : OpcodeHandler + { + public: + AccessHandler(Compiler &compiler_) + : compiler(compiler_) + { + } + + bool follow_function_call(const SPIRFunction &) + { + // Only analyze within this function. + return false; + } + + void set_current_block(const SPIRBlock &block) + { + current_block = █ + + // If we're branching to a block which uses OpPhi, in GLSL + // this will be a variable write when we branch, + // so we need to track access to these variables as well to + // have a complete picture. 
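+			// In effect, an OpPhi in a successor acts like a store performed in
+			// this block, so the phi's backing function-local variable is counted
+			// as accessed from here as well.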
+ const auto test_phi = [this, &block](uint32_t to) { + auto &next = compiler.get(to); + for (auto &phi : next.phi_variables) + if (phi.parent == block.self) + accessed_variables_to_block[phi.function_variable].insert(block.self); + }; + + switch (block.terminator) + { + case SPIRBlock::Direct: + test_phi(block.next_block); + break; + + case SPIRBlock::Select: + test_phi(block.true_block); + test_phi(block.false_block); + break; + + case SPIRBlock::MultiSelect: + for (auto &target : block.cases) + test_phi(target.block); + if (block.default_block) + test_phi(block.default_block); + break; + + default: + break; + } + } + + bool handle(spv::Op op, const uint32_t *args, uint32_t length) + { + switch (op) + { + case OpStore: + { + if (length < 2) + return false; + + uint32_t ptr = args[0]; + auto *var = compiler.maybe_get_backing_variable(ptr); + if (var && var->storage == StorageClassFunction) + accessed_variables_to_block[var->self].insert(current_block->self); + break; + } + + case OpAccessChain: + case OpInBoundsAccessChain: + { + if (length < 3) + return false; + + uint32_t ptr = args[2]; + auto *var = compiler.maybe_get(ptr); + if (var && var->storage == StorageClassFunction) + accessed_variables_to_block[var->self].insert(current_block->self); + break; + } + + case OpCopyMemory: + { + if (length < 3) + return false; + + uint32_t lhs = args[0]; + uint32_t rhs = args[1]; + auto *var = compiler.maybe_get_backing_variable(lhs); + if (var && var->storage == StorageClassFunction) + accessed_variables_to_block[var->self].insert(current_block->self); + + var = compiler.maybe_get_backing_variable(rhs); + if (var && var->storage == StorageClassFunction) + accessed_variables_to_block[var->self].insert(current_block->self); + break; + } + + case OpCopyObject: + { + if (length < 3) + return false; + + auto *var = compiler.maybe_get_backing_variable(args[2]); + if (var && var->storage == StorageClassFunction) + accessed_variables_to_block[var->self].insert(current_block->self); + break; + } + + case OpLoad: + { + if (length < 3) + return false; + uint32_t ptr = args[2]; + auto *var = compiler.maybe_get_backing_variable(ptr); + if (var && var->storage == StorageClassFunction) + accessed_variables_to_block[var->self].insert(current_block->self); + break; + } + + case OpFunctionCall: + { + if (length < 3) + return false; + + length -= 3; + args += 3; + for (uint32_t i = 0; i < length; i++) + { + auto *var = compiler.maybe_get_backing_variable(args[i]); + if (var && var->storage == StorageClassFunction) + accessed_variables_to_block[var->self].insert(current_block->self); + } + break; + } + + case OpPhi: + { + if (length < 2) + return false; + + // Phi nodes are implemented as function variables, so register an access here. + accessed_variables_to_block[args[1]].insert(current_block->self); + break; + } + + // Atomics shouldn't be able to access function-local variables. + // Some GLSL builtins access a pointer. + + default: + break; + } + return true; + } + + Compiler &compiler; + std::unordered_map> accessed_variables_to_block; + const SPIRBlock *current_block = nullptr; + } handler(*this); + + // First, we map out all variable access within a function. + // Essentially a map of block -> { variables accessed in the basic block } + this->traverse_all_reachable_opcodes(entry, handler); + + // Compute the control flow graph for this function. + CFG cfg(*this, entry); + + unordered_map potential_loop_variables; + + // For each variable which is statically accessed. 
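+	// find the block which dominates all of its accesses; that block is where the
+	// variable's declaration can be hoisted to in the emitted output.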
+ for (auto &var : handler.accessed_variables_to_block) + { + DominatorBuilder builder(cfg); + auto &blocks = var.second; + auto &type = expression_type(var.first); + + // Figure out which block is dominating all accesses of those variables. + for (auto &block : blocks) + { + // If we're accessing a variable inside a continue block, this variable might be a loop variable. + // We can only use loop variables with scalars, as we cannot track static expressions for vectors. + if (is_continue(block) && type.vecsize == 1 && type.columns == 1) + { + // The variable is used in multiple continue blocks, this is not a loop + // candidate, signal that by setting block to -1u. + auto &potential = potential_loop_variables[var.first]; + + if (potential == 0) + potential = block; + else + potential = -1u; + } + builder.add_block(block); + } + + builder.lift_continue_block_dominator(); + + // Add it to a per-block list of variables. + uint32_t dominating_block = builder.get_dominator(); + // If all blocks here are dead code, this will be 0, so the variable in question + // will be completely eliminated. + if (dominating_block) + { + auto &block = this->get(dominating_block); + block.dominated_variables.push_back(var.first); + get(var.first).dominator = dominating_block; + } + } + + // Now, try to analyze whether or not these variables are actually loop variables. + for (auto &loop_variable : potential_loop_variables) + { + auto &var = get(loop_variable.first); + auto dominator = var.dominator; + auto block = loop_variable.second; + + // The variable was accessed in multiple continue blocks, ignore. + if (block == -1u || block == 0) + continue; + + // Dead code. + if (dominator == 0) + continue; + + uint32_t header = 0; + + // Find the loop header for this block. + for (auto b : loop_blocks) + { + auto &potential_header = get(b); + if (potential_header.continue_block == block) + { + header = b; + break; + } + } + + assert(header); + auto &header_block = get(header); + + // Now, there are two conditions we need to meet for the variable to be a loop variable. + // 1. The dominating block must have a branch-free path to the loop header, + // this way we statically know which expression should be part of the loop variable initializer. + + // Walk from the dominator, if there is one straight edge connecting + // dominator and loop header, we statically know the loop initializer. + bool static_loop_init = true; + while (dominator != header) + { + auto &succ = cfg.get_succeeding_edges(dominator); + if (succ.size() != 1) + { + static_loop_init = false; + break; + } + + auto &pred = cfg.get_preceding_edges(succ.front()); + if (pred.size() != 1 || pred.front() != dominator) + { + static_loop_init = false; + break; + } + + dominator = succ.front(); + } + + if (!static_loop_init) + continue; + + // The second condition we need to meet is that no access after the loop + // merge can occur. Walk the CFG to see if we find anything. + auto &blocks = handler.accessed_variables_to_block[loop_variable.first]; + cfg.walk_from(header_block.merge_block, [&](uint32_t walk_block) { + // We found a block which accesses the variable outside the loop. + if (blocks.find(walk_block) != end(blocks)) + static_loop_init = false; + }); + + if (!static_loop_init) + continue; + + // We have a loop variable. + header_block.loop_variables.push_back(loop_variable.first); + // Need to sort here as variables come from an unordered container, and pushing stuff in wrong order + // will break reproducability in regression runs. 
+		sort(begin(header_block.loop_variables), end(header_block.loop_variables));
+		get<SPIRVariable>(loop_variable.first).loop_variable = true;
+	}
+}
diff --git a/spirv_cross.hpp b/spirv_cross.hpp
new file mode 100644
index 0000000000..5ba62585fc
--- /dev/null
+++ b/spirv_cross.hpp
@@ -0,0 +1,584 @@
+/*
+ * Copyright 2015-2016 ARM Limited
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SPIRV_CROSS_HPP
+#define SPIRV_CROSS_HPP
+
+#include "spirv.hpp"
+#include <memory>
+#include <stack>
+#include <stdexcept>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+#include "spirv_common.hpp"
+
+namespace spirv_cross
+{
+struct Resource
+{
+	// Resources are identified with their SPIR-V ID.
+	// This is the ID of the OpVariable.
+	uint32_t id;
+
+	// The type ID of the variable which includes arrays and all type modifications.
+	// This type ID is not suitable for parsing OpMemberDecoration of a struct and other decorations in general
+	// since these modifications typically happen on the base_type_id.
+	uint32_t type_id;
+
+	// The base type of the declared resource.
+	// This type is the base type which ignores pointers and arrays of the type_id.
+	// This is mostly useful to parse decorations of the underlying type.
+	// base_type_id can also be obtained with get_type(get_type(type_id).self).
+	uint32_t base_type_id;
+
+	// The declared name (OpName) of the resource.
+	// For Buffer blocks, the name actually reflects the externally
+	// visible Block name.
+	//
+	// This name can be retrieved again by using either
+	// get_name(id) or get_name(base_type_id) depending on whether it's a buffer block or not.
+	//
+	// This name can be an empty string in which case get_fallback_name(id) can be
+	// used which obtains a suitable fallback identifier for an ID.
+	std::string name;
+};
+
+struct ShaderResources
+{
+	std::vector<Resource> uniform_buffers;
+	std::vector<Resource> storage_buffers;
+	std::vector<Resource> stage_inputs;
+	std::vector<Resource> stage_outputs;
+	std::vector<Resource> subpass_inputs;
+	std::vector<Resource> storage_images;
+	std::vector<Resource> sampled_images;
+	std::vector<Resource> atomic_counters;
+
+	// There can only be one push constant block,
+	// but keep the vector in case this restriction is lifted in the future.
+	std::vector<Resource> push_constant_buffers;
+
+	// For Vulkan GLSL and HLSL source,
+	// these correspond to separate texture2D and samplers respectively.
+	std::vector<Resource> separate_images;
+	std::vector<Resource> separate_samplers;
+};
+
+struct CombinedImageSampler
+{
+	// The ID of the sampler2D variable.
+	uint32_t combined_id;
+	// The ID of the texture2D variable.
+	uint32_t image_id;
+	// The ID of the sampler variable.
+	uint32_t sampler_id;
+};
+
+struct SpecializationConstant
+{
+	// The ID of the specialization constant.
+	uint32_t id;
+	// The constant ID of the constant, used in Vulkan during pipeline creation.
+	uint32_t constant_id;
+};
+
+struct BufferRange
+{
+	unsigned index;
+	size_t offset;
+	size_t range;
+};
+
+class Compiler
+{
+public:
+	friend class CFG;
+	friend class DominatorBuilder;
+
+	// The constructor takes a buffer of SPIR-V words and parses it.
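+	// Typical use (an illustrative sketch; CompilerGLSL comes from spirv_glsl.hpp,
+	// and load_spirv_file is a hypothetical helper returning std::vector<uint32_t>):
+	//
+	//   spirv_cross::CompilerGLSL glsl(load_spirv_file("shader.spv"));
+	//   spirv_cross::ShaderResources res = glsl.get_shader_resources();
+	//   std::string source = glsl.compile();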
+	Compiler(std::vector<uint32_t> ir);
+
+	virtual ~Compiler() = default;
+
+	// After parsing, API users can modify the SPIR-V via reflection and call this
+	// to disassemble the SPIR-V into the desired language.
+	// Sub-classes actually implement this.
+	virtual std::string compile();
+
+	// Gets the identifier (OpName) of an ID. If not defined, an empty string will be returned.
+	const std::string &get_name(uint32_t id) const;
+
+	// Applies a decoration to an ID. Effectively injects OpDecorate.
+	void set_decoration(uint32_t id, spv::Decoration decoration, uint32_t argument = 0);
+
+	// Overrides the identifier OpName of an ID.
+	// Identifiers beginning with underscores or identifiers which contain double underscores
+	// are reserved by the implementation.
+	void set_name(uint32_t id, const std::string &name);
+
+	// Gets a bitmask for the decorations which are applied to ID.
+	// I.e. (1ull << spv::DecorationFoo) | (1ull << spv::DecorationBar)
+	uint64_t get_decoration_mask(uint32_t id) const;
+
+	// Gets the value for decorations which take arguments.
+	// If decoration doesn't exist or decoration is not recognized,
+	// 0 will be returned.
+	uint32_t get_decoration(uint32_t id, spv::Decoration decoration) const;
+
+	// Removes the decoration for an ID.
+	void unset_decoration(uint32_t id, spv::Decoration decoration);
+
+	// Gets the SPIR-V type associated with ID.
+	// Mostly used with Resource::type_id and Resource::base_type_id to parse the underlying type of a resource.
+	const SPIRType &get_type(uint32_t id) const;
+
+	// Gets the underlying storage class for an OpVariable.
+	spv::StorageClass get_storage_class(uint32_t id) const;
+
+	// If get_name() is an empty string, get the fallback name which will be used
+	// instead in the disassembled source.
+	virtual const std::string get_fallback_name(uint32_t id) const
+	{
+		return join("_", id);
+	}
+
+	// Given an OpTypeStruct in ID, obtain the identifier for member number "index".
+	// This may be an empty string.
+	const std::string &get_member_name(uint32_t id, uint32_t index) const;
+
+	// Given an OpTypeStruct in ID, obtain the OpMemberDecoration for member number "index".
+	uint32_t get_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration) const;
+
+	// Sets the member identifier for OpTypeStruct ID, member number "index".
+	void set_member_name(uint32_t id, uint32_t index, const std::string &name);
+
+	// Sets the qualified member identifier for OpTypeStruct ID, member number "index".
+	void set_member_qualified_name(uint32_t id, uint32_t index, const std::string &name);
+
+	// Gets the decoration mask for a member of a struct, similar to get_decoration_mask.
+	uint64_t get_member_decoration_mask(uint32_t id, uint32_t index) const;
+
+	// Similar to set_decoration, but for struct members.
+	void set_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration, uint32_t argument = 0);
+
+	// Unsets a member decoration, similar to unset_decoration.
+	void unset_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration);
+
+	// Gets the fallback name for a member, similar to get_fallback_name.
+	virtual const std::string get_fallback_member_name(uint32_t index) const
+	{
+		return join("_", index);
+	}
+
+	// Returns a vector of which members of a struct are potentially in use by a
+	// SPIR-V shader. The granularity of this analysis is per-member of a struct.
+	// This can be used for Buffer (UBO), BufferBlock (SSBO) and PushConstant blocks.
+ // ID is the Resource::id obtained from get_shader_resources(). + std::vector get_active_buffer_ranges(uint32_t id) const; + + // Returns the effective size of a buffer block. + size_t get_declared_struct_size(const SPIRType &struct_type) const; + + // Returns the effective size of a buffer block struct member. + virtual size_t get_declared_struct_member_size(const SPIRType &struct_type, uint32_t index) const; + + // Legacy GLSL compatibility method. + // Takes a variable with a block interface and flattens it into a T array[N]; array instead. + // For this to work, all types in the block must not themselves be composites + // (except vectors and matrices), and all types must be the same. + // The name of the uniform will be the same as the interface block name. + void flatten_interface_block(uint32_t id); + + // Returns a set of all global variables which are statically accessed + // by the control flow graph from the current entry point. + // Only variables which change the interface for a shader are returned, that is, + // variables with storage class of Input, Output, Uniform, UniformConstant, PushConstant and AtomicCounter + // storage classes are returned. + // + // To use the returned set as the filter for which variables are used during compilation, + // this set can be moved to set_enabled_interface_variables(). + std::unordered_set get_active_interface_variables() const; + + // Sets the interface variables which are used during compilation. + // By default, all variables are used. + // Once set, compile() will only consider the set in active_variables. + void set_enabled_interface_variables(std::unordered_set active_variables); + + // Query shader resources, use ids with reflection interface to modify or query binding points, etc. + ShaderResources get_shader_resources() const; + + // Query shader resources, but only return the variables which are part of active_variables. + // E.g.: get_shader_resources(get_active_variables()) to only return the variables which are statically + // accessed. + ShaderResources get_shader_resources(const std::unordered_set &active_variables) const; + + // Remapped variables are considered built-in variables and a backend will + // not emit a declaration for this variable. + // This is mostly useful for making use of builtins which are dependent on extensions. + void set_remapped_variable_state(uint32_t id, bool remap_enable); + bool get_remapped_variable_state(uint32_t id) const; + + // For subpassInput variables which are remapped to plain variables, + // the number of components in the remapped + // variable must be specified as the backing type of subpass inputs are opaque. + void set_subpass_input_remapped_components(uint32_t id, uint32_t components); + uint32_t get_subpass_input_remapped_components(uint32_t id) const; + + // All operations work on the current entry point. + // Entry points can be swapped out with set_entry_point(). + // Entry points should be set right after the constructor completes as some reflection functions traverse the graph from the entry point. + // Resource reflection also depends on the entry point. + // By default, the current entry point is set to the first OpEntryPoint which appears in the SPIR-V module. + std::vector get_entry_points() const; + void set_entry_point(const std::string &name); + + // Returns the internal data structure for entry points to allow poking around. 
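+	// E.g. (sketch): set_entry_point("main") followed by get_entry_point("main")
+	// to inspect or tweak that entry point's metadata before compile().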
+ const SPIREntryPoint &get_entry_point(const std::string &name) const; + SPIREntryPoint &get_entry_point(const std::string &name); + + // Query and modify OpExecutionMode. + uint64_t get_execution_mode_mask() const; + void unset_execution_mode(spv::ExecutionMode mode); + void set_execution_mode(spv::ExecutionMode mode, uint32_t arg0 = 0, uint32_t arg1 = 0, uint32_t arg2 = 0); + + // Gets argument for an execution mode (LocalSize, Invocations, OutputVertices). + // For LocalSize, the index argument is used to select the dimension (X = 0, Y = 1, Z = 2). + // For execution modes which do not have arguments, 0 is returned. + uint32_t get_execution_mode_argument(spv::ExecutionMode mode, uint32_t index = 0) const; + spv::ExecutionModel get_execution_model() const; + + // Analyzes all separate image and samplers used from the currently selected entry point, + // and re-routes them all to a combined image sampler instead. + // This is required to "support" separate image samplers in targets which do not natively support + // this feature, like GLSL/ESSL. + // + // This must be called before compile() if such remapping is desired. + // This call will add new sampled images to the SPIR-V, + // so it will appear in reflection if get_shader_resources() is called after build_combined_image_samplers. + // + // If any image/sampler remapping was found, no separate image/samplers will appear in the decompiled output, + // but will still appear in reflection. + // + // The resulting samplers will be void of any decorations like name, descriptor sets and binding points, + // so this can be added before compile() if desired. + // + // Combined image samplers originating from this set are always considered active variables. + void build_combined_image_samplers(); + + // Gets a remapping for the combined image samplers. + const std::vector &get_combined_image_samplers() const + { + return combined_image_samplers; + } + + // Set a new variable type remap callback. + // The type remapping is designed to allow global interface variable to assume more special types. + // A typical example here is to remap sampler2D into samplerExternalOES, which currently isn't supported + // directly by SPIR-V. + // + // In compile() while emitting code, + // for every variable that is declared, including function parameters, the callback will be called + // and the API user has a chance to change the textual representation of the type used to declare the variable. + // The API user can detect special patterns in names to guide the remapping. + void set_variable_type_remap_callback(VariableTypeRemapCallback cb) + { + variable_remap_callback = std::move(cb); + } + + // API for querying which specialization constants exist. + // To modify a specialization constant before compile(), use get_constant(constant.id), + // then update constants directly in the SPIRConstant data structure. + // For composite types, the subconstants can be iterated over and modified. + // constant_type is the SPIRType for the specialization constant, + // which can be queried to determine which fields in the unions should be poked at. + std::vector get_specialization_constants() const; + SPIRConstant &get_constant(uint32_t id); + const SPIRConstant &get_constant(uint32_t id) const; + + uint32_t get_current_id_bound() const + { + return uint32_t(ids.size()); + } + +protected: + const uint32_t *stream(const Instruction &instr) const + { + // If we're not going to use any arguments, just return nullptr. 
+		// We want to avoid the case where we return an out of range pointer
+		// that trips debug assertions on some platforms.
+		if (!instr.length)
+			return nullptr;
+
+		if (instr.offset + instr.length > spirv.size())
+			SPIRV_CROSS_THROW("Compiler::stream() out of range.");
+		return &spirv[instr.offset];
+	}
+	std::vector<uint32_t> spirv;
+
+	std::vector<Instruction> inst;
+	std::vector<Variant> ids;
+	std::vector<Meta> meta;
+
+	SPIRFunction *current_function = nullptr;
+	SPIRBlock *current_block = nullptr;
+	std::vector<uint32_t> global_variables;
+	std::vector<uint32_t> aliased_variables;
+	std::unordered_set<uint32_t> active_interface_variables;
+	bool check_active_interface_variables = false;
+
+	// If our IDs are out of range here as part of opcodes, throw instead of
+	// undefined behavior.
+	template <typename T, typename... P>
+	T &set(uint32_t id, P &&... args)
+	{
+		auto &var = variant_set<T>(ids.at(id), std::forward<P>(args)...);
+		var.self = id;
+		return var;
+	}
+
+	template <typename T>
+	T &get(uint32_t id)
+	{
+		return variant_get<T>(ids.at(id));
+	}
+
+	template <typename T>
+	T *maybe_get(uint32_t id)
+	{
+		if (ids.at(id).get_type() == T::type)
+			return &get<T>(id);
+		else
+			return nullptr;
+	}
+
+	template <typename T>
+	const T &get(uint32_t id) const
+	{
+		return variant_get<T>(ids.at(id));
+	}
+
+	template <typename T>
+	const T *maybe_get(uint32_t id) const
+	{
+		if (ids.at(id).get_type() == T::type)
+			return &get<T>(id);
+		else
+			return nullptr;
+	}
+
+	uint32_t entry_point = 0;
+	// Normally, we'd stick SPIREntryPoint in the ids array, but it conflicts with SPIRFunction.
+	// Entry points can therefore be seen as some sort of meta structure.
+	std::unordered_map<uint32_t, SPIREntryPoint> entry_points;
+	const SPIREntryPoint &get_entry_point() const;
+	SPIREntryPoint &get_entry_point();
+
+	struct Source
+	{
+		uint32_t version = 0;
+		bool es = false;
+		bool known = false;
+
+		Source() = default;
+	} source;
+
+	std::unordered_set<uint32_t> loop_blocks;
+	std::unordered_set<uint32_t> continue_blocks;
+	std::unordered_set<uint32_t> loop_merge_targets;
+	std::unordered_set<uint32_t> selection_merge_targets;
+	std::unordered_set<uint32_t> multiselect_merge_targets;
+
+	virtual std::string to_name(uint32_t id, bool allow_alias = true);
+	bool is_builtin_variable(const SPIRVariable &var) const;
+	bool is_hidden_variable(const SPIRVariable &var, bool include_builtins = false) const;
+	bool is_immutable(uint32_t id) const;
+	bool is_member_builtin(const SPIRType &type, uint32_t index, spv::BuiltIn *builtin) const;
+	bool is_scalar(const SPIRType &type) const;
+	bool is_vector(const SPIRType &type) const;
+	bool is_matrix(const SPIRType &type) const;
+	const SPIRType &expression_type(uint32_t id) const;
+	bool expression_is_lvalue(uint32_t id) const;
+	bool variable_storage_is_aliased(const SPIRVariable &var);
+	SPIRVariable *maybe_get_backing_variable(uint32_t chain);
+
+	void register_read(uint32_t expr, uint32_t chain, bool forwarded);
+	void register_write(uint32_t chain);
+
+	inline bool is_continue(uint32_t next) const
+	{
+		return continue_blocks.find(next) != end(continue_blocks);
+	}
+
+	inline bool is_break(uint32_t next) const
+	{
+		return loop_merge_targets.find(next) != end(loop_merge_targets) ||
+		       multiselect_merge_targets.find(next) != end(multiselect_merge_targets);
+	}
+
+	inline bool is_conditional(uint32_t next) const
+	{
+		return selection_merge_targets.find(next) != end(selection_merge_targets) &&
+		       multiselect_merge_targets.find(next) == end(multiselect_merge_targets);
+	}
+
+	// Dependency tracking for temporaries read from variables.
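+	// When a variable is written, forwarded expressions which read from it are
+	// invalidated (see invalid_expressions below) and must be re-materialized.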
+	void flush_dependees(SPIRVariable &var);
+	void flush_all_active_variables();
+	void flush_all_atomic_capable_variables();
+	void flush_all_aliased_variables();
+	void register_global_read_dependencies(const SPIRBlock &func, uint32_t id);
+	void register_global_read_dependencies(const SPIRFunction &func, uint32_t id);
+	std::unordered_set<uint32_t> invalid_expressions;
+
+	void update_name_cache(std::unordered_set<std::string> &cache, std::string &name);
+
+	bool function_is_pure(const SPIRFunction &func);
+	bool block_is_pure(const SPIRBlock &block);
+	bool block_is_outside_flow_control_from_block(const SPIRBlock &from, const SPIRBlock &to);
+
+	bool execution_is_branchless(const SPIRBlock &from, const SPIRBlock &to) const;
+	bool execution_is_noop(const SPIRBlock &from, const SPIRBlock &to) const;
+	SPIRBlock::ContinueBlockType continue_block_type(const SPIRBlock &continue_block) const;
+
+	bool force_recompile = false;
+
+	uint32_t type_struct_member_offset(const SPIRType &type, uint32_t index) const;
+	uint32_t type_struct_member_array_stride(const SPIRType &type, uint32_t index) const;
+
+	bool block_is_loop_candidate(const SPIRBlock &block, SPIRBlock::Method method) const;
+
+	uint32_t increase_bound_by(uint32_t incr_amount);
+
+	bool types_are_logically_equivalent(const SPIRType &a, const SPIRType &b) const;
+	void inherit_expression_dependencies(uint32_t dst, uint32_t source);
+
+	// For proper multiple entry point support, allow querying if an Input or Output
+	// variable is part of that entry point's interface.
+	bool interface_variable_exists_in_entry_point(uint32_t id) const;
+
+	std::vector<CombinedImageSampler> combined_image_samplers;
+
+	void remap_variable_type_name(const SPIRType &type, const std::string &var_name, std::string &type_name) const
+	{
+		if (variable_remap_callback)
+			variable_remap_callback(type, var_name, type_name);
+	}
+
+	void analyze_variable_scope(SPIRFunction &function);
+
+private:
+	void parse();
+	void parse(const Instruction &i);
+
+	// Used internally to implement various traversals for queries.
+	struct OpcodeHandler
+	{
+		virtual ~OpcodeHandler() = default;
+
+		// Return true if traversal should continue.
+		// If false, traversal will end immediately.
+ virtual bool handle(spv::Op opcode, const uint32_t *args, uint32_t length) = 0; + + virtual bool follow_function_call(const SPIRFunction &) + { + return true; + } + + virtual void set_current_block(const SPIRBlock &) + { + } + + virtual bool begin_function_scope(const uint32_t *, uint32_t) + { + return true; + } + + virtual bool end_function_scope(const uint32_t *, uint32_t) + { + return true; + } + }; + + struct BufferAccessHandler : OpcodeHandler + { + BufferAccessHandler(const Compiler &compiler_, std::vector &ranges_, uint32_t id_) + : compiler(compiler_) + , ranges(ranges_) + , id(id_) + { + } + + bool handle(spv::Op opcode, const uint32_t *args, uint32_t length) override; + + const Compiler &compiler; + std::vector &ranges; + uint32_t id; + + std::unordered_set seen; + }; + + struct InterfaceVariableAccessHandler : OpcodeHandler + { + InterfaceVariableAccessHandler(const Compiler &compiler_, std::unordered_set &variables_) + : compiler(compiler_) + , variables(variables_) + { + } + + bool handle(spv::Op opcode, const uint32_t *args, uint32_t length) override; + + const Compiler &compiler; + std::unordered_set &variables; + }; + + struct CombinedImageSamplerHandler : OpcodeHandler + { + CombinedImageSamplerHandler(Compiler &compiler_) + : compiler(compiler_) + { + } + bool handle(spv::Op opcode, const uint32_t *args, uint32_t length) override; + bool begin_function_scope(const uint32_t *args, uint32_t length) override; + bool end_function_scope(const uint32_t *args, uint32_t length) override; + + Compiler &compiler; + + // Each function in the call stack needs its own remapping for parameters so we can deduce which global variable each texture/sampler the parameter is statically bound to. + std::stack> parameter_remapping; + std::stack functions; + + uint32_t remap_parameter(uint32_t id); + void push_remap_parameters(const SPIRFunction &func, const uint32_t *args, uint32_t length); + void pop_remap_parameters(); + void register_combined_image_sampler(SPIRFunction &caller, uint32_t texture_id, uint32_t sampler_id); + }; + + bool traverse_all_reachable_opcodes(const SPIRBlock &block, OpcodeHandler &handler) const; + bool traverse_all_reachable_opcodes(const SPIRFunction &block, OpcodeHandler &handler) const; + // This must be an ordered data structure so we always pick the same type aliases. + std::vector global_struct_cache; + + ShaderResources get_shader_resources(const std::unordered_set *active_variables) const; + + VariableTypeRemapCallback variable_remap_callback; +}; +} + +#endif diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp new file mode 100644 index 0000000000..3e501d12c1 --- /dev/null +++ b/spirv_glsl.cpp @@ -0,0 +1,6092 @@ +/* + * Copyright 2015-2016 ARM Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "spirv_glsl.hpp" +#include "GLSL.std.450.h" +#include +#include + +using namespace spv; +using namespace spirv_cross; +using namespace std; + +// Returns true if an arithmetic operation does not change behavior depending on signedness. 
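+// E.g. OpIAdd, OpISub and OpIMul are bit-exact for signed and unsigned operands
+// under two's complement, whereas OpSDiv/OpUDiv or OpSLessThan/OpULessThan are not.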
+static bool opcode_is_sign_invariant(Op opcode) +{ + switch (opcode) + { + case OpIEqual: + case OpINotEqual: + case OpISub: + case OpIAdd: + case OpIMul: + case OpShiftLeftLogical: + case OpBitwiseOr: + case OpBitwiseXor: + case OpBitwiseAnd: + return true; + + default: + return false; + } +} + +static const char *to_pls_layout(PlsFormat format) +{ + switch (format) + { + case PlsR11FG11FB10F: + return "layout(r11f_g11f_b10f) "; + case PlsR32F: + return "layout(r32f) "; + case PlsRG16F: + return "layout(rg16f) "; + case PlsRGB10A2: + return "layout(rgb10_a2) "; + case PlsRGBA8: + return "layout(rgba8) "; + case PlsRG16: + return "layout(rg16) "; + case PlsRGBA8I: + return "layout(rgba8i)"; + case PlsRG16I: + return "layout(rg16i) "; + case PlsRGB10A2UI: + return "layout(rgb10_a2ui) "; + case PlsRGBA8UI: + return "layout(rgba8ui) "; + case PlsRG16UI: + return "layout(rg16ui) "; + case PlsR32UI: + return "layout(r32ui) "; + default: + return ""; + } +} + +static SPIRType::BaseType pls_format_to_basetype(PlsFormat format) +{ + switch (format) + { + default: + case PlsR11FG11FB10F: + case PlsR32F: + case PlsRG16F: + case PlsRGB10A2: + case PlsRGBA8: + case PlsRG16: + return SPIRType::Float; + + case PlsRGBA8I: + case PlsRG16I: + return SPIRType::Int; + + case PlsRGB10A2UI: + case PlsRGBA8UI: + case PlsRG16UI: + case PlsR32UI: + return SPIRType::UInt; + } +} + +static uint32_t pls_format_to_components(PlsFormat format) +{ + switch (format) + { + default: + case PlsR32F: + case PlsR32UI: + return 1; + + case PlsRG16F: + case PlsRG16: + case PlsRG16UI: + case PlsRG16I: + return 2; + + case PlsR11FG11FB10F: + return 3; + + case PlsRGB10A2: + case PlsRGBA8: + case PlsRGBA8I: + case PlsRGB10A2UI: + case PlsRGBA8UI: + return 4; + } +} + +void CompilerGLSL::reset() +{ + // We do some speculative optimizations which should pretty much always work out, + // but just in case the SPIR-V is rather weird, recompile until it's happy. + // This typically only means one extra pass. + force_recompile = false; + + // Clear invalid expression tracking. + invalid_expressions.clear(); + current_function = nullptr; + + // Clear temporary usage tracking. + expression_usage_counts.clear(); + forwarded_temporaries.clear(); + + resource_names.clear(); + + for (auto &id : ids) + { + if (id.get_type() == TypeVariable) + { + // Clear unflushed dependees. + id.get().dependees.clear(); + } + else if (id.get_type() == TypeExpression) + { + // And remove all expressions. + id.reset(); + } + else if (id.get_type() == TypeFunction) + { + // Reset active state for all functions. 
+ id.get().active = false; + id.get().flush_undeclared = true; + } + } + + statement_count = 0; + indent = 0; +} + +void CompilerGLSL::remap_pls_variables() +{ + for (auto &input : pls_inputs) + { + auto &var = get(input.id); + + bool input_is_target = false; + if (var.storage == StorageClassUniformConstant) + { + auto &type = get(var.basetype); + input_is_target = type.image.dim == DimSubpassData; + } + + if (var.storage != StorageClassInput && !input_is_target) + SPIRV_CROSS_THROW("Can only use in and target variables for PLS inputs."); + var.remapped_variable = true; + } + + for (auto &output : pls_outputs) + { + auto &var = get(output.id); + if (var.storage != StorageClassOutput) + SPIRV_CROSS_THROW("Can only use out variables for PLS outputs."); + var.remapped_variable = true; + } +} + +void CompilerGLSL::find_static_extensions() +{ + for (auto &id : ids) + { + if (id.get_type() == TypeType) + { + auto &type = id.get(); + if (type.basetype == SPIRType::Double) + { + if (options.es) + SPIRV_CROSS_THROW("FP64 not supported in ES profile."); + if (!options.es && options.version < 400) + require_extension("GL_ARB_gpu_shader_fp64"); + } + + if (type.basetype == SPIRType::Int64 || type.basetype == SPIRType::UInt64) + { + if (options.es) + SPIRV_CROSS_THROW("64-bit integers not supported in ES profile."); + if (!options.es) + require_extension("GL_ARB_gpu_shader_int64"); + } + } + } + + auto &execution = get_entry_point(); + switch (execution.model) + { + case ExecutionModelGLCompute: + if (!options.es && options.version < 430) + require_extension("GL_ARB_compute_shader"); + if (options.es && options.version < 310) + SPIRV_CROSS_THROW("At least ESSL 3.10 required for compute shaders."); + break; + + case ExecutionModelGeometry: + if (options.es && options.version < 320) + require_extension("GL_EXT_geometry_shader"); + if (!options.es && options.version < 320) + require_extension("GL_ARB_geometry_shader4"); + + if ((execution.flags & (1ull << ExecutionModeInvocations)) && execution.invocations != 1) + { + // Instanced GS is part of 400 core or this extension. + if (!options.es && options.version < 400) + require_extension("GL_ARB_gpu_shader5"); + } + break; + + case ExecutionModelTessellationEvaluation: + case ExecutionModelTessellationControl: + if (options.es && options.version < 320) + require_extension("GL_EXT_tessellation_shader"); + if (!options.es && options.version < 400) + require_extension("GL_ARB_tessellation_shader"); + break; + + default: + break; + } + + if (!pls_inputs.empty() || !pls_outputs.empty()) + require_extension("GL_EXT_shader_pixel_local_storage"); +} + +string CompilerGLSL::compile() +{ + // Scan the SPIR-V to find trivial uses of extensions. + find_static_extensions(); + + uint32_t pass_count = 0; + do + { + if (pass_count >= 3) + SPIRV_CROSS_THROW("Over 3 compilation loops detected. Must be a bug!"); + + reset(); + + // Move constructor for this type is broken on GCC 4.9 ... + buffer = unique_ptr(new ostringstream()); + + emit_header(); + emit_resources(); + + emit_function(get(entry_point), 0); + + pass_count++; + } while (force_recompile); + + return buffer->str(); +} + +std::string CompilerGLSL::get_partial_source() +{ + return buffer->str(); +} + +void CompilerGLSL::emit_header() +{ + auto &execution = get_entry_point(); + statement("#version ", options.version, options.es && options.version > 100 ? " es" : ""); + + // Needed for binding = # on UBOs, etc. 
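+	// For example, "layout(binding = 0) uniform UBO { ... };" requires GL 4.2 or
+	// GL_ARB_shading_language_420pack on older desktop targets, hence the guard below.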
+ if (!options.es && options.version < 420) + { + statement("#ifdef GL_ARB_shading_language_420pack"); + statement("#extension GL_ARB_shading_language_420pack : require"); + statement("#endif"); + } + + for (auto &ext : forced_extensions) + statement("#extension ", ext, " : require"); + + for (auto &header : header_lines) + statement(header); + + vector inputs; + vector outputs; + + switch (execution.model) + { + case ExecutionModelGeometry: + outputs.push_back(join("max_vertices = ", execution.output_vertices)); + if ((execution.flags & (1ull << ExecutionModeInvocations)) && execution.invocations != 1) + inputs.push_back(join("invocations = ", execution.invocations)); + if (execution.flags & (1ull << ExecutionModeInputPoints)) + inputs.push_back("points"); + if (execution.flags & (1ull << ExecutionModeInputLines)) + inputs.push_back("lines"); + if (execution.flags & (1ull << ExecutionModeInputLinesAdjacency)) + inputs.push_back("lines_adjacency"); + if (execution.flags & (1ull << ExecutionModeTriangles)) + inputs.push_back("triangles"); + if (execution.flags & (1ull << ExecutionModeInputTrianglesAdjacency)) + inputs.push_back("triangles_adjacency"); + if (execution.flags & (1ull << ExecutionModeOutputTriangleStrip)) + outputs.push_back("triangle_strip"); + if (execution.flags & (1ull << ExecutionModeOutputPoints)) + outputs.push_back("points"); + if (execution.flags & (1ull << ExecutionModeOutputLineStrip)) + outputs.push_back("line_strip"); + break; + + case ExecutionModelTessellationControl: + if (execution.flags & (1ull << ExecutionModeOutputVertices)) + outputs.push_back(join("vertices = ", execution.output_vertices)); + break; + + case ExecutionModelTessellationEvaluation: + if (execution.flags & (1ull << ExecutionModeQuads)) + inputs.push_back("quads"); + if (execution.flags & (1ull << ExecutionModeTriangles)) + inputs.push_back("triangles"); + if (execution.flags & (1ull << ExecutionModeIsolines)) + inputs.push_back("isolines"); + if (execution.flags & (1ull << ExecutionModePointMode)) + inputs.push_back("point_mode"); + + if ((execution.flags & (1ull << ExecutionModeIsolines)) == 0) + { + if (execution.flags & (1ull << ExecutionModeVertexOrderCw)) + inputs.push_back("cw"); + if (execution.flags & (1ull << ExecutionModeVertexOrderCcw)) + inputs.push_back("ccw"); + } + + if (execution.flags & (1ull << ExecutionModeSpacingFractionalEven)) + inputs.push_back("fractional_even_spacing"); + if (execution.flags & (1ull << ExecutionModeSpacingFractionalOdd)) + inputs.push_back("fractional_odd_spacing"); + if (execution.flags & (1ull << ExecutionModeSpacingEqual)) + inputs.push_back("equal_spacing"); + break; + + case ExecutionModelGLCompute: + inputs.push_back(join("local_size_x = ", execution.workgroup_size.x)); + inputs.push_back(join("local_size_y = ", execution.workgroup_size.y)); + inputs.push_back(join("local_size_z = ", execution.workgroup_size.z)); + break; + + case ExecutionModelFragment: + if (options.es) + { + switch (options.fragment.default_float_precision) + { + case Options::Lowp: + statement("precision lowp float;"); + break; + + case Options::Mediump: + statement("precision mediump float;"); + break; + + case Options::Highp: + statement("precision highp float;"); + break; + + default: + break; + } + + switch (options.fragment.default_int_precision) + { + case Options::Lowp: + statement("precision lowp int;"); + break; + + case Options::Mediump: + statement("precision mediump int;"); + break; + + case Options::Highp: + statement("precision highp int;"); + break; + + 
default: + break; + } + } + + if (execution.flags & (1ull << ExecutionModeEarlyFragmentTests)) + inputs.push_back("early_fragment_tests"); + if (execution.flags & (1ull << ExecutionModeDepthGreater)) + inputs.push_back("depth_greater"); + if (execution.flags & (1ull << ExecutionModeDepthLess)) + inputs.push_back("depth_less"); + + break; + + default: + break; + } + + if (!inputs.empty()) + statement("layout(", merge(inputs), ") in;"); + if (!outputs.empty()) + statement("layout(", merge(outputs), ") out;"); + + statement(""); +} + +void CompilerGLSL::emit_struct(SPIRType &type) +{ + // Struct types can be stamped out multiple times + // with just different offsets, matrix layouts, etc ... + // Type-punning with these types is legal, which complicates things + // when we are storing struct and array types in an SSBO for example. + if (type.type_alias != 0) + return; + + add_resource_name(type.self); + auto name = type_to_glsl(type); + + statement(!backend.explicit_struct_type ? "struct " : "", name); + begin_scope(); + + type.member_name_cache.clear(); + + uint32_t i = 0; + bool emitted = false; + for (auto &member : type.member_types) + { + add_member_name(type, i); + + auto &membertype = get(member); + statement(member_decl(type, membertype, i), ";"); + i++; + emitted = true; + } + end_scope_decl(); + + if (emitted) + statement(""); +} + +uint64_t CompilerGLSL::combined_decoration_for_member(const SPIRType &type, uint32_t index) +{ + uint64_t flags = 0; + auto &memb = meta[type.self].members; + if (index >= memb.size()) + return 0; + auto &dec = memb[index]; + + // If our type is a struct, traverse all the members as well recursively. + flags |= dec.decoration_flags; + for (uint32_t i = 0; i < type.member_types.size(); i++) + flags |= combined_decoration_for_member(get(type.member_types[i]), i); + + return flags; +} + +string CompilerGLSL::to_interpolation_qualifiers(uint64_t flags) +{ + string res; + //if (flags & (1ull << DecorationSmooth)) + // res += "smooth "; + if (flags & (1ull << DecorationFlat)) + res += "flat "; + if (flags & (1ull << DecorationNoPerspective)) + res += "noperspective "; + if (flags & (1ull << DecorationCentroid)) + res += "centroid "; + if (flags & (1ull << DecorationPatch)) + res += "patch "; + if (flags & (1ull << DecorationSample)) + res += "sample "; + if (flags & (1ull << DecorationInvariant)) + res += "invariant "; + + return res; +} + +string CompilerGLSL::layout_for_member(const SPIRType &type, uint32_t index) +{ + bool is_block = (meta[type.self].decoration.decoration_flags & + ((1ull << DecorationBlock) | (1ull << DecorationBufferBlock))) != 0; + if (!is_block) + return ""; + + auto &memb = meta[type.self].members; + if (index >= memb.size()) + return ""; + auto &dec = memb[index]; + + vector attr; + + // We can only apply layouts on members in block interfaces. + // This is a bit problematic because in SPIR-V decorations are applied on the struct types directly. + // This is not supported on GLSL, so we have to make the assumption that if a struct within our buffer block struct + // has a decoration, it was originally caused by a top-level layout() qualifier in GLSL. + // + // We would like to go from (SPIR-V style): + // + // struct Foo { layout(row_major) mat4 matrix; }; + // buffer UBO { Foo foo; }; + // + // to + // + // struct Foo { mat4 matrix; }; // GLSL doesn't support any layout shenanigans in raw struct declarations. + // buffer UBO { layout(row_major) Foo foo; }; // Apply the layout on top-level. 
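+	// combined_decoration_for_member() ORs together the decoration masks of all
+	// nested members recursively, which is how a row_major decoration buried in a
+	// sub-struct gets hoisted into the qualifier emitted here.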
+ auto flags = combined_decoration_for_member(type, index); + + if (flags & (1ull << DecorationRowMajor)) + attr.push_back("row_major"); + // We don't emit any global layouts, so column_major is default. + //if (flags & (1ull << DecorationColMajor)) + // attr.push_back("column_major"); + + if (dec.decoration_flags & (1ull << DecorationLocation)) + attr.push_back(join("location = ", dec.location)); + + if (attr.empty()) + return ""; + + string res = "layout("; + res += merge(attr); + res += ") "; + return res; +} + +const char *CompilerGLSL::format_to_glsl(spv::ImageFormat format) +{ + auto check_desktop = [this] { + if (options.es) + SPIRV_CROSS_THROW("Attempting to use image format not supported in ES profile."); + }; + + switch (format) + { + case ImageFormatRgba32f: + return "rgba32f"; + case ImageFormatRgba16f: + return "rgba16f"; + case ImageFormatR32f: + return "r32f"; + case ImageFormatRgba8: + return "rgba8"; + case ImageFormatRgba8Snorm: + return "rgba8_snorm"; + case ImageFormatRg32f: + return "rg32f"; + case ImageFormatRg16f: + return "rg16f"; + + case ImageFormatRgba32i: + return "rgba32i"; + case ImageFormatRgba16i: + return "rgba16i"; + case ImageFormatR32i: + return "r32i"; + case ImageFormatRgba8i: + return "rgba8i"; + case ImageFormatRg32i: + return "rg32i"; + case ImageFormatRg16i: + return "rg16i"; + + case ImageFormatRgba32ui: + return "rgba32ui"; + case ImageFormatRgba16ui: + return "rgba16ui"; + case ImageFormatR32ui: + return "r32ui"; + case ImageFormatRgba8ui: + return "rgba8ui"; + case ImageFormatRg32ui: + return "rg32ui"; + case ImageFormatRg16ui: + return "rg16ui"; + + // Desktop-only formats + case ImageFormatR11fG11fB10f: + check_desktop(); + return "r11f_g11f_b10f"; + case ImageFormatR16f: + check_desktop(); + return "r16f"; + case ImageFormatRgb10A2: + check_desktop(); + return "rgb10_a2"; + case ImageFormatR8: + check_desktop(); + return "r8"; + case ImageFormatRg8: + check_desktop(); + return "rg8"; + case ImageFormatR16: + check_desktop(); + return "r16"; + case ImageFormatRg16: + check_desktop(); + return "rg16"; + case ImageFormatRgba16: + check_desktop(); + return "rgba16"; + case ImageFormatR16Snorm: + check_desktop(); + return "r16_snorm"; + case ImageFormatRg16Snorm: + check_desktop(); + return "rg16_snorm"; + case ImageFormatRgba16Snorm: + check_desktop(); + return "rgba16_snorm"; + case ImageFormatR8Snorm: + check_desktop(); + return "r8_snorm"; + case ImageFormatRg8Snorm: + check_desktop(); + return "rg8_snorm"; + + case ImageFormatR8ui: + check_desktop(); + return "r8ui"; + case ImageFormatRg8ui: + check_desktop(); + return "rg8ui"; + case ImageFormatR16ui: + check_desktop(); + return "r16ui"; + case ImageFormatRgb10a2ui: + check_desktop(); + return "rgb10_a2ui"; + + case ImageFormatR8i: + check_desktop(); + return "r8i"; + case ImageFormatRg8i: + check_desktop(); + return "rg8i"; + case ImageFormatR16i: + check_desktop(); + return "r16i"; + + default: + case ImageFormatUnknown: + return nullptr; + } +} + +uint32_t CompilerGLSL::type_to_std430_base_size(const SPIRType &type) +{ + switch (type.basetype) + { + case SPIRType::Double: + case SPIRType::Int64: + case SPIRType::UInt64: + return 8; + default: + return 4; + } +} + +uint32_t CompilerGLSL::type_to_std430_alignment(const SPIRType &type, uint64_t flags) +{ + const uint32_t base_alignment = type_to_std430_base_size(type); + + if (type.basetype == SPIRType::Struct) + { + // Rule 9. Structs alignments are maximum alignment of its members. 
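+		// For example, "struct { float x; vec3 v; }" has member alignments of
+		// 4 (scalar, rule 1) and 16 (vec3, rule 3), so the struct aligns to 16.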
+ uint32_t alignment = 0; + for (uint32_t i = 0; i < type.member_types.size(); i++) + { + auto member_flags = meta[type.self].members.at(i).decoration_flags; + alignment = max(alignment, type_to_std430_alignment(get(type.member_types[i]), member_flags)); + } + + return alignment; + } + else + { + // From 7.6.2.2 in GL 4.5 core spec. + // Rule 1 + if (type.vecsize == 1 && type.columns == 1) + return base_alignment; + + // Rule 2 + if ((type.vecsize == 2 || type.vecsize == 4) && type.columns == 1) + return type.vecsize * base_alignment; + + // Rule 3 + if (type.vecsize == 3 && type.columns == 1) + return 4 * base_alignment; + + // Rule 4 implied. Alignment does not change in std430. + + // Rule 5. Column-major matrices are stored as arrays of + // vectors. + if ((flags & (1ull << DecorationColMajor)) && type.columns > 1) + { + if (type.vecsize == 3) + return 4 * base_alignment; + else + return type.vecsize * base_alignment; + } + + // Rule 6 implied. + + // Rule 7. + if ((flags & (1ull << DecorationRowMajor)) && type.vecsize > 1) + { + if (type.columns == 3) + return 4 * base_alignment; + else + return type.columns * base_alignment; + } + + // Rule 8 implied. + } + + SPIRV_CROSS_THROW("Did not find suitable std430 rule for type. Bogus decorations?"); +} + +uint32_t CompilerGLSL::type_to_std430_array_stride(const SPIRType &type, uint64_t flags) +{ + // Array stride is equal to aligned size of the underlying type. + SPIRType tmp = type; + tmp.array.pop_back(); + tmp.array_size_literal.pop_back(); + uint32_t size = type_to_std430_size(tmp, flags); + uint32_t alignment = type_to_std430_alignment(tmp, flags); + return (size + alignment - 1) & ~(alignment - 1); +} + +uint32_t CompilerGLSL::type_to_std430_size(const SPIRType &type, uint64_t flags) +{ + if (!type.array.empty()) + return to_array_size_literal(type, uint32_t(type.array.size()) - 1) * type_to_std430_array_stride(type, flags); + + const uint32_t base_alignment = type_to_std430_base_size(type); + uint32_t size = 0; + + if (type.basetype == SPIRType::Struct) + { + uint32_t pad_alignment = 1; + + for (uint32_t i = 0; i < type.member_types.size(); i++) + { + auto member_flags = meta[type.self].members.at(i).decoration_flags; + auto &member_type = get(type.member_types[i]); + + uint32_t std430_alignment = type_to_std430_alignment(member_type, member_flags); + uint32_t alignment = max(std430_alignment, pad_alignment); + + // The next member following a struct member is aligned to the base alignment of the struct that came before. + // GL 4.5 spec, 7.6.2.2. + if (member_type.basetype == SPIRType::Struct) + pad_alignment = std430_alignment; + else + pad_alignment = 1; + + size = (size + alignment - 1) & ~(alignment - 1); + size += type_to_std430_size(member_type, member_flags); + } + } + else + { + if (type.columns == 1) + size = type.vecsize * base_alignment; + + if ((flags & (1ull << DecorationColMajor)) && type.columns > 1) + { + if (type.vecsize == 3) + size = type.columns * 4 * base_alignment; + else + size = type.columns * type.vecsize * base_alignment; + } + + if ((flags & (1ull << DecorationRowMajor)) && type.vecsize > 1) + { + if (type.columns == 3) + size = type.vecsize * 4 * base_alignment; + else + size = type.vecsize * type.columns * base_alignment; + } + } + + return size; +} + +bool CompilerGLSL::ssbo_is_std430_packing(const SPIRType &type) +{ + // This is very tricky and error prone, but try to be exhaustive and correct here. + // SPIR-V doesn't directly say if we're using std430 or std140. 
+ // SPIR-V communicates this using Offset and ArrayStride decorations (which is what really matters), + // so we have to try to infer whether or not the original GLSL source was std140 or std430 based on this information. + // We do not have to consider shared or packed since these layouts are not allowed in Vulkan SPIR-V (they are useless anyways, and custom offsets would do the same thing). + // + // It is almost certain that we're using std430, but it gets tricky with arrays in particular. + // We will assume std430, but infer std140 if we can prove the struct is not compliant with std430. + // + // The only two differences between std140 and std430 are related to padding alignment/array stride + // in arrays and structs. In std140 they take minimum vec4 alignment. + // std430 only removes the vec4 requirement. + + uint32_t offset = 0; + uint32_t pad_alignment = 1; + + for (uint32_t i = 0; i < type.member_types.size(); i++) + { + auto &memb_type = get(type.member_types[i]); + auto member_flags = meta[type.self].members.at(i).decoration_flags; + + // Verify alignment rules. + uint32_t std430_alignment = type_to_std430_alignment(memb_type, member_flags); + uint32_t alignment = max(std430_alignment, pad_alignment); + offset = (offset + alignment - 1) & ~(alignment - 1); + + // The next member following a struct member is aligned to the base alignment of the struct that came before. + // GL 4.5 spec, 7.6.2.2. + if (memb_type.basetype == SPIRType::Struct) + pad_alignment = std430_alignment; + else + pad_alignment = 1; + + uint32_t actual_offset = type_struct_member_offset(type, i); + if (actual_offset != offset) // This cannot be std430. + return false; + + // Verify array stride rules. + if (!memb_type.array.empty() && + type_to_std430_array_stride(memb_type, member_flags) != type_struct_member_array_stride(type, i)) + return false; + + // Verify that sub-structs also follow std430 rules. + if (!memb_type.member_types.empty() && !ssbo_is_std430_packing(memb_type)) + return false; + + // Bump size. + offset += type_to_std430_size(memb_type, member_flags); + } + + return true; +} + +string CompilerGLSL::layout_for_variable(const SPIRVariable &var) +{ + // FIXME: Come up with a better solution for when to disable layouts. + // Having layouts depend on extensions as well as which types + // of layouts are used. For now, the simple solution is to just disable + // layouts for legacy versions. + if (is_legacy()) + return ""; + + vector attr; + + auto &dec = meta[var.self].decoration; + auto &type = get(var.basetype); + auto flags = dec.decoration_flags; + auto typeflags = meta[type.self].decoration.decoration_flags; + + if (options.vulkan_semantics && var.storage == StorageClassPushConstant) + attr.push_back("push_constant"); + + if (flags & (1ull << DecorationRowMajor)) + attr.push_back("row_major"); + if (flags & (1ull << DecorationColMajor)) + attr.push_back("column_major"); + + if (options.vulkan_semantics) + { + if (flags & (1ull << DecorationInputAttachmentIndex)) + attr.push_back(join("input_attachment_index = ", dec.input_attachment)); + } + + if (flags & (1ull << DecorationLocation)) + { + uint64_t combined_decoration = 0; + for (uint32_t i = 0; i < meta[type.self].members.size(); i++) + combined_decoration |= combined_decoration_for_member(type, i); + + // If our members have location decorations, we don't need to + // emit location decorations at the top as well (looks weird). 
+ if ((combined_decoration & (1ull << DecorationLocation)) == 0) + attr.push_back(join("location = ", dec.location)); + } + + // set = 0 is the default. Do not emit set = decoration in regular GLSL output, but + // we should preserve it in Vulkan GLSL mode. + if (var.storage != StorageClassPushConstant) + { + if ((flags & (1ull << DecorationDescriptorSet)) && (dec.set != 0 || options.vulkan_semantics)) + attr.push_back(join("set = ", dec.set)); + } + + if (flags & (1ull << DecorationBinding)) + attr.push_back(join("binding = ", dec.binding)); + if (flags & (1ull << DecorationCoherent)) + attr.push_back("coherent"); + if (flags & (1ull << DecorationOffset)) + attr.push_back(join("offset = ", dec.offset)); + + // Instead of adding explicit offsets for every element here, just assume we're using std140 or std430. + // If SPIR-V does not comply with either layout, we cannot really work around it. + if (var.storage == StorageClassUniform && (typeflags & (1ull << DecorationBlock))) + attr.push_back("std140"); + else if (var.storage == StorageClassUniform && (typeflags & (1ull << DecorationBufferBlock))) + attr.push_back(ssbo_is_std430_packing(type) ? "std430" : "std140"); + else if (options.vulkan_semantics && var.storage == StorageClassPushConstant) + attr.push_back(ssbo_is_std430_packing(type) ? "std430" : "std140"); + + // For images, the type itself adds a layout qualifer. + if (type.basetype == SPIRType::Image) + { + const char *fmt = format_to_glsl(type.image.format); + if (fmt) + attr.push_back(fmt); + } + + if (attr.empty()) + return ""; + + string res = "layout("; + res += merge(attr); + res += ") "; + return res; +} + +void CompilerGLSL::emit_push_constant_block(const SPIRVariable &var) +{ + if (options.vulkan_semantics) + emit_push_constant_block_vulkan(var); + else + emit_push_constant_block_glsl(var); +} + +void CompilerGLSL::emit_push_constant_block_vulkan(const SPIRVariable &var) +{ + emit_buffer_block(var); +} + +void CompilerGLSL::emit_push_constant_block_glsl(const SPIRVariable &var) +{ + // OpenGL has no concept of push constant blocks, implement it as a uniform struct. + auto &type = get(var.basetype); + + auto &flags = meta[var.self].decoration.decoration_flags; + flags &= ~((1ull << DecorationBinding) | (1ull << DecorationDescriptorSet)); + +#if 0 + if (flags & ((1ull << DecorationBinding) | (1ull << DecorationDescriptorSet))) + SPIRV_CROSS_THROW("Push constant blocks cannot be compiled to GLSL with Binding or Set syntax. " + "Remap to location with reflection API first or disable these decorations."); +#endif + + // We're emitting the push constant block as a regular struct, so disable the block qualifier temporarily. + // Otherwise, we will end up emitting layout() qualifiers on naked structs which is not allowed. + auto &block_flags = meta[type.self].decoration.decoration_flags; + uint64_t block_flag = block_flags & (1ull << DecorationBlock); + block_flags &= ~block_flag; + + emit_struct(type); + + block_flags |= block_flag; + + emit_uniform(var); + statement(""); +} + +void CompilerGLSL::emit_buffer_block(const SPIRVariable &var) +{ + auto &type = get(var.basetype); + bool ssbo = (meta[type.self].decoration.decoration_flags & (1ull << DecorationBufferBlock)) != 0; + bool is_restrict = (meta[var.self].decoration.decoration_flags & (1ull << DecorationRestrict)) != 0; + + add_resource_name(var.self); + + // Block names should never alias. 
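+	// If the name is already in use by another resource, get_fallback_name() below
+	// provides a non-colliding name derived from the ID instead.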
+ auto buffer_name = to_name(type.self, false); + + // Shaders never use the block by interface name, so we don't + // have to track this other than updating name caches. + if (resource_names.find(buffer_name) != end(resource_names)) + buffer_name = get_fallback_name(type.self); + else + resource_names.insert(buffer_name); + + statement(layout_for_variable(var), is_restrict ? "restrict " : "", ssbo ? "buffer " : "uniform ", buffer_name); + begin_scope(); + + type.member_name_cache.clear(); + + uint32_t i = 0; + for (auto &member : type.member_types) + { + add_member_name(type, i); + + auto &membertype = get(member); + statement(member_decl(type, membertype, i), ";"); + i++; + } + + end_scope_decl(to_name(var.self) + type_to_array_glsl(type)); + statement(""); +} + +void CompilerGLSL::emit_interface_block(const SPIRVariable &var) +{ + auto &execution = get_entry_point(); + auto &type = get(var.basetype); + + // Either make it plain in/out or in/out blocks depending on what shader is doing ... + bool block = (meta[type.self].decoration.decoration_flags & (1ull << DecorationBlock)) != 0; + + const char *qual = nullptr; + if (is_legacy() && execution.model == ExecutionModelVertex) + qual = var.storage == StorageClassInput ? "attribute " : "varying "; + else if (is_legacy() && execution.model == ExecutionModelFragment) + qual = "varying "; // Fragment outputs are renamed so they never hit this case. + else + qual = var.storage == StorageClassInput ? "in " : "out "; + + if (block) + { + add_resource_name(var.self); + + // Block names should never alias. + auto block_name = to_name(type.self, false); + + // Shaders never use the block by interface name, so we don't + // have to track this other than updating name caches. + if (resource_names.find(block_name) != end(resource_names)) + block_name = get_fallback_name(type.self); + else + resource_names.insert(block_name); + + statement(layout_for_variable(var), qual, block_name); + begin_scope(); + + type.member_name_cache.clear(); + + uint32_t i = 0; + for (auto &member : type.member_types) + { + add_member_name(type, i); + + auto &membertype = get(member); + statement(member_decl(type, membertype, i), ";"); + i++; + } + + end_scope_decl(join(to_name(var.self), type_to_array_glsl(type))); + statement(""); + } + else + { + add_resource_name(var.self); + statement(layout_for_variable(var), qual, variable_decl(var), ";"); + } +} + +void CompilerGLSL::emit_uniform(const SPIRVariable &var) +{ + auto &type = get(var.basetype); + if (type.basetype == SPIRType::Image && type.image.sampled == 2) + { + if (!options.es && options.version < 420) + require_extension("GL_ARB_shader_image_load_store"); + else if (options.es && options.version < 310) + SPIRV_CROSS_THROW("At least ESSL 3.10 required for shader image load store."); + } + + add_resource_name(var.self); + statement(layout_for_variable(var), "uniform ", variable_decl(var), ";"); +} + +void CompilerGLSL::emit_specialization_constant(const SPIRConstant &constant) +{ + auto &type = get(constant.constant_type); + auto name = to_name(constant.self); + + statement("layout(constant_id = ", get_decoration(constant.self, DecorationSpecId), ") const ", + variable_decl(type, name), " = ", constant_expression(constant), ";"); +} + +void CompilerGLSL::replace_illegal_names() +{ + // clang-format off + static const unordered_set keywords = { + "active", "asm", "atomic_uint", "attribute", "bool", "break", + "bvec2", "bvec3", "bvec4", "case", "cast", "centroid", "class", "coherent", "common", "const", "continue", 
"default", "discard", + "dmat2", "dmat2x2", "dmat2x3", "dmat2x4", "dmat3", "dmat3x2", "dmat3x3", "dmat3x4", "dmat4", "dmat4x2", "dmat4x3", "dmat4x4", + "do", "double", "dvec2", "dvec3", "dvec4", "else", "enum", "extern", "external", "false", "filter", "fixed", "flat", "float", + "for", "fvec2", "fvec3", "fvec4", "goto", "half", "highp", "hvec2", "hvec3", "hvec4", "if", "iimage1D", "iimage1DArray", + "iimage2D", "iimage2DArray", "iimage2DMS", "iimage2DMSArray", "iimage2DRect", "iimage3D", "iimageBuffer", "iimageCube", + "iimageCubeArray", "image1D", "image1DArray", "image2D", "image2DArray", "image2DMS", "image2DMSArray", "image2DRect", + "image3D", "imageBuffer", "imageCube", "imageCubeArray", "in", "inline", "inout", "input", "int", "interface", "invariant", + "isampler1D", "isampler1DArray", "isampler2D", "isampler2DArray", "isampler2DMS", "isampler2DMSArray", "isampler2DRect", + "isampler3D", "isamplerBuffer", "isamplerCube", "isamplerCubeArray", "ivec2", "ivec3", "ivec4", "layout", "long", "lowp", + "mat2", "mat2x2", "mat2x3", "mat2x4", "mat3", "mat3x2", "mat3x3", "mat3x4", "mat4", "mat4x2", "mat4x3", "mat4x4", "mediump", + "namespace", "noinline", "noperspective", "out", "output", "packed", "partition", "patch", "precision", "public", "readonly", + "resource", "restrict", "return", "row_major", "sample", "sampler1D", "sampler1DArray", "sampler1DArrayShadow", + "sampler1DShadow", "sampler2D", "sampler2DArray", "sampler2DArrayShadow", "sampler2DMS", "sampler2DMSArray", + "sampler2DRect", "sampler2DRectShadow", "sampler2DShadow", "sampler3D", "sampler3DRect", "samplerBuffer", + "samplerCube", "samplerCubeArray", "samplerCubeArrayShadow", "samplerCubeShadow", "short", "sizeof", "smooth", "static", + "struct", "subroutine", "superp", "switch", "template", "this", "true", "typedef", "uimage1D", "uimage1DArray", "uimage2D", + "uimage2DArray", "uimage2DMS", "uimage2DMSArray", "uimage2DRect", "uimage3D", "uimageBuffer", "uimageCube", + "uimageCubeArray", "uint", "uniform", "union", "unsigned", "usampler1D", "usampler1DArray", "usampler2D", "usampler2DArray", + "usampler2DMS", "usampler2DMSArray", "usampler2DRect", "usampler3D", "usamplerBuffer", "usamplerCube", + "usamplerCubeArray", "using", "uvec2", "uvec3", "uvec4", "varying", "vec2", "vec3", "vec4", "void", "volatile", "volatile", + "while", "writeonly" + }; + // clang-format on + + for (auto &id : ids) + { + if (id.get_type() == TypeVariable) + { + auto &var = id.get(); + if (!is_hidden_variable(var)) + { + auto &m = meta[var.self].decoration; + if (m.alias.compare(0, 3, "gl_") == 0 || keywords.find(m.alias) != end(keywords)) + m.alias = join("_", m.alias); + } + } + } +} + +void CompilerGLSL::replace_fragment_output(SPIRVariable &var) +{ + auto &m = meta[var.self].decoration; + uint32_t location = 0; + if (m.decoration_flags & (1ull << DecorationLocation)) + location = m.location; + + // If our variable is arrayed, we must not emit the array part of this as the SPIR-V will + // do the access chain part of this for us. + auto &type = get(var.basetype); + + if (type.array.empty()) + { + // Redirect the write to a specific render target in legacy GLSL. + m.alias = join("gl_FragData[", location, "]"); + + if (is_legacy_es() && location != 0) + require_extension("GL_EXT_draw_buffers"); + } + else if (type.array.size() == 1) + { + // If location is non-zero, we probably have to add an offset. + // This gets really tricky since we'd have to inject an offset in the access chain. 
+ // FIXME: This seems like an extremely odd-ball case, so it's probably fine to leave it like this for now. + m.alias = "gl_FragData"; + if (location != 0) + SPIRV_CROSS_THROW("Arrayed output variable used, but location is not 0. " + "This is unimplemented in SPIRV-Cross."); + + if (is_legacy_es()) + require_extension("GL_EXT_draw_buffers"); + } + else + SPIRV_CROSS_THROW("Array-of-array output variable used. This cannot be implemented in legacy GLSL."); + + var.compat_builtin = true; // We don't want to declare this variable, but use the name as-is. +} + +void CompilerGLSL::replace_fragment_outputs() +{ + for (auto &id : ids) + { + if (id.get_type() == TypeVariable) + { + auto &var = id.get(); + auto &type = get(var.basetype); + + if (!is_builtin_variable(var) && !var.remapped_variable && type.pointer && + var.storage == StorageClassOutput) + replace_fragment_output(var); + } + } +} + +string CompilerGLSL::remap_swizzle(uint32_t result_type, uint32_t input_components, uint32_t expr) +{ + auto &out_type = get(result_type); + + if (out_type.vecsize == input_components) + return to_expression(expr); + else if (input_components == 1) + return join(type_to_glsl(out_type), "(", to_expression(expr), ")"); + else + { + auto e = to_enclosed_expression(expr) + "."; + // Just clamp the swizzle index if we have more outputs than inputs. + for (uint32_t c = 0; c < out_type.vecsize; c++) + e += index_to_swizzle(min(c, input_components - 1)); + if (backend.swizzle_is_function && out_type.vecsize > 1) + e += "()"; + return e; + } +} + +void CompilerGLSL::emit_pls() +{ + auto &execution = get_entry_point(); + if (execution.model != ExecutionModelFragment) + SPIRV_CROSS_THROW("Pixel local storage only supported in fragment shaders."); + + if (!options.es) + SPIRV_CROSS_THROW("Pixel local storage only supported in OpenGL ES."); + + if (options.version < 300) + SPIRV_CROSS_THROW("Pixel local storage only supported in ESSL 3.0 and above."); + + if (!pls_inputs.empty()) + { + statement("__pixel_local_inEXT _PLSIn"); + begin_scope(); + for (auto &input : pls_inputs) + statement(pls_decl(input), ";"); + end_scope_decl(); + statement(""); + } + + if (!pls_outputs.empty()) + { + statement("__pixel_local_outEXT _PLSOut"); + begin_scope(); + for (auto &output : pls_outputs) + statement(pls_decl(output), ";"); + end_scope_decl(); + statement(""); + } +} + +void CompilerGLSL::emit_resources() +{ + auto &execution = get_entry_point(); + + replace_illegal_names(); + + // Legacy GL uses gl_FragData[], redeclare all fragment outputs + // with builtins. + if (execution.model == ExecutionModelFragment && is_legacy()) + replace_fragment_outputs(); + + // Emit PLS blocks if we have such variables. + if (!pls_inputs.empty() || !pls_outputs.empty()) + emit_pls(); + + bool emitted = false; + + // If emitted Vulkan GLSL, + // emit specialization constants as actual floats, + // spec op expressions will redirect to the constant name. + // + // TODO: If we have the fringe case that we create a spec constant which depends on a struct type, + // we'll have to deal with that, but there's currently no known way to express that. 
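+	// For reference, emit_specialization_constant() above emits declarations of the
+	// form "layout(constant_id = N) const int foo = 1;" (names and values here are
+	// placeholders), and spec-op expressions then refer to the constant by name.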
+ if (options.vulkan_semantics) + { + for (auto &id : ids) + { + if (id.get_type() == TypeConstant) + { + auto &c = id.get(); + if (!c.specialization) + continue; + + emit_specialization_constant(c); + emitted = true; + } + } + } + + if (emitted) + statement(""); + emitted = false; + + // Output all basic struct types which are not Block or BufferBlock as these are declared inplace + // when such variables are instantiated. + for (auto &id : ids) + { + if (id.get_type() == TypeType) + { + auto &type = id.get(); + if (type.basetype == SPIRType::Struct && type.array.empty() && !type.pointer && + (meta[type.self].decoration.decoration_flags & + ((1ull << DecorationBlock) | (1ull << DecorationBufferBlock))) == 0) + { + emit_struct(type); + } + } + } + + // Output UBOs and SSBOs + for (auto &id : ids) + { + if (id.get_type() == TypeVariable) + { + auto &var = id.get(); + auto &type = get(var.basetype); + + if (var.storage != StorageClassFunction && type.pointer && type.storage == StorageClassUniform && + !is_hidden_variable(var) && (meta[type.self].decoration.decoration_flags & + ((1ull << DecorationBlock) | (1ull << DecorationBufferBlock)))) + { + emit_buffer_block(var); + } + } + } + + // Output push constant blocks + for (auto &id : ids) + { + if (id.get_type() == TypeVariable) + { + auto &var = id.get(); + auto &type = get(var.basetype); + if (var.storage != StorageClassFunction && type.pointer && type.storage == StorageClassPushConstant && + !is_hidden_variable(var)) + { + emit_push_constant_block(var); + } + } + } + + bool skip_separate_image_sampler = !combined_image_samplers.empty() || !options.vulkan_semantics; + + // Output Uniform Constants (values, samplers, images, etc). + for (auto &id : ids) + { + if (id.get_type() == TypeVariable) + { + auto &var = id.get(); + auto &type = get(var.basetype); + + // If we're remapping separate samplers and images, only emit the combined samplers. + if (skip_separate_image_sampler) + { + bool separate_image = type.basetype == SPIRType::Image && type.image.sampled == 1; + bool separate_sampler = type.basetype == SPIRType::Sampler; + if (separate_image || separate_sampler) + continue; + } + + if (var.storage != StorageClassFunction && type.pointer && + (type.storage == StorageClassUniformConstant || type.storage == StorageClassAtomicCounter) && + !is_hidden_variable(var)) + { + emit_uniform(var); + emitted = true; + } + } + } + + if (emitted) + statement(""); + emitted = false; + + // Output in/out interfaces. + for (auto &id : ids) + { + if (id.get_type() == TypeVariable) + { + auto &var = id.get(); + auto &type = get(var.basetype); + + if (var.storage != StorageClassFunction && type.pointer && + (var.storage == StorageClassInput || var.storage == StorageClassOutput) && + interface_variable_exists_in_entry_point(var.self) && !is_hidden_variable(var)) + { + emit_interface_block(var); + emitted = true; + } + else if (is_builtin_variable(var)) + { + // For gl_InstanceIndex emulation on GLES, the API user needs to + // supply this uniform. + if (meta[var.self].decoration.builtin_type == BuiltInInstanceIndex && !options.vulkan_semantics) + { + statement("uniform int SPIRV_Cross_BaseInstance;"); + emitted = true; + } + } + } + } + + // Global variables. 
+ for (auto global : global_variables) + { + auto &var = get(global); + if (var.storage != StorageClassOutput) + { + add_resource_name(var.self); + statement(variable_decl(var), ";"); + emitted = true; + } + } + + if (emitted) + statement(""); +} + +// Returns a string representation of the ID, usable as a function arg. +// Default is to simply return the expression representation fo the arg ID. +// Subclasses may override to modify the return value. +string CompilerGLSL::to_func_call_arg(uint32_t id) +{ + return to_expression(id); +} + +void CompilerGLSL::handle_invalid_expression(uint32_t id) +{ + auto &expr = get(id); + + // This expression has been invalidated in the past. + // Be careful with this expression next pass ... + // Used for OpCompositeInsert forwarding atm. + expr.used_while_invalidated = true; + + // We tried to read an invalidated expression. + // This means we need another pass at compilation, but next time, force temporary variables so that they cannot be invalidated. + forced_temporaries.insert(id); + force_recompile = true; +} + +// Sometimes we proactively enclosed an expression where it turns out we might have not needed it after all. +void CompilerGLSL::strip_enclosed_expression(string &expr) +{ + if (expr.size() < 2 || expr.front() != '(' || expr.back() != ')') + return; + + // Have to make sure that our first and last parens actually enclose everything inside it. + uint32_t paren_count = 0; + for (auto &c : expr) + { + if (c == '(') + paren_count++; + else if (c == ')') + { + paren_count--; + + // If we hit 0 and this is not the final char, our first and final parens actually don't + // enclose the expression, and we cannot strip, e.g.: (a + b) * (c + d). + if (paren_count == 0 && &c != &expr.back()) + return; + } + } + expr.pop_back(); + expr.erase(begin(expr)); +} + +// Just like to_expression except that we enclose the expression inside parentheses if needed. +string CompilerGLSL::to_enclosed_expression(uint32_t id) +{ + auto expr = to_expression(id); + bool need_parens = false; + uint32_t paren_count = 0; + for (auto c : expr) + { + if (c == '(') + paren_count++; + else if (c == ')') + { + assert(paren_count); + paren_count--; + } + else if (c == ' ' && paren_count == 0) + { + need_parens = true; + break; + } + } + assert(paren_count == 0); + + // If this expression contains any spaces which are not enclosed by parentheses, + // we need to enclose it so we can treat the whole string as an expression. + // This happens when two expressions have been part of a binary op earlier. + if (need_parens) + return join('(', expr, ')'); + else + return expr; +} + +string CompilerGLSL::to_expression(uint32_t id) +{ + auto itr = invalid_expressions.find(id); + if (itr != end(invalid_expressions)) + handle_invalid_expression(id); + + if (ids[id].get_type() == TypeExpression) + { + // We might have a more complex chain of dependencies. + // A possible scenario is that we + // + // %1 = OpLoad + // %2 = OpDoSomething %1 %1. here %2 will have a dependency on %1. + // %3 = OpDoSomethingAgain %2 %2. Here %3 will lose the link to %1 since we don't propagate the dependencies like that. + // OpStore %1 %foo // Here we can invalidate %1, and hence all expressions which depend on %1. Only %2 will know since it's part of invalid_expressions. + // %4 = OpDoSomethingAnotherTime %3 %3 // If we forward all expressions we will see %1 expression after store, not before. 
+ // + // However, we can propagate up a list of depended expressions when we used %2, so we can check if %2 is invalid when reading %3 after the store, + // and see that we should not forward reads of the original variable. + auto &expr = get(id); + for (uint32_t dep : expr.expression_dependencies) + if (invalid_expressions.find(dep) != end(invalid_expressions)) + handle_invalid_expression(dep); + } + + track_expression_read(id); + + switch (ids[id].get_type()) + { + case TypeExpression: + { + auto &e = get(id); + if (e.base_expression) + return to_enclosed_expression(e.base_expression) + e.expression; + else + return e.expression; + } + + case TypeConstant: + { + auto &c = get(id); + if (c.specialization && options.vulkan_semantics) + return to_name(id); + else + return constant_expression(c); + } + + case TypeConstantOp: + return constant_op_expression(get(id)); + + case TypeVariable: + { + auto &var = get(id); + // If we try to use a loop variable before the loop header, we have to redirect it to the static expression, + // the variable has not been declared yet. + if (var.statically_assigned || (var.loop_variable && !var.loop_variable_enable)) + return to_expression(var.static_expression); + else if (var.deferred_declaration) + { + var.deferred_declaration = false; + return variable_decl(var); + } + else + { + auto &dec = meta[var.self].decoration; + if (dec.builtin) + return builtin_to_glsl(dec.builtin_type); + else + return to_name(id); + } + } + + default: + return to_name(id); + } +} + +string CompilerGLSL::constant_op_expression(const SPIRConstantOp &cop) +{ + auto &type = get(cop.basetype); + bool binary = false; + bool unary = false; + string op; + + // TODO: Find a clean way to reuse emit_instruction. + switch (cop.opcode) + { + case OpSConvert: + case OpUConvert: + case OpFConvert: + op = type_to_glsl_constructor(type); + break; + +#define BOP(opname, x) \ + case Op##opname: \ + binary = true; \ + op = x; \ + break + +#define UOP(opname, x) \ + case Op##opname: \ + unary = true; \ + op = x; \ + break + + UOP(SNegate, "-"); + UOP(Not, "~"); + BOP(IAdd, "+"); + BOP(ISub, "-"); + BOP(IMul, "*"); + BOP(SDiv, "/"); + BOP(UDiv, "/"); + BOP(UMod, "%"); + BOP(SMod, "%"); + BOP(ShiftRightLogical, ">>"); + BOP(ShiftRightArithmetic, ">>"); + BOP(ShiftLeftLogical, "<<"); + BOP(BitwiseOr, "|"); + BOP(BitwiseXor, "^"); + BOP(BitwiseAnd, "&"); + BOP(LogicalOr, "||"); + BOP(LogicalAnd, "&&"); + UOP(LogicalNot, "!"); + BOP(LogicalEqual, "=="); + BOP(LogicalNotEqual, "!="); + BOP(IEqual, "=="); + BOP(INotEqual, "!="); + BOP(ULessThan, "<"); + BOP(SLessThan, "<"); + BOP(ULessThanEqual, "<="); + BOP(SLessThanEqual, "<="); + BOP(UGreaterThan, ">"); + BOP(SGreaterThan, ">"); + BOP(UGreaterThanEqual, ">="); + BOP(SGreaterThanEqual, ">="); + + case OpSelect: + { + if (cop.arguments.size() < 3) + SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp."); + + // This one is pretty annoying. It's triggered from + // uint(bool), int(bool) from spec constants. + // In order to preserve its compile-time constness in Vulkan GLSL, + // we need to reduce the OpSelect expression back to this simplified model. + // If we cannot, fail. + if (!to_trivial_mix_op(type, op, cop.arguments[2], cop.arguments[1], cop.arguments[0])) + { + SPIRV_CROSS_THROW( + "Cannot implement specialization constant op OpSelect. 
" + "Need trivial select implementation which can be resolved to a simple cast from boolean."); + } + break; + } + + default: + // Some opcodes are unimplemented here, these are currently not possible to test from glslang. + SPIRV_CROSS_THROW("Unimplemented spec constant op."); + } + + SPIRType::BaseType input_type; + bool skip_cast_if_equal_type = opcode_is_sign_invariant(cop.opcode); + + switch (cop.opcode) + { + case OpIEqual: + case OpINotEqual: + input_type = SPIRType::Int; + break; + + default: + input_type = type.basetype; + break; + } + +#undef BOP +#undef UOP + if (binary) + { + if (cop.arguments.size() < 2) + SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp."); + + string cast_op0; + string cast_op1; + auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, cop.arguments[0], + cop.arguments[1], skip_cast_if_equal_type); + + if (type.basetype != input_type && type.basetype != SPIRType::Boolean) + { + expected_type.basetype = input_type; + auto expr = bitcast_glsl_op(type, expected_type); + expr += '('; + expr += join(cast_op0, " ", op, " ", cast_op1); + expr += ')'; + return expr; + } + else + return join("(", cast_op0, " ", op, " ", cast_op1, ")"); + } + else if (unary) + { + if (cop.arguments.size() < 1) + SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp."); + + // Auto-bitcast to result type as needed. + // Works around various casting scenarios in glslang as there is no OpBitcast for specialization constants. + return join("(", op, bitcast_glsl(type, cop.arguments[0]), ")"); + } + else + { + if (cop.arguments.size() < 1) + SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp."); + return join(op, "(", to_expression(cop.arguments[0]), ")"); + } +} + +string CompilerGLSL::constant_expression(const SPIRConstant &c) +{ + if (!c.subconstants.empty()) + { + // Handles Arrays and structures. + string res; + if (backend.use_initializer_list) + res = "{ "; + else + res = type_to_glsl_constructor(get(c.constant_type)) + "("; + + for (auto &elem : c.subconstants) + { + auto &subc = get(elem); + if (subc.specialization && options.vulkan_semantics) + res += to_name(elem); + else + res += constant_expression(get(elem)); + + if (&elem != &c.subconstants.back()) + res += ", "; + } + + res += backend.use_initializer_list ? 
" }" : ")"; + return res; + } + else if (c.columns() == 1) + { + return constant_expression_vector(c, 0); + } + else + { + string res = type_to_glsl(get(c.constant_type)) + "("; + for (uint32_t col = 0; col < c.columns(); col++) + { + res += constant_expression_vector(c, col); + if (col + 1 < c.columns()) + res += ", "; + } + res += ")"; + return res; + } +} + +string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t vector) +{ + auto type = get(c.constant_type); + type.columns = 1; + + string res; + if (c.vector_size() > 1) + res += type_to_glsl(type) + "("; + + bool splat = c.vector_size() > 1; + if (splat) + { + if (type_to_std430_base_size(type) == 8) + { + uint64_t ident = c.scalar_u64(vector, 0); + for (uint32_t i = 1; i < c.vector_size(); i++) + if (ident != c.scalar_u64(vector, i)) + splat = false; + } + else + { + uint32_t ident = c.scalar(vector, 0); + for (uint32_t i = 1; i < c.vector_size(); i++) + if (ident != c.scalar(vector, i)) + splat = false; + } + } + + switch (type.basetype) + { + case SPIRType::Float: + if (splat) + { + res += convert_to_string(c.scalar_f32(vector, 0)); + if (backend.float_literal_suffix) + res += "f"; + } + else + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + res += convert_to_string(c.scalar_f32(vector, i)); + if (backend.float_literal_suffix) + res += "f"; + if (i + 1 < c.vector_size()) + res += ", "; + } + } + break; + + case SPIRType::Double: + if (splat) + { + res += convert_to_string(c.scalar_f64(vector, 0)); + if (backend.double_literal_suffix) + res += "lf"; + } + else + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + res += convert_to_string(c.scalar_f64(vector, i)); + if (backend.double_literal_suffix) + res += "lf"; + if (i + 1 < c.vector_size()) + res += ", "; + } + } + break; + + case SPIRType::Int64: + if (splat) + { + res += convert_to_string(c.scalar_i64(vector, 0)); + if (backend.long_long_literal_suffix) + res += "ll"; + else + res += "l"; + } + else + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + res += convert_to_string(c.scalar_i64(vector, i)); + if (backend.long_long_literal_suffix) + res += "ll"; + else + res += "l"; + if (i + 1 < c.vector_size()) + res += ", "; + } + } + break; + + case SPIRType::UInt64: + if (splat) + { + res += convert_to_string(c.scalar_u64(vector, 0)); + if (backend.long_long_literal_suffix) + res += "ull"; + else + res += "ul"; + } + else + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + res += convert_to_string(c.scalar_u64(vector, i)); + if (backend.long_long_literal_suffix) + res += "ull"; + else + res += "ul"; + if (i + 1 < c.vector_size()) + res += ", "; + } + } + break; + + case SPIRType::UInt: + if (splat) + { + res += convert_to_string(c.scalar(vector, 0)); + if (backend.uint32_t_literal_suffix) + res += "u"; + } + else + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + res += convert_to_string(c.scalar(vector, i)); + if (backend.uint32_t_literal_suffix) + res += "u"; + if (i + 1 < c.vector_size()) + res += ", "; + } + } + break; + + case SPIRType::Int: + if (splat) + res += convert_to_string(c.scalar_i32(vector, 0)); + else + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + res += convert_to_string(c.scalar_i32(vector, i)); + if (i + 1 < c.vector_size()) + res += ", "; + } + } + break; + + case SPIRType::Boolean: + if (splat) + res += c.scalar(vector, 0) ? "true" : "false"; + else + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + res += c.scalar(vector, i) ? 
"true" : "false"; + if (i + 1 < c.vector_size()) + res += ", "; + } + } + break; + + default: + SPIRV_CROSS_THROW("Invalid constant expression basetype."); + } + + if (c.vector_size() > 1) + res += ")"; + + return res; +} + +string CompilerGLSL::declare_temporary(uint32_t result_type, uint32_t result_id) +{ + auto &type = get(result_type); + auto flags = meta[result_id].decoration.decoration_flags; + + // If we're declaring temporaries inside continue blocks, + // we must declare the temporary in the loop header so that the continue block can avoid declaring new variables. + if (current_continue_block) + { + auto &header = get(current_continue_block->loop_dominator); + if (find_if(begin(header.declare_temporary), end(header.declare_temporary), + [result_type, result_id](const pair &tmp) { + return tmp.first == result_type && tmp.second == result_id; + }) == end(header.declare_temporary)) + { + header.declare_temporary.emplace_back(result_type, result_id); + force_recompile = true; + } + + return join(to_name(result_id), " = "); + } + else + { + // The result_id has not been made into an expression yet, so use flags interface. + return join(flags_to_precision_qualifiers_glsl(type, flags), variable_decl(type, to_name(result_id)), " = "); + } +} + +bool CompilerGLSL::expression_is_forwarded(uint32_t id) +{ + return forwarded_temporaries.find(id) != end(forwarded_temporaries); +} + +SPIRExpression &CompilerGLSL::emit_op(uint32_t result_type, uint32_t result_id, const string &rhs, bool forwarding, + bool suppress_usage_tracking) +{ + if (forwarding && (forced_temporaries.find(result_id) == end(forced_temporaries))) + { + // Just forward it without temporary. + // If the forward is trivial, we do not force flushing to temporary for this expression. + if (!suppress_usage_tracking) + forwarded_temporaries.insert(result_id); + + return set(result_id, rhs, result_type, true); + } + else + { + // If expression isn't immutable, bind it to a temporary and make the new temporary immutable (they always are). + statement(declare_temporary(result_type, result_id), rhs, ";"); + return set(result_id, to_name(result_id), result_type, true); + } +} + +void CompilerGLSL::emit_unary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op) +{ + bool forward = should_forward(op0); + emit_op(result_type, result_id, join(op, to_enclosed_expression(op0)), forward); + + if (forward && forced_temporaries.find(result_id) == end(forced_temporaries)) + inherit_expression_dependencies(result_id, op0); +} + +void CompilerGLSL::emit_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op) +{ + bool forward = should_forward(op0) && should_forward(op1); + emit_op(result_type, result_id, join(to_enclosed_expression(op0), " ", op, " ", to_enclosed_expression(op1)), + forward); + + if (forward && forced_temporaries.find(result_id) == end(forced_temporaries)) + { + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); + } +} + +SPIRType CompilerGLSL::binary_op_bitcast_helper(string &cast_op0, string &cast_op1, SPIRType::BaseType &input_type, + uint32_t op0, uint32_t op1, bool skip_cast_if_equal_type) +{ + auto &type0 = expression_type(op0); + auto &type1 = expression_type(op1); + + // We have to bitcast if our inputs are of different type, or if our types are not equal to expected inputs. 
+ // For some functions like OpIEqual and INotEqual, we don't care if inputs are of different types than expected + // since equality test is exactly the same. + bool cast = (type0.basetype != type1.basetype) || (!skip_cast_if_equal_type && type0.basetype != input_type); + + // Create a fake type so we can bitcast to it. + // We only deal with regular arithmetic types here like int, uints and so on. + SPIRType expected_type; + expected_type.basetype = input_type; + expected_type.vecsize = type0.vecsize; + expected_type.columns = type0.columns; + expected_type.width = type0.width; + + if (cast) + { + cast_op0 = bitcast_glsl(expected_type, op0); + cast_op1 = bitcast_glsl(expected_type, op1); + } + else + { + // If we don't cast, our actual input type is that of the first (or second) argument. + cast_op0 = to_enclosed_expression(op0); + cast_op1 = to_enclosed_expression(op1); + input_type = type0.basetype; + } + + return expected_type; +} + +void CompilerGLSL::emit_binary_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type) +{ + string cast_op0, cast_op1; + auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type); + auto &out_type = get(result_type); + + // We might have casted away from the result type, so bitcast again. + // For example, arithmetic right shift with uint inputs. + // Special case boolean outputs since relational opcodes output booleans instead of int/uint. + string expr; + if (out_type.basetype != input_type && out_type.basetype != SPIRType::Boolean) + { + expected_type.basetype = input_type; + expr = bitcast_glsl_op(out_type, expected_type); + expr += '('; + expr += join(cast_op0, " ", op, " ", cast_op1); + expr += ')'; + } + else + expr += join(cast_op0, " ", op, " ", cast_op1); + + emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1)); +} + +void CompilerGLSL::emit_unary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op) +{ + bool forward = should_forward(op0); + emit_op(result_type, result_id, join(op, "(", to_expression(op0), ")"), forward); + if (forward && forced_temporaries.find(result_id) == end(forced_temporaries)) + inherit_expression_dependencies(result_id, op0); +} + +void CompilerGLSL::emit_binary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + const char *op) +{ + bool forward = should_forward(op0) && should_forward(op1); + emit_op(result_type, result_id, join(op, "(", to_expression(op0), ", ", to_expression(op1), ")"), forward); + + if (forward && forced_temporaries.find(result_id) == end(forced_temporaries)) + { + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); + } +} + +void CompilerGLSL::emit_binary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type) +{ + string cast_op0, cast_op1; + auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type); + auto &out_type = get(result_type); + + // Special case boolean outputs since relational opcodes output booleans instead of int/uint. 
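+	// For example, an SMax applied to uint operands comes out roughly as
+	// "uint(max(int(a), int(b)))": operands are cast to the signed input type
+	// and the result is bitcast back to the unsigned result type.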
+ string expr; + if (out_type.basetype != input_type && out_type.basetype != SPIRType::Boolean) + { + expected_type.basetype = input_type; + expr = bitcast_glsl_op(out_type, expected_type); + expr += '('; + expr += join(op, "(", cast_op0, ", ", cast_op1, ")"); + expr += ')'; + } + else + { + expr += join(op, "(", cast_op0, ", ", cast_op1, ")"); + } + + emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1)); +} + +void CompilerGLSL::emit_trinary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + uint32_t op2, const char *op) +{ + bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2); + emit_op(result_type, result_id, + join(op, "(", to_expression(op0), ", ", to_expression(op1), ", ", to_expression(op2), ")"), forward); + + if (forward && forced_temporaries.find(result_id) == end(forced_temporaries)) + { + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); + inherit_expression_dependencies(result_id, op2); + } +} + +void CompilerGLSL::emit_quaternary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + uint32_t op2, uint32_t op3, const char *op) +{ + bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2) && should_forward(op3); + emit_op(result_type, result_id, join(op, "(", to_expression(op0), ", ", to_expression(op1), ", ", + to_expression(op2), ", ", to_expression(op3), ")"), + forward); + + if (forward && forced_temporaries.find(result_id) == end(forced_temporaries)) + { + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); + inherit_expression_dependencies(result_id, op2); + inherit_expression_dependencies(result_id, op3); + } +} + +string CompilerGLSL::legacy_tex_op(const std::string &op, const SPIRType &imgtype) +{ + const char *type; + switch (imgtype.image.dim) + { + case spv::Dim1D: + type = (imgtype.image.arrayed && !options.es) ? "1DArray" : "1D"; + break; + case spv::Dim2D: + type = (imgtype.image.arrayed && !options.es) ? "2DArray" : "2D"; + break; + case spv::Dim3D: + type = "3D"; + break; + case spv::DimCube: + type = "Cube"; + break; + case spv::DimBuffer: + type = "Buffer"; + break; + case spv::DimSubpassData: + type = "2D"; + break; + default: + type = ""; + break; + } + + if (op == "textureLod" || op == "textureProjLod") + { + if (is_legacy_es()) + require_extension("GL_EXT_shader_texture_lod"); + else if (is_legacy()) + require_extension("GL_ARB_shader_texture_lod"); + } + + if (op == "texture") + return join("texture", type); + else if (op == "textureLod") + return join("texture", type, is_legacy_es() ? "LodEXT" : "Lod"); + else if (op == "textureProj") + return join("texture", type, "Proj"); + else if (op == "textureProjLod") + return join("texture", type, is_legacy_es() ? "ProjLodEXT" : "ProjLod"); + else + { + SPIRV_CROSS_THROW(join("Unsupported legacy texture op: ", op)); + } +} + +bool CompilerGLSL::to_trivial_mix_op(const SPIRType &type, string &op, uint32_t left, uint32_t right, uint32_t lerp) +{ + auto *cleft = maybe_get(left); + auto *cright = maybe_get(right); + auto &lerptype = expression_type(lerp); + + // If our targets aren't constants, we cannot use construction. + if (!cleft || !cright) + return false; + + // If our targets are spec constants, we cannot use construction. 
+ if (cleft->specialization || cright->specialization)
+ return false;
+
+ // We can only use trivial construction if we have a scalar
+ // (should be possible to do it for vectors as well, but that is overkill for now).
+ if (lerptype.basetype != SPIRType::Boolean || lerptype.vecsize > 1)
+ return false;
+
+ // If our bool selects between 0 and 1, we can cast from bool instead, making our trivial constructor.
+ bool ret = false;
+ switch (type.basetype)
+ {
+ case SPIRType::Int:
+ case SPIRType::UInt:
+ ret = cleft->scalar() == 0 && cright->scalar() == 1;
+ break;
+
+ case SPIRType::Float:
+ ret = cleft->scalar_f32() == 0.0f && cright->scalar_f32() == 1.0f;
+ break;
+
+ case SPIRType::Double:
+ ret = cleft->scalar_f64() == 0.0 && cright->scalar_f64() == 1.0;
+ break;
+
+ case SPIRType::Int64:
+ case SPIRType::UInt64:
+ ret = cleft->scalar_u64() == 0 && cright->scalar_u64() == 1;
+ break;
+
+ default:
+ break;
+ }
+
+ if (ret)
+ op = type_to_glsl_constructor(type);
+ return ret;
+}
+
+void CompilerGLSL::emit_mix_op(uint32_t result_type, uint32_t id, uint32_t left, uint32_t right, uint32_t lerp)
+{
+ auto &lerptype = expression_type(lerp);
+ auto &restype = get<SPIRType>(result_type);
+
+ string mix_op;
+ bool has_boolean_mix = (options.es && options.version >= 310) || (!options.es && options.version >= 450);
+ bool trivial_mix = to_trivial_mix_op(restype, mix_op, left, right, lerp);
+
+ // If we can reduce the mix to a simple cast, do so.
+ // This helps for cases like int(bool), uint(bool) which is implemented with
+ // OpSelect bool 1 0.
+ if (trivial_mix)
+ {
+ emit_unary_func_op(result_type, id, lerp, mix_op.c_str());
+ }
+ else if (!has_boolean_mix && lerptype.basetype == SPIRType::Boolean)
+ {
+ // Boolean mix not supported on desktop without extension.
+ // Was added in OpenGL 4.5 with ES 3.1 compat.
+ //
+ // Could use GL_EXT_shader_integer_mix on desktop at least,
+ // but Apple doesn't support it. :(
+ // Just implement it as ternary expressions.
+ string expr;
+ if (lerptype.vecsize == 1)
+ expr = join(to_enclosed_expression(lerp), " ? ", to_enclosed_expression(right), " : ",
+ to_enclosed_expression(left));
+ else
+ {
+ auto swiz = [this](uint32_t expression, uint32_t i) {
+ return join(to_enclosed_expression(expression), ".", index_to_swizzle(i));
+ };
+
+ expr = type_to_glsl_constructor(restype);
+ expr += "(";
+ for (uint32_t i = 0; i < restype.vecsize; i++)
+ {
+ expr += swiz(lerp, i);
+ expr += " ? ";
+ expr += swiz(right, i);
+ expr += " : ";
+ expr += swiz(left, i);
+ if (i + 1 < restype.vecsize)
+ expr += ", ";
+ }
+ expr += ")";
+ }
+
+ emit_op(result_type, id, expr, should_forward(left) && should_forward(right) && should_forward(lerp));
+ }
+ else
+ emit_trinary_func_op(result_type, id, left, right, lerp, "mix");
+}
+
+string CompilerGLSL::to_combined_image_sampler(uint32_t image_id, uint32_t samp_id)
+{
+ auto &args = current_function->arguments;
+
+ // For GLSL and ESSL targets, we must enumerate all possible combinations for sampler2D(texture2D, sampler) and redirect
+ // all possible combinations into new sampler2D uniforms.
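+ //
+ // As a usage sketch (names illustrative), the GL path is expected to look like:
+ //     CompilerGLSL compiler(std::move(spirv_words));
+ //     compiler.build_combined_image_samplers();
+ //     std::string glsl = compiler.compile();
+ // Without the build_combined_image_samplers() pass, the lookups below cannot resolve
+ // and we throw the errors seen further down.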
+ auto *image = maybe_get_backing_variable(image_id);
+ auto *samp = maybe_get_backing_variable(samp_id);
+ if (image)
+ image_id = image->self;
+ if (samp)
+ samp_id = samp->self;
+
+ auto image_itr = find_if(begin(args), end(args),
+ [image_id](const SPIRFunction::Parameter &param) { return param.id == image_id; });
+
+ auto sampler_itr = find_if(begin(args), end(args),
+ [samp_id](const SPIRFunction::Parameter &param) { return param.id == samp_id; });
+
+ if (image_itr != end(args) || sampler_itr != end(args))
+ {
+ // If any parameter originates from a parameter, we will find it in our argument list.
+ bool global_image = image_itr == end(args);
+ bool global_sampler = sampler_itr == end(args);
+ uint32_t iid = global_image ? image_id : uint32_t(image_itr - begin(args));
+ uint32_t sid = global_sampler ? samp_id : uint32_t(sampler_itr - begin(args));
+
+ auto &combined = current_function->combined_parameters;
+ auto itr = find_if(begin(combined), end(combined), [=](const SPIRFunction::CombinedImageSamplerParameter &p) {
+ return p.global_image == global_image && p.global_sampler == global_sampler && p.image_id == iid &&
+ p.sampler_id == sid;
+ });
+
+ if (itr != end(combined))
+ return to_expression(itr->id);
+ else
+ {
+ SPIRV_CROSS_THROW(
+ "Cannot find mapping for combined sampler parameter, was build_combined_image_samplers() used "
+ "before compile() was called?");
+ }
+ }
+ else
+ {
+ // For global sampler2D, look directly at the global remapping table.
+ auto &mapping = combined_image_samplers;
+ auto itr = find_if(begin(mapping), end(mapping), [image_id, samp_id](const CombinedImageSampler &combined) {
+ return combined.image_id == image_id && combined.sampler_id == samp_id;
+ });
+
+ if (itr != end(combined_image_samplers))
+ return to_expression(itr->combined_id);
+ else
+ {
+ SPIRV_CROSS_THROW("Cannot find mapping for combined sampler, was build_combined_image_samplers() used "
+ "before compile() was called?");
+ }
+ }
+}
+
+void CompilerGLSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id)
+{
+ if (options.vulkan_semantics && combined_image_samplers.empty())
+ {
+ emit_binary_func_op(result_type, result_id, image_id, samp_id,
+ type_to_glsl(get<SPIRType>(result_type)).c_str());
+ }
+ else
+ emit_op(result_type, result_id, to_combined_image_sampler(image_id, samp_id), true);
+}
+
+void CompilerGLSL::emit_texture_op(const Instruction &i)
+{
+ auto ops = stream(i);
+ auto op = static_cast<Op>(i.op);
+ uint32_t length = i.length;
+
+ if (i.offset + length > spirv.size())
+ SPIRV_CROSS_THROW("Compiler::parse() opcode out of range.");
+
+ uint32_t result_type = ops[0];
+ uint32_t id = ops[1];
+ uint32_t img = ops[2];
+ uint32_t coord = ops[3];
+ uint32_t dref = 0;
+ uint32_t comp = 0;
+ bool gather = false;
+ bool proj = false;
+ const uint32_t *opt = nullptr;
+
+ switch (op)
+ {
+ case OpImageSampleDrefImplicitLod:
+ case OpImageSampleDrefExplicitLod:
+ dref = ops[4];
+ opt = &ops[5];
+ length -= 5;
+ break;
+
+ case OpImageSampleProjDrefImplicitLod:
+ case OpImageSampleProjDrefExplicitLod:
+ dref = ops[4];
+ proj = true;
+ opt = &ops[5];
+ length -= 5;
+ break;
+
+ case OpImageDrefGather:
+ dref = ops[4];
+ opt = &ops[5];
+ gather = true;
+ length -= 5;
+ break;
+
+ case OpImageGather:
+ comp = ops[4];
+ opt = &ops[5];
+ gather = true;
+ length -= 5;
+ break;
+
+ case OpImageSampleProjImplicitLod:
+ case OpImageSampleProjExplicitLod:
+ opt = &ops[4];
+ length -= 4;
+ proj = true;
+ break;
+
+ default:
+ opt = &ops[4];
+ length -= 4;
+ break;
+
} + + auto &imgtype = expression_type(img); + uint32_t coord_components = 0; + switch (imgtype.image.dim) + { + case spv::Dim1D: + coord_components = 1; + break; + case spv::Dim2D: + coord_components = 2; + break; + case spv::Dim3D: + coord_components = 3; + break; + case spv::DimCube: + coord_components = 3; + break; + case spv::DimBuffer: + coord_components = 1; + break; + default: + coord_components = 2; + break; + } + + if (proj) + coord_components++; + if (imgtype.image.arrayed) + coord_components++; + + uint32_t bias = 0; + uint32_t lod = 0; + uint32_t grad_x = 0; + uint32_t grad_y = 0; + uint32_t coffset = 0; + uint32_t offset = 0; + uint32_t coffsets = 0; + uint32_t sample = 0; + uint32_t flags = 0; + + if (length) + { + flags = opt[0]; + opt++; + length--; + } + + auto test = [&](uint32_t &v, uint32_t flag) { + if (length && (flags & flag)) + { + v = *opt++; + length--; + } + }; + + test(bias, ImageOperandsBiasMask); + test(lod, ImageOperandsLodMask); + test(grad_x, ImageOperandsGradMask); + test(grad_y, ImageOperandsGradMask); + test(coffset, ImageOperandsConstOffsetMask); + test(offset, ImageOperandsOffsetMask); + test(coffsets, ImageOperandsConstOffsetsMask); + test(sample, ImageOperandsSampleMask); + + string expr; + string texop; + + if (op == OpImageFetch) + texop += "texelFetch"; + else + { + texop += "texture"; + + if (gather) + texop += "Gather"; + if (coffsets) + texop += "Offsets"; + if (proj) + texop += "Proj"; + if (grad_x || grad_y) + texop += "Grad"; + if (lod) + texop += "Lod"; + } + + if (coffset || offset) + texop += "Offset"; + + if (is_legacy()) + texop = legacy_tex_op(texop, imgtype); + + expr += texop; + expr += "("; + expr += to_expression(img); + + bool swizz_func = backend.swizzle_is_function; + auto swizzle = [swizz_func](uint32_t comps, uint32_t in_comps) -> const char * { + if (comps == in_comps) + return ""; + + switch (comps) + { + case 1: + return ".x"; + case 2: + return swizz_func ? ".xy()" : ".xy"; + case 3: + return swizz_func ? ".xyz()" : ".xyz"; + default: + return ""; + } + }; + + bool forward = should_forward(coord); + + // The IR can give us more components than we need, so chop them off as needed. + auto swizzle_expr = swizzle(coord_components, expression_type(coord).vecsize); + // Only enclose the UV expression if needed. + auto coord_expr = (*swizzle_expr == '\0') ? to_expression(coord) : (to_enclosed_expression(coord) + swizzle_expr); + + // TODO: implement rest ... A bit intensive. + + if (dref) + { + forward = forward && should_forward(dref); + + // SPIR-V splits dref and coordinate. + if (coord_components == 4) // GLSL also splits the arguments in two. + { + expr += ", "; + expr += to_expression(coord); + expr += ", "; + expr += to_expression(dref); + } + else + { + // Create a composite which merges coord/dref into a single vector. 
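+ // As a sketch (names illustrative), for a sampler2DShadow this merges to something like:
+ //     texture(uShadowMap, vec3(uv, dref))
+ // where uv is the 2-component coordinate and dref the comparison value.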
+ auto type = expression_type(coord); + type.vecsize = coord_components + 1; + expr += ", "; + expr += type_to_glsl_constructor(type); + expr += "("; + expr += coord_expr; + expr += ", "; + expr += to_expression(dref); + expr += ")"; + } + } + else + { + expr += ", "; + expr += coord_expr; + } + + if (grad_x || grad_y) + { + forward = forward && should_forward(grad_x); + forward = forward && should_forward(grad_y); + expr += ", "; + expr += to_expression(grad_x); + expr += ", "; + expr += to_expression(grad_y); + } + + if (lod) + { + forward = forward && should_forward(lod); + expr += ", "; + expr += to_expression(lod); + } + + if (coffset) + { + forward = forward && should_forward(coffset); + expr += ", "; + expr += to_expression(coffset); + } + else if (offset) + { + forward = forward && should_forward(offset); + expr += ", "; + expr += to_expression(offset); + } + + if (bias) + { + forward = forward && should_forward(bias); + expr += ", "; + expr += to_expression(bias); + } + + if (comp) + { + forward = forward && should_forward(comp); + expr += ", "; + expr += to_expression(comp); + } + + if (sample) + { + expr += ", "; + expr += to_expression(sample); + } + + expr += ")"; + + emit_op(result_type, id, expr, forward); +} + +void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t) +{ + GLSLstd450 op = static_cast(eop); + + switch (op) + { + // FP fiddling + case GLSLstd450Round: + emit_unary_func_op(result_type, id, args[0], "round"); + break; + + case GLSLstd450RoundEven: + if ((options.es && options.version >= 300) || (!options.es && options.version >= 130)) + emit_unary_func_op(result_type, id, args[0], "roundEven"); + else + SPIRV_CROSS_THROW("roundEven supported only in ESSL 300 and GLSL 130 and up."); + break; + + case GLSLstd450Trunc: + emit_unary_func_op(result_type, id, args[0], "trunc"); + break; + case GLSLstd450SAbs: + case GLSLstd450FAbs: + emit_unary_func_op(result_type, id, args[0], "abs"); + break; + case GLSLstd450SSign: + case GLSLstd450FSign: + emit_unary_func_op(result_type, id, args[0], "sign"); + break; + case GLSLstd450Floor: + emit_unary_func_op(result_type, id, args[0], "floor"); + break; + case GLSLstd450Ceil: + emit_unary_func_op(result_type, id, args[0], "ceil"); + break; + case GLSLstd450Fract: + emit_unary_func_op(result_type, id, args[0], "fract"); + break; + case GLSLstd450Radians: + emit_unary_func_op(result_type, id, args[0], "radians"); + break; + case GLSLstd450Degrees: + emit_unary_func_op(result_type, id, args[0], "degrees"); + break; + case GLSLstd450Fma: + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "fma"); + break; + case GLSLstd450Modf: + register_call_out_argument(args[1]); + forced_temporaries.insert(id); + emit_binary_func_op(result_type, id, args[0], args[1], "modf"); + break; + + // Minmax + case GLSLstd450FMin: + case GLSLstd450UMin: + case GLSLstd450SMin: + emit_binary_func_op(result_type, id, args[0], args[1], "min"); + break; + case GLSLstd450FMax: + case GLSLstd450UMax: + case GLSLstd450SMax: + emit_binary_func_op(result_type, id, args[0], args[1], "max"); + break; + case GLSLstd450FClamp: + case GLSLstd450UClamp: + case GLSLstd450SClamp: + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "clamp"); + break; + + // Trig + case GLSLstd450Sin: + emit_unary_func_op(result_type, id, args[0], "sin"); + break; + case GLSLstd450Cos: + emit_unary_func_op(result_type, id, args[0], "cos"); + break; + case GLSLstd450Tan: + emit_unary_func_op(result_type, 
id, args[0], "tan"); + break; + case GLSLstd450Asin: + emit_unary_func_op(result_type, id, args[0], "asin"); + break; + case GLSLstd450Acos: + emit_unary_func_op(result_type, id, args[0], "acos"); + break; + case GLSLstd450Atan: + emit_unary_func_op(result_type, id, args[0], "atan"); + break; + case GLSLstd450Sinh: + emit_unary_func_op(result_type, id, args[0], "sinh"); + break; + case GLSLstd450Cosh: + emit_unary_func_op(result_type, id, args[0], "cosh"); + break; + case GLSLstd450Tanh: + emit_unary_func_op(result_type, id, args[0], "tanh"); + break; + case GLSLstd450Asinh: + emit_unary_func_op(result_type, id, args[0], "asinh"); + break; + case GLSLstd450Acosh: + emit_unary_func_op(result_type, id, args[0], "acosh"); + break; + case GLSLstd450Atanh: + emit_unary_func_op(result_type, id, args[0], "atanh"); + break; + case GLSLstd450Atan2: + emit_binary_func_op(result_type, id, args[0], args[1], "atan"); + break; + + // Exponentials + case GLSLstd450Pow: + emit_binary_func_op(result_type, id, args[0], args[1], "pow"); + break; + case GLSLstd450Exp: + emit_unary_func_op(result_type, id, args[0], "exp"); + break; + case GLSLstd450Log: + emit_unary_func_op(result_type, id, args[0], "log"); + break; + case GLSLstd450Exp2: + emit_unary_func_op(result_type, id, args[0], "exp2"); + break; + case GLSLstd450Log2: + emit_unary_func_op(result_type, id, args[0], "log2"); + break; + case GLSLstd450Sqrt: + emit_unary_func_op(result_type, id, args[0], "sqrt"); + break; + case GLSLstd450InverseSqrt: + emit_unary_func_op(result_type, id, args[0], "inversesqrt"); + break; + + // Matrix math + case GLSLstd450Determinant: + emit_unary_func_op(result_type, id, args[0], "determinant"); + break; + case GLSLstd450MatrixInverse: + emit_unary_func_op(result_type, id, args[0], "inverse"); + break; + + // Lerping + case GLSLstd450FMix: + case GLSLstd450IMix: + { + emit_mix_op(result_type, id, args[0], args[1], args[2]); + break; + } + case GLSLstd450Step: + emit_binary_func_op(result_type, id, args[0], args[1], "step"); + break; + case GLSLstd450SmoothStep: + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "smoothstep"); + break; + + // Packing + case GLSLstd450Frexp: + register_call_out_argument(args[1]); + forced_temporaries.insert(id); + emit_binary_func_op(result_type, id, args[0], args[1], "frexp"); + break; + case GLSLstd450Ldexp: + emit_binary_func_op(result_type, id, args[0], args[1], "ldexp"); + break; + case GLSLstd450PackSnorm4x8: + emit_unary_func_op(result_type, id, args[0], "packSnorm4x8"); + break; + case GLSLstd450PackUnorm4x8: + emit_unary_func_op(result_type, id, args[0], "packUnorm4x8"); + break; + case GLSLstd450PackSnorm2x16: + emit_unary_func_op(result_type, id, args[0], "packSnorm2x16"); + break; + case GLSLstd450PackUnorm2x16: + emit_unary_func_op(result_type, id, args[0], "packUnorm2x16"); + break; + case GLSLstd450PackHalf2x16: + emit_unary_func_op(result_type, id, args[0], "packHalf2x16"); + break; + case GLSLstd450UnpackSnorm4x8: + emit_unary_func_op(result_type, id, args[0], "unpackSnorm4x8"); + break; + case GLSLstd450UnpackUnorm4x8: + emit_unary_func_op(result_type, id, args[0], "unpackUnorm4x8"); + break; + case GLSLstd450UnpackSnorm2x16: + emit_unary_func_op(result_type, id, args[0], "unpackSnorm2x16"); + break; + case GLSLstd450UnpackUnorm2x16: + emit_unary_func_op(result_type, id, args[0], "unpackUnorm2x16"); + break; + case GLSLstd450UnpackHalf2x16: + emit_unary_func_op(result_type, id, args[0], "unpackHalf2x16"); + break; + + case GLSLstd450PackDouble2x32: + 
emit_unary_func_op(result_type, id, args[0], "packDouble2x32"); + break; + case GLSLstd450UnpackDouble2x32: + emit_unary_func_op(result_type, id, args[0], "unpackDouble2x32"); + break; + + // Vector math + case GLSLstd450Length: + emit_unary_func_op(result_type, id, args[0], "length"); + break; + case GLSLstd450Distance: + emit_binary_func_op(result_type, id, args[0], args[1], "distance"); + break; + case GLSLstd450Cross: + emit_binary_func_op(result_type, id, args[0], args[1], "cross"); + break; + case GLSLstd450Normalize: + emit_unary_func_op(result_type, id, args[0], "normalize"); + break; + case GLSLstd450FaceForward: + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "faceforward"); + break; + case GLSLstd450Reflect: + emit_binary_func_op(result_type, id, args[0], args[1], "reflect"); + break; + case GLSLstd450Refract: + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "refract"); + break; + + // Bit-fiddling + case GLSLstd450FindILsb: + emit_unary_func_op(result_type, id, args[0], "findLSB"); + break; + case GLSLstd450FindSMsb: + case GLSLstd450FindUMsb: + emit_unary_func_op(result_type, id, args[0], "findMSB"); + break; + + // Multisampled varying + case GLSLstd450InterpolateAtCentroid: + emit_unary_func_op(result_type, id, args[0], "interpolateAtCentroid"); + break; + case GLSLstd450InterpolateAtSample: + emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtSample"); + break; + case GLSLstd450InterpolateAtOffset: + emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtOffset"); + break; + + default: + statement("// unimplemented GLSL op ", eop); + break; + } +} + +string CompilerGLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in_type) +{ + if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Int) + return type_to_glsl(out_type); + else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::Int64) + return type_to_glsl(out_type); + else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Float) + return "floatBitsToUint"; + else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::UInt) + return type_to_glsl(out_type); + else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::UInt64) + return type_to_glsl(out_type); + else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Float) + return "floatBitsToInt"; + else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::UInt) + return "uintBitsToFloat"; + else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::Int) + return "intBitsToFloat"; + else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Double) + return "doubleBitsToInt64"; + else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::Double) + return "doubleBitsToUint64"; + else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::Int64) + return "int64BitsToDouble"; + else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::UInt64) + return "uint64BitsToDouble"; + else + return ""; +} + +string CompilerGLSL::bitcast_glsl(const SPIRType &result_type, uint32_t argument) +{ + auto op = bitcast_glsl_op(result_type, expression_type(argument)); + if (op.empty()) + return to_enclosed_expression(argument); + else + return join(op, "(", to_expression(argument), ")"); +} + +string CompilerGLSL::builtin_to_glsl(BuiltIn builtin) +{ + switch (builtin) + { + 
case BuiltInPosition: + return "gl_Position"; + case BuiltInPointSize: + return "gl_PointSize"; + case BuiltInVertexId: + if (options.vulkan_semantics) + SPIRV_CROSS_THROW( + "Cannot implement gl_VertexID in Vulkan GLSL. This shader was created with GL semantics."); + return "gl_VertexID"; + case BuiltInInstanceId: + if (options.vulkan_semantics) + SPIRV_CROSS_THROW( + "Cannot implement gl_InstanceID in Vulkan GLSL. This shader was created with GL semantics."); + return "gl_InstanceID"; + case BuiltInVertexIndex: + if (options.vulkan_semantics) + return "gl_VertexIndex"; + else + return "gl_VertexID"; // gl_VertexID already has the base offset applied. + case BuiltInInstanceIndex: + if (options.vulkan_semantics) + return "gl_InstanceIndex"; + else + return "(gl_InstanceID + SPIRV_Cross_BaseInstance)"; // ... but not gl_InstanceID. + case BuiltInPrimitiveId: + return "gl_PrimitiveID"; + case BuiltInInvocationId: + return "gl_InvocationID"; + case BuiltInLayer: + return "gl_Layer"; + case BuiltInTessLevelOuter: + return "gl_TessLevelOuter"; + case BuiltInTessLevelInner: + return "gl_TessLevelInner"; + case BuiltInTessCoord: + return "gl_TessCoord"; + case BuiltInFragCoord: + return "gl_FragCoord"; + case BuiltInPointCoord: + return "gl_PointCoord"; + case BuiltInFrontFacing: + return "gl_FrontFacing"; + case BuiltInFragDepth: + return "gl_FragDepth"; + case BuiltInNumWorkgroups: + return "gl_NumWorkGroups"; + case BuiltInWorkgroupSize: + return "gl_WorkGroupSize"; + case BuiltInWorkgroupId: + return "gl_WorkGroupID"; + case BuiltInLocalInvocationId: + return "gl_LocalInvocationID"; + case BuiltInGlobalInvocationId: + return "gl_GlobalInvocationID"; + case BuiltInLocalInvocationIndex: + return "gl_LocalInvocationIndex"; + default: + return "gl_???"; + } +} + +const char *CompilerGLSL::index_to_swizzle(uint32_t index) +{ + switch (index) + { + case 0: + return "x"; + case 1: + return "y"; + case 2: + return "z"; + case 3: + return "w"; + default: + SPIRV_CROSS_THROW("Swizzle index out of range"); + } +} + +string CompilerGLSL::access_chain(uint32_t base, const uint32_t *indices, uint32_t count, bool index_is_literal, + bool chain_only) +{ + string expr; + if (!chain_only) + expr = to_enclosed_expression(base); + + const auto *type = &expression_type(base); + + // For resolving array accesses, etc, keep a local copy for poking. + SPIRType temp; + + bool access_chain_is_arrayed = false; + bool row_major_matrix_needs_conversion = is_non_native_row_major_matrix(base); + + for (uint32_t i = 0; i < count; i++) + { + uint32_t index = indices[i]; + + // Arrays + if (!type->array.empty()) + { + expr += "["; + if (index_is_literal) + expr += convert_to_string(index); + else + expr += to_expression(index); + expr += "]"; + + // We have to modify the type, so keep a local copy. + if (&temp != type) + temp = *type; + type = &temp; + temp.array.pop_back(); + + access_chain_is_arrayed = true; + } + // For structs, the index refers to a constant, which indexes into the members. + // We also check if this member is a builtin, since we then replace the entire expression with the builtin one. + else if (type->basetype == SPIRType::Struct) + { + if (!index_is_literal) + index = get(index).scalar(); + + if (index >= type->member_types.size()) + SPIRV_CROSS_THROW("Member index is out of bounds!"); + + BuiltIn builtin; + if (is_member_builtin(*type, index, &builtin)) + { + // FIXME: We rely here on OpName on gl_in/gl_out to make this work properly. 
+ // To make this properly work by omitting all OpName opcodes, + // we need to infer gl_in or gl_out based on the builtin, and stage. + if (access_chain_is_arrayed) + { + expr += "."; + expr += builtin_to_glsl(builtin); + } + else + expr = builtin_to_glsl(builtin); + } + else + { + expr += "."; + expr += to_member_name(*type, index); + } + row_major_matrix_needs_conversion = member_is_non_native_row_major_matrix(*type, index); + type = &get(type->member_types[index]); + } + // Matrix -> Vector + else if (type->columns > 1) + { + if (row_major_matrix_needs_conversion) + { + expr = convert_row_major_matrix(expr); + row_major_matrix_needs_conversion = false; + } + + expr += "["; + if (index_is_literal) + expr += convert_to_string(index); + else + expr += to_expression(index); + expr += "]"; + + // We have to modify the type, so keep a local copy. + if (&temp != type) + temp = *type; + type = &temp; + temp.columns = 1; + } + // Vector -> Scalar + else if (type->vecsize > 1) + { + if (index_is_literal) + { + expr += "."; + expr += index_to_swizzle(index); + } + else if (ids[index].get_type() == TypeConstant) + { + auto &c = get(index); + expr += "."; + expr += index_to_swizzle(c.scalar()); + } + else + { + expr += "["; + expr += to_expression(index); + expr += "]"; + } + + // We have to modify the type, so keep a local copy. + if (&temp != type) + temp = *type; + type = &temp; + temp.vecsize = 1; + } + else + SPIRV_CROSS_THROW("Cannot subdivide a scalar value!"); + } + + return expr; +} + +bool CompilerGLSL::should_forward(uint32_t id) +{ + // Immutable expression can always be forwarded. + // If not immutable, we can speculate about it by forwarding potentially mutable variables. + auto *var = maybe_get(id); + bool forward = var ? var->forwardable : false; + return (is_immutable(id) || forward) && !options.force_temporary; +} + +void CompilerGLSL::track_expression_read(uint32_t id) +{ + // If we try to read a forwarded temporary more than once we will stamp out possibly complex code twice. + // In this case, it's better to just bind the complex expression to the temporary and read that temporary twice. + if (expression_is_forwarded(id)) + { + auto &v = expression_usage_counts[id]; + v++; + + if (v >= 2) + { + //if (v == 2) + // fprintf(stderr, "ID %u was forced to temporary due to more than 1 expression use!\n", id); + + forced_temporaries.insert(id); + // Force a recompile after this pass to avoid forwarding this variable. + force_recompile = true; + } + } +} + +bool CompilerGLSL::args_will_forward(uint32_t id, const uint32_t *args, uint32_t num_args, bool pure) +{ + if (forced_temporaries.find(id) != end(forced_temporaries)) + return false; + + for (uint32_t i = 0; i < num_args; i++) + if (!should_forward(args[i])) + return false; + + // We need to forward globals as well. + if (!pure) + { + for (auto global : global_variables) + if (!should_forward(global)) + return false; + for (auto aliased : aliased_variables) + if (!should_forward(aliased)) + return false; + } + + return true; +} + +void CompilerGLSL::register_impure_function_call() +{ + // Impure functions can modify globals and aliased variables, so invalidate them as well. 
+ for (auto global : global_variables) + flush_dependees(get(global)); + for (auto aliased : aliased_variables) + flush_dependees(get(aliased)); +} + +void CompilerGLSL::register_call_out_argument(uint32_t id) +{ + register_write(id); + + auto *var = maybe_get(id); + if (var) + flush_variable_declaration(var->self); +} + +void CompilerGLSL::flush_variable_declaration(uint32_t id) +{ + auto *var = maybe_get(id); + if (var && var->deferred_declaration) + { + statement(variable_decl(*var), ";"); + var->deferred_declaration = false; + } +} + +bool CompilerGLSL::remove_duplicate_swizzle(string &op) +{ + auto pos = op.find_last_of('.'); + if (pos == string::npos || pos == 0) + return false; + + string final_swiz = op.substr(pos + 1, string::npos); + + if (backend.swizzle_is_function) + { + if (final_swiz.size() < 2) + return false; + + if (final_swiz.substr(final_swiz.size() - 2, string::npos) == "()") + final_swiz.erase(final_swiz.size() - 2, string::npos); + else + return false; + } + + // Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar. + // If so, and previous swizzle is of same length, + // we can drop the final swizzle altogether. + for (uint32_t i = 0; i < final_swiz.size(); i++) + { + static const char expected[] = { 'x', 'y', 'z', 'w' }; + if (i >= 4 || final_swiz[i] != expected[i]) + return false; + } + + auto prevpos = op.find_last_of('.', pos - 1); + if (prevpos == string::npos) + return false; + + prevpos++; + + // Make sure there are only swizzles here ... + for (auto i = prevpos; i < pos; i++) + { + if (op[i] < 'w' || op[i] > 'z') + { + // If swizzles are foo.xyz() like in C++ backend for example, check for that. + if (backend.swizzle_is_function && i + 2 == pos && op[i] == '(' && op[i + 1] == ')') + break; + return false; + } + } + + // If original swizzle is large enough, just carve out the components we need. + // E.g. foobar.wyx.xy will turn into foobar.wy. + if (pos - prevpos >= final_swiz.size()) + { + op.erase(prevpos + final_swiz.size(), string::npos); + + // Add back the function call ... + if (backend.swizzle_is_function) + op += "()"; + } + return true; +} + +// Optimizes away vector swizzles where we have something like +// vec3 foo; +// foo.xyz <-- swizzle expression does nothing. +// This is a very common pattern after OpCompositeCombine. +bool CompilerGLSL::remove_unity_swizzle(uint32_t base, string &op) +{ + auto pos = op.find_last_of('.'); + if (pos == string::npos || pos == 0) + return false; + + string final_swiz = op.substr(pos + 1, string::npos); + + if (backend.swizzle_is_function) + { + if (final_swiz.size() < 2) + return false; + + if (final_swiz.substr(final_swiz.size() - 2, string::npos) == "()") + final_swiz.erase(final_swiz.size() - 2, string::npos); + else + return false; + } + + // Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar. + // If so, and previous swizzle is of same length, + // we can drop the final swizzle altogether. + for (uint32_t i = 0; i < final_swiz.size(); i++) + { + static const char expected[] = { 'x', 'y', 'z', 'w' }; + if (i >= 4 || final_swiz[i] != expected[i]) + return false; + } + + auto &type = expression_type(base); + + // Sanity checking ... 
+ assert(type.columns == 1 && type.array.empty()); + + if (type.vecsize == final_swiz.size()) + op.erase(pos, string::npos); + return true; +} + +string CompilerGLSL::build_composite_combiner(const uint32_t *elems, uint32_t length) +{ + uint32_t base = 0; + bool swizzle_optimization = false; + string op; + string subop; + + for (uint32_t i = 0; i < length; i++) + { + auto *e = maybe_get(elems[i]); + + // If we're merging another scalar which belongs to the same base + // object, just merge the swizzles to avoid triggering more than 1 expression read as much as possible! + if (e && e->base_expression && e->base_expression == base) + { + // Only supposed to be used for vector swizzle -> scalar. + assert(!e->expression.empty() && e->expression.front() == '.'); + subop += e->expression.substr(1, string::npos); + swizzle_optimization = true; + } + else + { + // We'll likely end up with duplicated swizzles, e.g. + // foobar.xyz.xyz from patterns like + // OpVectorSwizzle + // OpCompositeExtract x 3 + // OpCompositeConstruct 3x + other scalar. + // Just modify op in-place. + if (swizzle_optimization) + { + if (backend.swizzle_is_function) + subop += "()"; + + // Don't attempt to remove unity swizzling if we managed to remove duplicate swizzles. + // The base "foo" might be vec4, while foo.xyz is vec3 (OpVectorShuffle) and looks like a vec3 due to the .xyz tacked on. + // We only want to remove the swizzles if we're certain that the resulting base will be the same vecsize. + // Essentially, we can only remove one set of swizzles, since that's what we have control over ... + // Case 1: + // foo.yxz.xyz: Duplicate swizzle kicks in, giving foo.yxz, we are done. + // foo.yxz was the result of OpVectorShuffle and we don't know the type of foo. + // Case 2: + // foo.xyz: Duplicate swizzle won't kick in. + // If foo is vec3, we can remove xyz, giving just foo. + if (!remove_duplicate_swizzle(subop)) + remove_unity_swizzle(base, subop); + + // Strips away redundant parens if we created them during component extraction. + strip_enclosed_expression(subop); + swizzle_optimization = false; + op += subop; + } + else + op += subop; + + if (i) + op += ", "; + subop = to_expression(elems[i]); + } + + base = e ? e->base_expression : 0; + } + + if (swizzle_optimization) + { + if (backend.swizzle_is_function) + subop += "()"; + + if (!remove_duplicate_swizzle(subop)) + remove_unity_swizzle(base, subop); + // Strips away redundant parens if we created them during component extraction. + strip_enclosed_expression(subop); + } + + op += subop; + return op; +} + +bool CompilerGLSL::skip_argument(uint32_t id) const +{ + if (!combined_image_samplers.empty() || !options.vulkan_semantics) + { + auto &type = expression_type(id); + if (type.basetype == SPIRType::Sampler || (type.basetype == SPIRType::Image && type.image.sampled == 1)) + return true; + } + return false; +} + +bool CompilerGLSL::optimize_read_modify_write(const string &lhs, const string &rhs) +{ + // Do this with strings because we have a very clear pattern we can check for and it avoids + // adding lots of special cases to the code emission. + if (rhs.size() < lhs.size() + 3) + return false; + + auto index = rhs.find(lhs); + if (index != 0) + return false; + + // TODO: Shift operators, but it's not important for now. + auto op = rhs.find_first_of("+-/*%|&^", lhs.size() + 1); + if (op != lhs.size() + 1) + return false; + + char bop = rhs[op]; + auto expr = rhs.substr(lhs.size() + 3); + // Try to find increments and decrements. 
+ // Makes it look neater as += 1, -= 1 is fairly rare to see in real code.
+ // Find some common patterns which are equivalent.
+ if ((bop == '+' || bop == '-') && (expr == "1" || expr == "uint(1)" || expr == "1u" || expr == "int(1u)"))
+ statement(lhs, bop, bop, ";");
+ else
+ statement(lhs, " ", bop, "= ", expr, ";");
+ return true;
+}
+
+void CompilerGLSL::emit_instruction(const Instruction &instruction)
+{
+ auto ops = stream(instruction);
+ auto opcode = static_cast<Op>(instruction.op);
+ uint32_t length = instruction.length;
+
+#define BOP(op) emit_binary_op(ops[0], ops[1], ops[2], ops[3], #op)
+#define BOP_CAST(op, type) \
+ emit_binary_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode))
+#define UOP(op) emit_unary_op(ops[0], ops[1], ops[2], #op)
+#define QFOP(op) emit_quaternary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], #op)
+#define TFOP(op) emit_trinary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], #op)
+#define BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op)
+#define BFOP_CAST(op, type) \
+ emit_binary_func_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode))
+#define UFOP(op) emit_unary_func_op(ops[0], ops[1], ops[2], #op)
+
+ switch (opcode)
+ {
+ // Dealing with memory
+ case OpLoad:
+ {
+ uint32_t result_type = ops[0];
+ uint32_t id = ops[1];
+ uint32_t ptr = ops[2];
+
+ flush_variable_declaration(ptr);
+
+ // If we're loading from memory that cannot be changed by the shader,
+ // just forward the expression directly to avoid needless temporaries.
+ // If an expression is mutable and forwardable, we speculate that it is immutable.
+ bool forward = should_forward(ptr) && forced_temporaries.find(id) == end(forced_temporaries);
+
+ // If loading a non-native row-major matrix, convert it to column-major
+ auto expr = to_expression(ptr);
+ if (is_non_native_row_major_matrix(ptr))
+ expr = convert_row_major_matrix(expr);
+
+ // Suppress usage tracking since using same expression multiple times does not imply any extra work.
+ emit_op(result_type, id, expr, forward, true);
+ register_read(id, ptr, forward);
+ break;
+ }
+
+ case OpInBoundsAccessChain:
+ case OpAccessChain:
+ {
+ auto *var = maybe_get<SPIRVariable>(ops[2]);
+ if (var)
+ flush_variable_declaration(var->self);
+
+ // If the base is immutable, the access chain pointer must also be.
+ // If an expression is mutable and forwardable, we speculate that it is immutable.
+ auto e = access_chain(ops[2], &ops[3], length - 3, false);
+ auto &expr = set<SPIRExpression>(ops[1], move(e), ops[0], should_forward(ops[2]));
+ expr.loaded_from = ops[2];
+ break;
+ }
+
+ case OpStore:
+ {
+ auto *var = maybe_get<SPIRVariable>(ops[0]);
+
+ if (var && var->statically_assigned)
+ var->static_expression = ops[1];
+ else if (var && var->loop_variable && !var->loop_variable_enable)
+ var->static_expression = ops[1];
+ else
+ {
+ auto lhs = to_expression(ops[0]);
+ auto rhs = to_expression(ops[1]);
+
+ // It is possible with OpLoad/OpCompositeInsert/OpStore that we get <expr> = <same-expr>.
+ // For this case, we don't need to invalidate anything or emit any opcode.
+ if (lhs != rhs)
+ {
+ // Tries to optimize assignments like "<lhs> = <lhs> op expr".
+ // While this is purely cosmetic, this is important for legacy ESSL where loop
+ // variable increments must be in either i++ or i += const-expr.
+ // Without this, we end up with i = i + 1, which is correct GLSL, but not correct GLES 2.0.
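+ // For example, "i = i + 1" is rewritten to "i++" and "i = i + 2" to "i += 2"
+ // by the optimize_read_modify_write() helper below.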
+ if (!optimize_read_modify_write(lhs, rhs)) + statement(lhs, " = ", rhs, ";"); + register_write(ops[0]); + } + } + break; + } + + case OpArrayLength: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + auto e = access_chain(ops[2], &ops[3], length - 3, true); + set(id, e + ".length()", result_type, true); + break; + } + + // Function calls + case OpFunctionCall: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t func = ops[2]; + const auto *arg = &ops[3]; + length -= 3; + + auto &callee = get(func); + bool pure = function_is_pure(callee); + + bool callee_has_out_variables = false; + + // Invalidate out variables passed to functions since they can be OpStore'd to. + for (uint32_t i = 0; i < length; i++) + { + if (callee.arguments[i].write_count) + { + register_call_out_argument(arg[i]); + callee_has_out_variables = true; + } + + flush_variable_declaration(arg[i]); + } + + if (!pure) + register_impure_function_call(); + + string funexpr; + vector arglist; + funexpr += to_name(func) + "("; + for (uint32_t i = 0; i < length; i++) + { + // Do not pass in separate images or samplers if we're remapping + // to combined image samplers. + if (skip_argument(arg[i])) + continue; + + arglist.push_back(to_func_call_arg(arg[i])); + } + + for (auto &combined : callee.combined_parameters) + { + uint32_t image_id = combined.global_image ? combined.image_id : arg[combined.image_id]; + uint32_t sampler_id = combined.global_sampler ? combined.sampler_id : arg[combined.sampler_id]; + + auto *image = maybe_get_backing_variable(image_id); + if (image) + image_id = image->self; + + auto *samp = maybe_get_backing_variable(sampler_id); + if (samp) + sampler_id = samp->self; + + arglist.push_back(to_combined_image_sampler(image_id, sampler_id)); + } + + append_global_func_args(callee, length, arglist); + + funexpr += merge(arglist); + funexpr += ")"; + + // Check for function call constraints. + check_function_call_constraints(arg, length); + + if (get(result_type).basetype != SPIRType::Void) + { + // If the function actually writes to an out variable, + // take the conservative route and do not forward. + // The problem is that we might not read the function + // result (and emit the function) before an out variable + // is read (common case when return value is ignored! + // In order to avoid start tracking invalid variables, + // just avoid the forwarding problem altogether. + bool forward = args_will_forward(id, arg, length, pure) && !callee_has_out_variables && pure && + (forced_temporaries.find(id) == end(forced_temporaries)); + + emit_op(result_type, id, funexpr, forward); + + // Function calls are implicit loads from all variables in question. + // Set dependencies for them. + for (uint32_t i = 0; i < length; i++) + register_read(id, arg[i], forward); + + // If we're going to forward the temporary result, + // put dependencies on every variable that must not change. + if (forward) + register_global_read_dependencies(callee, id); + } + else + statement(funexpr, ";"); + + break; + } + + // Composite munging + case OpCompositeConstruct: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + const auto *elems = &ops[2]; + length -= 2; + + if (!length) + SPIRV_CROSS_THROW("Invalid input to OpCompositeConstruct."); + + bool forward = true; + for (uint32_t i = 0; i < length; i++) + forward = forward && should_forward(elems[i]); + + auto &in_type = expression_type(elems[0]); + auto &out_type = get(result_type); + + // Only splat if we have vector constructors. 
+ // Arrays and structs must be initialized properly in full. + bool composite = !out_type.array.empty() || out_type.basetype == SPIRType::Struct; + bool splat = in_type.vecsize == 1 && in_type.columns == 1 && !composite; + + if (splat) + { + uint32_t input = elems[0]; + for (uint32_t i = 0; i < length; i++) + if (input != elems[i]) + splat = false; + } + + string constructor_op; + if (backend.use_initializer_list && composite) + { + // Only use this path if we are building composites. + // This path cannot be used for arithmetic. + constructor_op += "{ "; + if (splat) + constructor_op += to_expression(elems[0]); + else + constructor_op += build_composite_combiner(elems, length); + constructor_op += " }"; + } + else + { + constructor_op = type_to_glsl_constructor(get(result_type)) + "("; + if (splat) + constructor_op += to_expression(elems[0]); + else + constructor_op += build_composite_combiner(elems, length); + constructor_op += ")"; + } + + emit_op(result_type, id, constructor_op, forward); + break; + } + + case OpVectorInsertDynamic: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t vec = ops[2]; + uint32_t comp = ops[3]; + uint32_t index = ops[4]; + + flush_variable_declaration(vec); + + // Make a copy, then use access chain to store the variable. + statement(declare_temporary(result_type, id), to_expression(vec), ";"); + set(id, to_name(id), result_type, true); + auto chain = access_chain(id, &index, 1, false); + statement(chain, " = ", to_expression(comp), ";"); + break; + } + + case OpVectorExtractDynamic: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + auto expr = access_chain(ops[2], &ops[3], 1, false); + emit_op(result_type, id, expr, should_forward(ops[2])); + break; + } + + case OpCompositeExtract: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + length -= 3; + + auto &type = get(result_type); + + // We can only split the expression here if our expression is forwarded as a temporary. + bool allow_base_expression = forced_temporaries.find(id) == end(forced_temporaries); + + // Only apply this optimization if result is scalar. + if (allow_base_expression && should_forward(ops[2]) && type.vecsize == 1 && type.columns == 1 && length == 1) + { + // We want to split the access chain from the base. + // This is so we can later combine different CompositeExtract results + // with CompositeConstruct without emitting code like + // + // vec3 temp = texture(...).xyz + // vec4(temp.x, temp.y, temp.z, 1.0). + // + // when we actually wanted to emit this + // vec4(texture(...).xyz, 1.0). + // + // Including the base will prevent this and would trigger multiple reads + // from expression causing it to be forced to an actual temporary in GLSL. + auto expr = access_chain(ops[2], &ops[3], length, true, true); + auto &e = emit_op(result_type, id, expr, true, !expression_is_forwarded(ops[2])); + e.base_expression = ops[2]; + } + else + { + auto expr = access_chain(ops[2], &ops[3], length, true); + emit_op(result_type, id, expr, should_forward(ops[2]), !expression_is_forwarded(ops[2])); + } + break; + } + + case OpCompositeInsert: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t obj = ops[2]; + uint32_t composite = ops[3]; + const auto *elems = &ops[4]; + length -= 4; + + flush_variable_declaration(composite); + + auto *expr = maybe_get(id); + if ((expr && expr->used_while_invalidated) || !should_forward(composite)) + { + // Make a copy, then use access chain to store the variable. 
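+ // As a sketch (temporary name illustrative), inserting into the y component of a vec4 emits:
+ //     vec4 _25 = composite_expr; _25.y = new_value;
+ // after which _25 becomes the expression backing the result id.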
+ statement(declare_temporary(result_type, id), to_expression(composite), ";"); + set(id, to_name(id), result_type, true); + auto chain = access_chain(id, elems, length, true); + statement(chain, " = ", to_expression(obj), ";"); + } + else + { + auto chain = access_chain(composite, elems, length, true); + statement(chain, " = ", to_expression(obj), ";"); + set(id, to_expression(composite), result_type, true); + + register_write(composite); + register_read(id, composite, true); + // Invalidate the old expression we inserted into. + invalid_expressions.insert(composite); + } + break; + } + + case OpCopyMemory: + { + uint32_t lhs = ops[0]; + uint32_t rhs = ops[1]; + if (lhs != rhs) + { + flush_variable_declaration(lhs); + flush_variable_declaration(rhs); + statement(to_expression(lhs), " = ", to_expression(rhs), ";"); + register_write(lhs); + } + break; + } + + case OpCopyObject: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t rhs = ops[2]; + bool pointer = get(result_type).pointer; + + if (expression_is_lvalue(rhs) && !pointer) + { + // Need a copy. + // For pointer types, we copy the pointer itself. + statement(declare_temporary(result_type, id), to_expression(rhs), ";"); + set(id, to_name(id), result_type, true); + } + else + { + // RHS expression is immutable, so just forward it. + // Copying these things really make no sense, but + // seems to be allowed anyways. + auto &e = set(id, to_expression(rhs), result_type, true); + if (pointer) + { + auto *var = maybe_get_backing_variable(rhs); + e.loaded_from = var ? var->self : 0; + } + } + break; + } + + case OpVectorShuffle: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t vec0 = ops[2]; + uint32_t vec1 = ops[3]; + const auto *elems = &ops[4]; + length -= 4; + + auto &type0 = expression_type(vec0); + + bool shuffle = false; + for (uint32_t i = 0; i < length; i++) + if (elems[i] >= type0.vecsize) + shuffle = true; + + string expr; + bool trivial_forward; + + if (shuffle) + { + trivial_forward = !expression_is_forwarded(vec0) && !expression_is_forwarded(vec1); + + // Constructor style and shuffling from two different vectors. + vector args; + for (uint32_t i = 0; i < length; i++) + { + if (elems[i] >= type0.vecsize) + args.push_back(join(to_enclosed_expression(vec1), ".", index_to_swizzle(elems[i] - type0.vecsize))); + else + args.push_back(join(to_enclosed_expression(vec0), ".", index_to_swizzle(elems[i]))); + } + expr += join(type_to_glsl_constructor(get(result_type)), "(", merge(args), ")"); + } + else + { + trivial_forward = !expression_is_forwarded(vec0); + + // We only source from first vector, so can use swizzle. + expr += to_enclosed_expression(vec0); + expr += "."; + for (uint32_t i = 0; i < length; i++) + expr += index_to_swizzle(elems[i]); + if (backend.swizzle_is_function && length > 1) + expr += "()"; + } + + // A shuffle is trivial in that it doesn't actually *do* anything. + // We inherit the forwardedness from our arguments to avoid flushing out to temporaries when it's not really needed. + + emit_op(result_type, id, expr, should_forward(vec0) && should_forward(vec1), trivial_forward); + break; + } + + // ALU + case OpIsNan: + UFOP(isnan); + break; + + case OpIsInf: + UFOP(isinf); + break; + + case OpSNegate: + case OpFNegate: + UOP(-); + break; + + case OpIAdd: + { + // For simple arith ops, prefer the output type if there's a mismatch to avoid extra bitcasts. 
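+ // e.g. a uint plus an int with a uint result becomes "a + uint(b)" rather than
+ // bitcasting both operands and then the result back again.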
+ auto type = get(ops[0]).basetype; + BOP_CAST(+, type); + break; + } + + case OpFAdd: + BOP(+); + break; + + case OpISub: + { + auto type = get(ops[0]).basetype; + BOP_CAST(-, type); + break; + } + + case OpFSub: + BOP(-); + break; + + case OpIMul: + { + auto type = get(ops[0]).basetype; + BOP_CAST(*, type); + break; + } + + case OpFMul: + case OpMatrixTimesVector: + case OpMatrixTimesScalar: + case OpVectorTimesScalar: + case OpVectorTimesMatrix: + case OpMatrixTimesMatrix: + BOP(*); + break; + + case OpOuterProduct: + BFOP(outerProduct); + break; + + case OpDot: + BFOP(dot); + break; + + case OpTranspose: + UFOP(transpose); + break; + + case OpSDiv: + BOP_CAST(/, SPIRType::Int); + break; + + case OpUDiv: + BOP_CAST(/, SPIRType::UInt); + break; + + case OpFDiv: + BOP(/); + break; + + case OpShiftRightLogical: + BOP_CAST(>>, SPIRType::UInt); + break; + + case OpShiftRightArithmetic: + BOP_CAST(>>, SPIRType::Int); + break; + + case OpShiftLeftLogical: + { + auto type = get(ops[0]).basetype; + BOP_CAST(<<, type); + break; + } + + case OpBitwiseOr: + { + auto type = get(ops[0]).basetype; + BOP_CAST(|, type); + break; + } + + case OpBitwiseXor: + { + auto type = get(ops[0]).basetype; + BOP_CAST (^, type); + break; + } + + case OpBitwiseAnd: + { + auto type = get(ops[0]).basetype; + BOP_CAST(&, type); + break; + } + + case OpNot: + UOP(~); + break; + + case OpUMod: + BOP_CAST(%, SPIRType::UInt); + break; + + case OpSMod: + BOP_CAST(%, SPIRType::Int); + break; + + case OpFMod: + BFOP(mod); + break; + + // Relational + case OpAny: + UFOP(any); + break; + + case OpAll: + UFOP(all); + break; + + case OpSelect: + emit_mix_op(ops[0], ops[1], ops[4], ops[3], ops[2]); + break; + + case OpLogicalOr: + BOP(||); + break; + + case OpLogicalAnd: + BOP(&&); + break; + + case OpLogicalNot: + UOP(!); + break; + + case OpIEqual: + { + if (expression_type(ops[2]).vecsize > 1) + BFOP_CAST(equal, SPIRType::Int); + else + BOP_CAST(==, SPIRType::Int); + break; + } + + case OpLogicalEqual: + case OpFOrdEqual: + { + if (expression_type(ops[2]).vecsize > 1) + BFOP(equal); + else + BOP(==); + break; + } + + case OpINotEqual: + { + if (expression_type(ops[2]).vecsize > 1) + BFOP_CAST(notEqual, SPIRType::Int); + else + BOP_CAST(!=, SPIRType::Int); + break; + } + + case OpLogicalNotEqual: + case OpFOrdNotEqual: + { + if (expression_type(ops[2]).vecsize > 1) + BFOP(notEqual); + else + BOP(!=); + break; + } + + case OpUGreaterThan: + case OpSGreaterThan: + { + auto type = opcode == OpUGreaterThan ? SPIRType::UInt : SPIRType::Int; + if (expression_type(ops[2]).vecsize > 1) + BFOP_CAST(greaterThan, type); + else + BOP_CAST(>, type); + break; + } + + case OpFOrdGreaterThan: + { + if (expression_type(ops[2]).vecsize > 1) + BFOP(greaterThan); + else + BOP(>); + break; + } + + case OpUGreaterThanEqual: + case OpSGreaterThanEqual: + { + auto type = opcode == OpUGreaterThanEqual ? SPIRType::UInt : SPIRType::Int; + if (expression_type(ops[2]).vecsize > 1) + BFOP_CAST(greaterThanEqual, type); + else + BOP_CAST(>=, type); + break; + } + + case OpFOrdGreaterThanEqual: + { + if (expression_type(ops[2]).vecsize > 1) + BFOP(greaterThanEqual); + else + BOP(>=); + break; + } + + case OpULessThan: + case OpSLessThan: + { + auto type = opcode == OpULessThan ? 
SPIRType::UInt : SPIRType::Int; + if (expression_type(ops[2]).vecsize > 1) + BFOP_CAST(lessThan, type); + else + BOP_CAST(<, type); + break; + } + + case OpFOrdLessThan: + { + if (expression_type(ops[2]).vecsize > 1) + BFOP(lessThan); + else + BOP(<); + break; + } + + case OpULessThanEqual: + case OpSLessThanEqual: + { + auto type = opcode == OpULessThanEqual ? SPIRType::UInt : SPIRType::Int; + if (expression_type(ops[2]).vecsize > 1) + BFOP_CAST(lessThanEqual, type); + else + BOP_CAST(<=, type); + break; + } + + case OpFOrdLessThanEqual: + { + if (expression_type(ops[2]).vecsize > 1) + BFOP(lessThanEqual); + else + BOP(<=); + break; + } + + // Conversion + case OpConvertFToU: + case OpConvertFToS: + case OpConvertSToF: + case OpConvertUToF: + case OpUConvert: + case OpSConvert: + case OpFConvert: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + auto func = type_to_glsl_constructor(get(result_type)); + emit_unary_func_op(result_type, id, ops[2], func.c_str()); + break; + } + + case OpBitcast: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t arg = ops[2]; + + auto op = bitcast_glsl_op(get(result_type), expression_type(arg)); + emit_unary_func_op(result_type, id, arg, op.c_str()); + break; + } + + case OpQuantizeToF16: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t arg = ops[2]; + + string op; + auto &type = get(result_type); + + switch (type.vecsize) + { + case 1: + op = join("unpackHalf2x16(packHalf2x16(vec2(", to_expression(arg), "))).x"); + break; + case 2: + op = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), "))"); + break; + case 3: + { + auto op0 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".xy))"); + auto op1 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".zz)).x"); + op = join("vec3(", op0, ", ", op1, ")"); + break; + } + case 4: + { + auto op0 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".xy))"); + auto op1 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".zw))"); + op = join("vec4(", op0, ", ", op1, ")"); + break; + } + default: + SPIRV_CROSS_THROW("Illegal argument to OpQuantizeToF16."); + } + + emit_op(result_type, id, op, should_forward(arg)); + break; + } + + // Derivatives + case OpDPdx: + UFOP(dFdx); + if (is_legacy_es()) + require_extension("GL_OES_standard_derivatives"); + break; + + case OpDPdy: + UFOP(dFdy); + if (is_legacy_es()) + require_extension("GL_OES_standard_derivatives"); + break; + + case OpFwidth: + UFOP(fwidth); + if (is_legacy_es()) + require_extension("GL_OES_standard_derivatives"); + break; + + // Bitfield + case OpBitFieldInsert: + QFOP(bitfieldInsert); + break; + + case OpBitFieldSExtract: + case OpBitFieldUExtract: + QFOP(bitfieldExtract); + break; + + case OpBitReverse: + UFOP(bitfieldReverse); + break; + + case OpBitCount: + UFOP(bitCount); + break; + + // Atomics + case OpAtomicExchange: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t ptr = ops[2]; + // Ignore semantics for now, probably only relevant to CL. + uint32_t val = ops[5]; + const char *op = check_atomic_image(ptr) ? "imageAtomicExchange" : "atomicExchange"; + forced_temporaries.insert(id); + emit_binary_func_op(result_type, id, ptr, val, op); + flush_all_atomic_capable_variables(); + break; + } + + case OpAtomicCompareExchange: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t ptr = ops[2]; + uint32_t val = ops[6]; + uint32_t comp = ops[7]; + const char *op = check_atomic_image(ptr) ? 
"imageAtomicCompSwap" : "atomicCompSwap"; + + forced_temporaries.insert(id); + emit_trinary_func_op(result_type, id, ptr, comp, val, op); + flush_all_atomic_capable_variables(); + break; + } + + case OpAtomicLoad: + flush_all_atomic_capable_variables(); + // FIXME: Image? + UFOP(atomicCounter); + register_read(ops[1], ops[2], should_forward(ops[2])); + break; + + // OpAtomicStore unimplemented. Not sure what would use that. + // OpAtomicLoad seems to only be relevant for atomic counters. + + case OpAtomicIIncrement: + forced_temporaries.insert(ops[1]); + // FIXME: Image? + UFOP(atomicCounterIncrement); + flush_all_atomic_capable_variables(); + register_read(ops[1], ops[2], should_forward(ops[2])); + break; + + case OpAtomicIDecrement: + forced_temporaries.insert(ops[1]); + // FIXME: Image? + UFOP(atomicCounterDecrement); + flush_all_atomic_capable_variables(); + register_read(ops[1], ops[2], should_forward(ops[2])); + break; + + case OpAtomicIAdd: + { + const char *op = check_atomic_image(ops[2]) ? "imageAtomicAdd" : "atomicAdd"; + forced_temporaries.insert(ops[1]); + emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op); + flush_all_atomic_capable_variables(); + register_read(ops[1], ops[2], should_forward(ops[2])); + break; + } + + case OpAtomicISub: + { + const char *op = check_atomic_image(ops[2]) ? "imageAtomicAdd" : "atomicAdd"; + forced_temporaries.insert(ops[1]); + auto expr = join(op, "(", to_expression(ops[2]), ", -", to_enclosed_expression(ops[5]), ")"); + emit_op(ops[0], ops[1], expr, should_forward(ops[2]) && should_forward(ops[5])); + flush_all_atomic_capable_variables(); + register_read(ops[1], ops[2], should_forward(ops[2])); + break; + } + + case OpAtomicSMin: + case OpAtomicUMin: + { + const char *op = check_atomic_image(ops[2]) ? "imageAtomicMin" : "atomicMin"; + forced_temporaries.insert(ops[1]); + emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op); + flush_all_atomic_capable_variables(); + register_read(ops[1], ops[2], should_forward(ops[2])); + break; + } + + case OpAtomicSMax: + case OpAtomicUMax: + { + const char *op = check_atomic_image(ops[2]) ? "imageAtomicMax" : "atomicMax"; + forced_temporaries.insert(ops[1]); + emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op); + flush_all_atomic_capable_variables(); + register_read(ops[1], ops[2], should_forward(ops[2])); + break; + } + + case OpAtomicAnd: + { + const char *op = check_atomic_image(ops[2]) ? "imageAtomicAnd" : "atomicAnd"; + forced_temporaries.insert(ops[1]); + emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op); + flush_all_atomic_capable_variables(); + register_read(ops[1], ops[2], should_forward(ops[2])); + break; + } + + case OpAtomicOr: + { + const char *op = check_atomic_image(ops[2]) ? "imageAtomicOr" : "atomicOr"; + forced_temporaries.insert(ops[1]); + emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op); + flush_all_atomic_capable_variables(); + register_read(ops[1], ops[2], should_forward(ops[2])); + break; + } + + case OpAtomicXor: + { + const char *op = check_atomic_image(ops[2]) ? 
"imageAtomicXor" : "atomicXor"; + forced_temporaries.insert(ops[1]); + emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op); + flush_all_atomic_capable_variables(); + register_read(ops[1], ops[2], should_forward(ops[2])); + break; + } + + // Geometry shaders + case OpEmitVertex: + statement("EmitVertex();"); + break; + + case OpEndPrimitive: + statement("EndPrimitive();"); + break; + + case OpEmitStreamVertex: + statement("EmitStreamVertex();"); + break; + + case OpEndStreamPrimitive: + statement("EndStreamPrimitive();"); + break; + + // Textures + case OpImageSampleExplicitLod: + case OpImageSampleProjExplicitLod: + case OpImageSampleDrefExplicitLod: + case OpImageSampleProjDrefExplicitLod: + case OpImageSampleImplicitLod: + case OpImageSampleProjImplicitLod: + case OpImageSampleDrefImplicitLod: + case OpImageSampleProjDrefImplicitLod: + case OpImageFetch: + case OpImageGather: + case OpImageDrefGather: + // Gets a bit hairy, so move this to a separate instruction. + emit_texture_op(instruction); + break; + + case OpImage: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + auto &e = emit_op(result_type, id, to_expression(ops[2]), true); + + // When using the image, we need to know which variable it is actually loaded from. + auto *var = maybe_get_backing_variable(ops[2]); + e.loaded_from = var ? var->self : 0; + break; + } + + case OpImageQueryLod: + { + if (!options.es && options.version < 400) + { + require_extension("GL_ARB_texture_query_lod"); + // For some reason, the ARB spec is all-caps. + BFOP(textureQueryLOD); + } + else if (options.es) + SPIRV_CROSS_THROW("textureQueryLod not supported in ES profile."); + else + BFOP(textureQueryLod); + break; + } + + case OpImageQueryLevels: + { + if (!options.es && options.version < 430) + require_extension("GL_ARB_texture_query_levels"); + if (options.es) + SPIRV_CROSS_THROW("textureQueryLevels not supported in ES profile."); + UFOP(textureQueryLevels); + break; + } + + case OpImageQuerySamples: + { + auto *var = maybe_get_backing_variable(ops[2]); + if (!var) + SPIRV_CROSS_THROW( + "Bug. OpImageQuerySamples must have a backing variable so we know if the image is sampled or not."); + + auto &type = get(var->basetype); + bool image = type.image.sampled == 2; + if (image) + UFOP(imageSamples); + else + UFOP(textureSamples); + break; + } + + case OpSampledImage: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + emit_sampled_image_op(result_type, id, ops[2], ops[3]); + break; + } + + case OpImageQuerySizeLod: + BFOP(textureSize); + break; + + // Image load/store + case OpImageRead: + { + // We added Nonreadable speculatively to the OpImage variable due to glslangValidator + // not adding the proper qualifiers. + // If it turns out we need to read the image after all, remove the qualifier and recompile. 
+ auto *var = maybe_get_backing_variable(ops[2]); + if (var) + { + auto &flags = meta.at(var->self).decoration.decoration_flags; + if (flags & (1ull << DecorationNonReadable)) + { + flags &= ~(1ull << DecorationNonReadable); + force_recompile = true; + } + } + + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + bool pure; + string imgexpr; + auto &type = expression_type(ops[2]); + + if (var && var->remapped_variable) // Remapped input, just read as-is without any op-code + { + if (type.image.ms) + SPIRV_CROSS_THROW("Trying to remap multisampled image to variable, this is not possible."); + + auto itr = + find_if(begin(pls_inputs), end(pls_inputs), [var](const PlsRemap &pls) { return pls.id == var->self; }); + + if (itr == end(pls_inputs)) + { + // For non-PLS inputs, we rely on subpass type remapping information to get it right + // since ImageRead always returns 4-component vectors and the backing type is opaque. + if (!var->remapped_components) + SPIRV_CROSS_THROW("subpassInput was remapped, but remap_components is not set correctly."); + imgexpr = remap_swizzle(result_type, var->remapped_components, ops[2]); + } + else + { + // PLS input could have different number of components than what the SPIR expects, swizzle to + // the appropriate vector size. + uint32_t components = pls_format_to_components(itr->format); + imgexpr = remap_swizzle(result_type, components, ops[2]); + } + pure = true; + } + else if (type.image.dim == DimSubpassData) + { + if (options.vulkan_semantics) + { + // With Vulkan semantics, use the proper Vulkan GLSL construct. + if (type.image.ms) + { + uint32_t operands = ops[4]; + if (operands != ImageOperandsSampleMask || length != 6) + SPIRV_CROSS_THROW( + "Multisampled image used in OpImageRead, but unexpected operand mask was used."); + + uint32_t samples = ops[5]; + imgexpr = join("subpassLoad(", to_expression(ops[2]), ", ", to_expression(samples), ")"); + } + else + imgexpr = join("subpassLoad(", to_expression(ops[2]), ")"); + } + else + { + if (type.image.ms) + { + uint32_t operands = ops[4]; + if (operands != ImageOperandsSampleMask || length != 6) + SPIRV_CROSS_THROW( + "Multisampled image used in OpImageRead, but unexpected operand mask was used."); + + uint32_t samples = ops[5]; + imgexpr = join("texelFetch(", to_expression(ops[2]), ", ivec2(gl_FragCoord.xy), ", + to_expression(samples), ")"); + } + else + { + // Implement subpass loads via texture barrier style sampling. + imgexpr = join("texelFetch(", to_expression(ops[2]), ", ivec2(gl_FragCoord.xy), 0)"); + } + } + pure = true; + } + else + { + // Plain image load/store. + if (type.image.ms) + { + uint32_t operands = ops[4]; + if (operands != ImageOperandsSampleMask || length != 6) + SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected operand mask was used."); + + uint32_t samples = ops[5]; + imgexpr = join("imageLoad(", to_expression(ops[2]), ", ", to_expression(ops[3]), ", ", + to_expression(samples), ")"); + } + else + imgexpr = join("imageLoad(", to_expression(ops[2]), ", ", to_expression(ops[3]), ")"); + pure = false; + } + + if (var && var->forwardable) + { + auto &e = emit_op(result_type, id, imgexpr, true); + + // We only need to track dependencies if we're reading from image load/store. 
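+            // A subpassLoad()/texelFetch() result is pure: the shader itself cannot write to the
+            // underlying attachment, so a forwarded expression can never go stale.
+            // An imageLoad() result, however, must be invalidated if the image is later written.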
+ if (!pure) + { + e.loaded_from = var->self; + var->dependees.push_back(id); + } + } + else + emit_op(result_type, id, imgexpr, false); + break; + } + + case OpImageTexelPointer: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + auto &e = set(id, join(to_expression(ops[2]), ", ", to_expression(ops[3])), result_type, true); + + // When using the pointer, we need to know which variable it is actually loaded from. + auto *var = maybe_get_backing_variable(ops[2]); + e.loaded_from = var ? var->self : 0; + break; + } + + case OpImageWrite: + { + // We added Nonwritable speculatively to the OpImage variable due to glslangValidator + // not adding the proper qualifiers. + // If it turns out we need to write to the image after all, remove the qualifier and recompile. + auto *var = maybe_get_backing_variable(ops[0]); + if (var) + { + auto &flags = meta.at(var->self).decoration.decoration_flags; + if (flags & (1ull << DecorationNonWritable)) + { + flags &= ~(1ull << DecorationNonWritable); + force_recompile = true; + } + } + + auto &type = expression_type(ops[0]); + if (type.image.ms) + { + uint32_t operands = ops[3]; + if (operands != ImageOperandsSampleMask || length != 5) + SPIRV_CROSS_THROW("Multisampled image used in OpImageWrite, but unexpected operand mask was used."); + uint32_t samples = ops[4]; + statement("imageStore(", to_expression(ops[0]), ", ", to_expression(ops[1]), ", ", to_expression(samples), + ", ", to_expression(ops[2]), ");"); + } + else + statement("imageStore(", to_expression(ops[0]), ", ", to_expression(ops[1]), ", ", to_expression(ops[2]), + ");"); + + if (var && variable_storage_is_aliased(*var)) + flush_all_aliased_variables(); + break; + } + + case OpImageQuerySize: + { + auto &type = expression_type(ops[2]); + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + if (type.basetype == SPIRType::Image) + { + // The size of an image is always constant. + emit_op(result_type, id, join("imageSize(", to_expression(ops[2]), ")"), true); + } + else + SPIRV_CROSS_THROW("Invalid type for OpImageQuerySize."); + break; + } + + // Compute + case OpControlBarrier: + { + // Ignore execution and memory scope. + if (get_entry_point().model == ExecutionModelGLCompute) + { + uint32_t mem = get(ops[2]).scalar(); + if (mem == MemorySemanticsWorkgroupMemoryMask) + statement("memoryBarrierShared();"); + else if (mem) + statement("memoryBarrier();"); + } + statement("barrier();"); + break; + } + + case OpMemoryBarrier: + { + uint32_t mem = get(ops[1]).scalar(); + + // We cannot forward any loads beyond the memory barrier. + if (mem) + flush_all_active_variables(); + + if (mem == MemorySemanticsWorkgroupMemoryMask) + statement("memoryBarrierShared();"); + else if (mem) + statement("memoryBarrier();"); + break; + } + + case OpExtInst: + { + uint32_t extension_set = ops[2]; + if (get(extension_set).ext != SPIRExtension::GLSL) + { + statement("// unimplemented ext op ", instruction.op); + break; + } + + emit_glsl_op(ops[0], ops[1], ops[3], &ops[4], length - 4); + break; + } + + default: + statement("// unimplemented op ", instruction.op); + break; + } +} + +// Appends function arguments, mapped from global variables, beyond the specified arg index. +// This is used when a function call uses fewer arguments than the function defines. +// This situation may occur if the function signature has been dynamically modified to +// extract global variables referenced from within the function, and convert them to +// function arguments. 
This is necessary for shader languages that do not support global
+// access to shader input content from within a function (e.g. Metal). Each additional
+// function argument uses the name of the global variable. Function nesting will modify the
+// functions and calls all the way up the nesting chain.
+void CompilerGLSL::append_global_func_args(const SPIRFunction &func, uint32_t index, vector<string> &arglist)
+{
+    auto &args = func.arguments;
+    uint32_t arg_cnt = uint32_t(args.size());
+    for (uint32_t arg_idx = index; arg_idx < arg_cnt; arg_idx++)
+        arglist.push_back(to_func_call_arg(args[arg_idx].id));
+}
+
+string CompilerGLSL::to_member_name(const SPIRType &type, uint32_t index)
+{
+    auto &memb = meta[type.self].members;
+    if (index < memb.size() && !memb[index].alias.empty())
+        return memb[index].alias;
+    else
+        return join("_", index);
+}
+
+void CompilerGLSL::add_member_name(SPIRType &type, uint32_t index)
+{
+    auto &memb = meta[type.self].members;
+    if (index < memb.size() && !memb[index].alias.empty())
+    {
+        auto &name = memb[index].alias;
+        if (name.empty())
+            return;
+
+        // Reserved for temporaries.
+        if (name[0] == '_' && name.size() >= 2 && isdigit(name[1]))
+        {
+            name.clear();
+            return;
+        }
+
+        update_name_cache(type.member_name_cache, name);
+    }
+}
+
+// Checks whether the member is a row_major matrix that requires conversion before use
+bool CompilerGLSL::is_non_native_row_major_matrix(uint32_t id)
+{
+    // Natively supported row-major matrices do not need to be converted.
+    if (backend.native_row_major_matrix)
+        return false;
+
+    // Non-matrix or column-major matrix types do not need to be converted.
+    if (!(meta[id].decoration.decoration_flags & (1ull << DecorationRowMajor)))
+        return false;
+
+    // Only square row-major matrices can be converted at this time.
+    // Converting non-square matrices will require defining a custom GLSL function that
+    // swaps matrix elements while retaining the original dimensional form of the matrix.
+    const auto type = expression_type(id);
+    if (type.columns != type.vecsize)
+        SPIRV_CROSS_THROW("Row-major matrices must be square on this platform.");
+
+    return true;
+}
+
+// Checks whether the member is a row_major matrix that requires conversion before use
+bool CompilerGLSL::member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index)
+{
+    // Natively supported row-major matrices do not need to be converted.
+    if (backend.native_row_major_matrix)
+        return false;
+
+    // Non-matrix or column-major matrix types do not need to be converted.
+    if (!(combined_decoration_for_member(type, index) & (1ull << DecorationRowMajor)))
+        return false;
+
+    // Only square row-major matrices can be converted at this time.
+    // Converting non-square matrices will require defining a custom GLSL function that
+    // swaps matrix elements while retaining the original dimensional form of the matrix.
+    const auto mbr_type = get<SPIRType>(type.member_types[index]);
+    if (mbr_type.columns != mbr_type.vecsize)
+        SPIRV_CROSS_THROW("Row-major matrices must be square on this platform.");
+
+    return true;
+}
+
+// Wraps the expression string in a function call that converts the
+// row_major matrix result of the expression to a column_major matrix.
+// Base implementation uses the standard library transpose() function.
+// Subclasses may override to use a different function.
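+// For instance (illustrative names): an access like "(UBO.uRowMajorMVP)" would come out
+// as "transpose(UBO.uRowMajorMVP)" once the enclosing parentheses have been stripped.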
+string CompilerGLSL::convert_row_major_matrix(string exp_str) +{ + strip_enclosed_expression(exp_str); + return join("transpose(", exp_str, ")"); +} + +string CompilerGLSL::variable_decl(const SPIRType &type, const string &name) +{ + string type_name = type_to_glsl(type); + remap_variable_type_name(type, name, type_name); + return join(type_name, " ", name, type_to_array_glsl(type)); +} + +string CompilerGLSL::member_decl(const SPIRType &type, const SPIRType &membertype, uint32_t index) +{ + uint64_t memberflags = 0; + auto &memb = meta[type.self].members; + if (index < memb.size()) + memberflags = memb[index].decoration_flags; + + string qualifiers; + bool is_block = (meta[type.self].decoration.decoration_flags & + ((1ull << DecorationBlock) | (1ull << DecorationBufferBlock))) != 0; + if (is_block) + qualifiers = to_interpolation_qualifiers(memberflags); + + return join(layout_for_member(type, index), flags_to_precision_qualifiers_glsl(membertype, memberflags), qualifiers, + variable_decl(membertype, to_member_name(type, index))); +} + +const char *CompilerGLSL::flags_to_precision_qualifiers_glsl(const SPIRType &type, uint64_t flags) +{ + if (options.es) + { + auto &execution = get_entry_point(); + + // Structs do not have precision qualifiers, neither do doubles (desktop only anyways, so no mediump/highp). + if (type.basetype != SPIRType::Float && type.basetype != SPIRType::Int && type.basetype != SPIRType::UInt && + type.basetype != SPIRType::Image && type.basetype != SPIRType::SampledImage && + type.basetype != SPIRType::Sampler) + return ""; + + if (flags & (1ull << DecorationRelaxedPrecision)) + { + bool implied_fmediump = type.basetype == SPIRType::Float && + options.fragment.default_float_precision == Options::Mediump && + execution.model == ExecutionModelFragment; + + bool implied_imediump = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) && + options.fragment.default_int_precision == Options::Mediump && + execution.model == ExecutionModelFragment; + + return implied_fmediump || implied_imediump ? "" : "mediump "; + } + else + { + bool implied_fhighp = + type.basetype == SPIRType::Float && ((options.fragment.default_float_precision == Options::Highp && + execution.model == ExecutionModelFragment) || + (execution.model != ExecutionModelFragment)); + + bool implied_ihighp = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) && + ((options.fragment.default_int_precision == Options::Highp && + execution.model == ExecutionModelFragment) || + (execution.model != ExecutionModelFragment)); + + return implied_fhighp || implied_ihighp ? 
"" : "highp "; + } + } + else + return ""; +} + +const char *CompilerGLSL::to_precision_qualifiers_glsl(uint32_t id) +{ + return flags_to_precision_qualifiers_glsl(expression_type(id), meta[id].decoration.decoration_flags); +} + +string CompilerGLSL::to_qualifiers_glsl(uint32_t id) +{ + auto flags = meta[id].decoration.decoration_flags; + string res; + + auto *var = maybe_get(id); + + if (var && var->storage == StorageClassWorkgroup && !backend.shared_is_implied) + res += "shared "; + + res += to_precision_qualifiers_glsl(id); + res += to_interpolation_qualifiers(flags); + auto &type = expression_type(id); + if (type.image.dim != DimSubpassData && type.image.sampled == 2) + { + if (flags & (1ull << DecorationNonWritable)) + res += "readonly "; + if (flags & (1ull << DecorationNonReadable)) + res += "writeonly "; + } + + return res; +} + +string CompilerGLSL::argument_decl(const SPIRFunction::Parameter &arg) +{ + // glslangValidator seems to make all arguments pointer no matter what which is rather bizarre ... + // Not sure if argument being pointer type should make the argument inout. + auto &type = expression_type(arg.id); + const char *direction = ""; + + if (type.pointer) + { + if (arg.write_count && arg.read_count) + direction = "inout "; + else if (arg.write_count) + direction = "out "; + } + + return join(direction, to_qualifiers_glsl(arg.id), variable_decl(type, to_name(arg.id))); +} + +string CompilerGLSL::variable_decl(const SPIRVariable &variable) +{ + // Ignore the pointer type since GLSL doesn't have pointers. + auto &type = get(variable.basetype); + auto res = join(to_qualifiers_glsl(variable.self), variable_decl(type, to_name(variable.self))); + if (variable.loop_variable) + res += join(" = ", to_expression(variable.static_expression)); + else if (variable.initializer) + res += join(" = ", to_expression(variable.initializer)); + return res; +} + +const char *CompilerGLSL::to_pls_qualifiers_glsl(const SPIRVariable &variable) +{ + auto flags = meta[variable.self].decoration.decoration_flags; + if (flags & (1ull << DecorationRelaxedPrecision)) + return "mediump "; + else + return "highp "; +} + +string CompilerGLSL::pls_decl(const PlsRemap &var) +{ + auto &variable = get(var.id); + + SPIRType type; + type.vecsize = pls_format_to_components(var.format); + type.basetype = pls_format_to_basetype(var.format); + + return join(to_pls_layout(var.format), to_pls_qualifiers_glsl(variable), type_to_glsl(type), " ", + to_name(variable.self)); +} + +uint32_t CompilerGLSL::to_array_size_literal(const SPIRType &type, uint32_t index) const +{ + assert(type.array.size() == type.array_size_literal.size()); + + if (!type.array_size_literal[index]) + SPIRV_CROSS_THROW("The array size is not a literal, but a specialization constant or spec constant op."); + + return type.array[index]; +} + +string CompilerGLSL::to_array_size(const SPIRType &type, uint32_t index) +{ + assert(type.array.size() == type.array_size_literal.size()); + + auto &size = type.array[index]; + if (!type.array_size_literal[index]) + return to_expression(size); + else if (size) + return convert_to_string(size); + else if (!backend.flexible_member_array_supported) + { + // For runtime-sized arrays, we can work around + // lack of standard support for this by simply having + // a single element array. + // + // Runtime length arrays must always be the last element + // in an interface block. 
+ return "1"; + } + else + return ""; +} + +string CompilerGLSL::type_to_array_glsl(const SPIRType &type) +{ + if (type.array.empty()) + return ""; + + string res; + for (auto i = uint32_t(type.array.size()); i; i--) + { + res += "["; + res += to_array_size(type, i - 1); + res += "]"; + } + return res; +} + +string CompilerGLSL::image_type_glsl(const SPIRType &type) +{ + auto &imagetype = get(type.image.type); + string res; + + switch (imagetype.basetype) + { + case SPIRType::Int: + res = "i"; + break; + case SPIRType::UInt: + res = "u"; + break; + default: + break; + } + + if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData && options.vulkan_semantics) + return res + "subpassInput" + (type.image.ms ? "MS" : ""); + + // If we're emulating subpassInput with samplers, force sampler2D + // so we don't have to specify format. + if (type.basetype == SPIRType::Image && type.image.dim != DimSubpassData) + res += type.image.sampled == 2 ? "image" : "texture"; + else + res += "sampler"; + + switch (type.image.dim) + { + case Dim1D: + res += "1D"; + break; + case Dim2D: + res += "2D"; + break; + case Dim3D: + res += "3D"; + break; + case DimCube: + res += "Cube"; + break; + + case DimBuffer: + if (options.es && options.version < 320) + require_extension("GL_OES_texture_buffer"); + else if (!options.es && options.version < 300) + require_extension("GL_EXT_texture_buffer_object"); + res += "Buffer"; + break; + + case DimSubpassData: + res += "2D"; + break; + default: + SPIRV_CROSS_THROW("Only 1D, 2D, 3D, Buffer, InputTarget and Cube textures supported."); + } + + if (type.image.ms) + res += "MS"; + if (type.image.arrayed) + { + if (is_legacy_desktop()) + require_extension("GL_EXT_texture_array"); + res += "Array"; + } + if (type.image.depth) + res += "Shadow"; + + return res; +} + +string CompilerGLSL::type_to_glsl_constructor(const SPIRType &type) +{ + auto e = type_to_glsl(type); + for (uint32_t i = 0; i < type.array.size(); i++) + e += "[]"; + return e; +} + +string CompilerGLSL::type_to_glsl(const SPIRType &type) +{ + // Ignore the pointer type since GLSL doesn't have pointers. + + switch (type.basetype) + { + case SPIRType::Struct: + // Need OpName lookup here to get a "sensible" name for a struct. + if (backend.explicit_struct_type) + return join("struct ", to_name(type.self)); + else + return to_name(type.self); + + case SPIRType::Image: + case SPIRType::SampledImage: + return image_type_glsl(type); + + case SPIRType::Sampler: + // Not really used. 
+ return "sampler"; + + case SPIRType::Void: + return "void"; + + default: + break; + } + + if (type.vecsize == 1 && type.columns == 1) // Scalar builtin + { + switch (type.basetype) + { + case SPIRType::Boolean: + return "bool"; + case SPIRType::Int: + return backend.basic_int_type; + case SPIRType::UInt: + return backend.basic_uint_type; + case SPIRType::AtomicCounter: + return "atomic_uint"; + case SPIRType::Float: + return "float"; + case SPIRType::Double: + return "double"; + case SPIRType::Int64: + return "int64_t"; + case SPIRType::UInt64: + return "uint64_t"; + default: + return "???"; + } + } + else if (type.vecsize > 1 && type.columns == 1) // Vector builtin + { + switch (type.basetype) + { + case SPIRType::Boolean: + return join("bvec", type.vecsize); + case SPIRType::Int: + return join("ivec", type.vecsize); + case SPIRType::UInt: + return join("uvec", type.vecsize); + case SPIRType::Float: + return join("vec", type.vecsize); + case SPIRType::Double: + return join("dvec", type.vecsize); + case SPIRType::Int64: + return join("i64vec", type.vecsize); + case SPIRType::UInt64: + return join("u64vec", type.vecsize); + default: + return "???"; + } + } + else if (type.vecsize == type.columns) // Simple Matrix builtin + { + switch (type.basetype) + { + case SPIRType::Boolean: + return join("bmat", type.vecsize); + case SPIRType::Int: + return join("imat", type.vecsize); + case SPIRType::UInt: + return join("umat", type.vecsize); + case SPIRType::Float: + return join("mat", type.vecsize); + case SPIRType::Double: + return join("dmat", type.vecsize); + // Matrix types not supported for int64/uint64. + default: + return "???"; + } + } + else + { + switch (type.basetype) + { + case SPIRType::Boolean: + return join("bmat", type.columns, "x", type.vecsize); + case SPIRType::Int: + return join("imat", type.columns, "x", type.vecsize); + case SPIRType::UInt: + return join("umat", type.columns, "x", type.vecsize); + case SPIRType::Float: + return join("mat", type.columns, "x", type.vecsize); + case SPIRType::Double: + return join("dmat", type.columns, "x", type.vecsize); + // Matrix types not supported for int64/uint64. + default: + return "???"; + } + } +} + +void CompilerGLSL::add_variable(unordered_set &variables, uint32_t id) +{ + auto &name = meta[id].decoration.alias; + if (name.empty()) + return; + + // Reserved for temporaries. 
+    if (name[0] == '_' && name.size() >= 2 && isdigit(name[1]))
+    {
+        name.clear();
+        return;
+    }
+
+    update_name_cache(variables, name);
+}
+
+void CompilerGLSL::add_local_variable_name(uint32_t id)
+{
+    add_variable(local_variable_names, id);
+}
+
+void CompilerGLSL::add_resource_name(uint32_t id)
+{
+    add_variable(resource_names, id);
+}
+
+void CompilerGLSL::add_header_line(const std::string &line)
+{
+    header_lines.push_back(line);
+}
+
+void CompilerGLSL::require_extension(const string &ext)
+{
+    if (forced_extensions.find(ext) == end(forced_extensions))
+    {
+        forced_extensions.insert(ext);
+        force_recompile = true;
+    }
+}
+
+bool CompilerGLSL::check_atomic_image(uint32_t id)
+{
+    auto &type = expression_type(id);
+    if (type.storage == StorageClassImage)
+    {
+        if (options.es && options.version < 320)
+            require_extension("GL_OES_shader_image_atomic");
+
+        auto *var = maybe_get_backing_variable(id);
+        if (var)
+        {
+            auto &flags = meta.at(var->self).decoration.decoration_flags;
+            if (flags & ((1ull << DecorationNonWritable) | (1ull << DecorationNonReadable)))
+            {
+                flags &= ~(1ull << DecorationNonWritable);
+                flags &= ~(1ull << DecorationNonReadable);
+                force_recompile = true;
+            }
+        }
+        return true;
+    }
+    else
+        return false;
+}
+
+void CompilerGLSL::emit_function_prototype(SPIRFunction &func, uint64_t return_flags)
+{
+    // Avoid shadow declarations.
+    local_variable_names = resource_names;
+
+    string decl;
+
+    auto &type = get<SPIRType>(func.return_type);
+    decl += flags_to_precision_qualifiers_glsl(type, return_flags);
+    decl += type_to_glsl(type);
+    decl += " ";
+
+    if (func.self == entry_point)
+    {
+        decl += "main";
+        processing_entry_point = true;
+    }
+    else
+        decl += to_name(func.self);
+
+    decl += "(";
+    vector<string> arglist;
+    for (auto &arg : func.arguments)
+    {
+        // Do not pass in separate images or samplers if we're remapping
+        // to combined image samplers.
+        if (skip_argument(arg.id))
+            continue;
+
+        // Might change the variable name if it already exists in this function.
+        // SPIRV OpName doesn't have any semantic effect, so it's valid for an implementation
+        // to use the same name for variables.
+        // Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates.
+        add_local_variable_name(arg.id);
+
+        arglist.push_back(argument_decl(arg));
+
+        // Hold a pointer to the parameter so we can invalidate the readonly field if needed.
+        auto *var = maybe_get<SPIRVariable>(arg.id);
+        if (var)
+            var->parameter = &arg;
+    }
+
+    for (auto &arg : func.shadow_arguments)
+    {
+        // Might change the variable name if it already exists in this function.
+        // SPIRV OpName doesn't have any semantic effect, so it's valid for an implementation
+        // to use the same name for variables.
+        // Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates.
+        add_local_variable_name(arg.id);
+
+        arglist.push_back(argument_decl(arg));
+
+        // Hold a pointer to the parameter so we can invalidate the readonly field if needed.
+        auto *var = maybe_get<SPIRVariable>(arg.id);
+        if (var)
+            var->parameter = &arg;
+    }
+
+    decl += merge(arglist);
+    decl += ")";
+    statement(decl);
+}
+
+void CompilerGLSL::emit_function(SPIRFunction &func, uint64_t return_flags)
+{
+    // Avoid potential cycles.
+    if (func.active)
+        return;
+    func.active = true;
+
+    // If we depend on a function, emit that function before we emit our own function.
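+    // Emitting callees first keeps the output valid without needing forward declarations;
+    // callees are discovered depth-first via the OpFunctionCall scan below.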
+    for (auto block : func.blocks)
+    {
+        auto &b = get<SPIRBlock>(block);
+        for (auto &i : b.ops)
+        {
+            auto ops = stream(i);
+            auto op = static_cast<Op>(i.op);
+
+            if (op == OpFunctionCall)
+            {
+                // Recursively emit functions which are called.
+                uint32_t id = ops[2];
+                emit_function(get<SPIRFunction>(id), meta[ops[1]].decoration.decoration_flags);
+            }
+        }
+    }
+
+    emit_function_prototype(func, return_flags);
+    begin_scope();
+
+    current_function = &func;
+    auto &entry_block = get<SPIRBlock>(func.entry_block);
+
+    if (!func.analyzed_variable_scope)
+    {
+        if (options.cfg_analysis)
+        {
+            analyze_variable_scope(func);
+
+            // Check if we can actually use the loop variables we found in analyze_variable_scope.
+            // To use multiple initializers, we need the same type and qualifiers.
+            for (auto block : func.blocks)
+            {
+                auto &b = get<SPIRBlock>(block);
+                if (b.loop_variables.size() < 2)
+                    continue;
+
+                uint64_t flags = get_decoration_mask(b.loop_variables.front());
+                uint32_t type = get<SPIRVariable>(b.loop_variables.front()).basetype;
+                bool invalid_initializers = false;
+                for (auto loop_variable : b.loop_variables)
+                {
+                    if (flags != get_decoration_mask(loop_variable) ||
+                        type != get<SPIRVariable>(b.loop_variables.front()).basetype)
+                    {
+                        invalid_initializers = true;
+                        break;
+                    }
+                }
+
+                if (invalid_initializers)
+                {
+                    for (auto loop_variable : b.loop_variables)
+                        get<SPIRVariable>(loop_variable).loop_variable = false;
+                    b.loop_variables.clear();
+                }
+            }
+        }
+        else
+            entry_block.dominated_variables = func.local_variables;
+        func.analyzed_variable_scope = true;
+    }
+
+    for (auto &v : func.local_variables)
+    {
+        auto &var = get<SPIRVariable>(v);
+        if (expression_is_lvalue(v))
+        {
+            add_local_variable_name(var.self);
+
+            if (var.initializer)
+                statement(variable_decl(var), ";");
+            else
+            {
+                // Don't declare variable until first use to declutter the GLSL output quite a lot.
+                // If we don't touch the variable before first branch,
+                // declare it then since we need variable declaration to be in top scope.
+                var.deferred_declaration = true;
+            }
+        }
+        else
+        {
+            // HACK: SPIRV likes to use samplers and images as local variables, but GLSL does not allow this.
+            // For these types (non-lvalue), we enforce forwarding through a shadowed variable.
+            // This means that when we OpStore to these variables, we just write in the expression ID directly.
+            // This breaks any kind of branching, since the variable must be statically assigned.
+            // Branching on samplers and images would be pretty much impossible to fake in GLSL.
+            var.statically_assigned = true;
+        }
+
+        var.loop_variable_enable = false;
+
+        // Loop variables are never declared outside their for-loop, so block any implicit declaration.
+        if (var.loop_variable)
+            var.deferred_declaration = false;
+    }
+
+    entry_block.loop_dominator = SPIRBlock::NoDominator;
+    emit_block_chain(entry_block);
+
+    end_scope();
+    processing_entry_point = false;
+    statement("");
+}
+
+void CompilerGLSL::emit_fixup()
+{
+    auto &execution = get_entry_point();
+    if (execution.model == ExecutionModelVertex && options.vertex.fixup_clipspace)
+    {
+        const char *suffix = backend.float_literal_suffix ? "f" : "";
+        statement("gl_Position.z = 2.0", suffix, " * gl_Position.z - gl_Position.w;");
+    }
+}
+
+bool CompilerGLSL::flush_phi_required(uint32_t from, uint32_t to)
+{
+    auto &child = get<SPIRBlock>(to);
+    for (auto &phi : child.phi_variables)
+        if (phi.parent == from)
+            return true;
+    return false;
+}
+
+void CompilerGLSL::flush_phi(uint32_t from, uint32_t to)
+{
+    auto &child = get<SPIRBlock>(to);
+
+    for (auto &phi : child.phi_variables)
+        if (phi.parent == from)
+            statement(to_expression(phi.function_variable), " = ", to_expression(phi.local_variable), ";");
+}
+
+void CompilerGLSL::branch(uint32_t from, uint32_t to)
+{
+    flush_phi(from, to);
+    flush_all_active_variables();
+
+    // This is only a continue if we branch to our loop dominator.
+    if (loop_blocks.find(to) != end(loop_blocks) && get<SPIRBlock>(from).loop_dominator == to)
+    {
+        // This can happen if we had a complex continue block which was emitted.
+        // Once the continue block tries to branch to the loop header, just emit continue;
+        // and end the chain here.
+        statement("continue;");
+    }
+    else if (is_continue(to))
+    {
+        auto &to_block = get<SPIRBlock>(to);
+        if (to_block.complex_continue)
+        {
+            // Just emit the whole block chain as is.
+            auto usage_counts = expression_usage_counts;
+            auto invalid = invalid_expressions;
+
+            emit_block_chain(to_block);
+
+            // Expression usage counts and invalid expressions
+            // are moot after returning from the continue block.
+            // Since we emit the same block multiple times,
+            // we don't want to invalidate ourselves.
+            expression_usage_counts = usage_counts;
+            invalid_expressions = invalid;
+        }
+        else
+        {
+            auto &from_block = get<SPIRBlock>(from);
+            auto &dominator = get<SPIRBlock>(from_block.loop_dominator);
+
+            // For non-complex continue blocks, we implicitly branch to the continue block
+            // by having the continue block be part of the loop header in for (; ; continue-block).
+            bool outside_control_flow = block_is_outside_flow_control_from_block(dominator, from_block);
+
+            // Some simplification for for-loops. We always end up with a useless continue;
+            // statement since we branch to a loop block.
+            // Walk the CFG, and if we unconditionally execute the block calling continue (assuming we're in the loop block),
+            // we can avoid writing out an explicit continue statement.
+            // Similar optimization to return statements if we know we're outside flow control.
+            if (!outside_control_flow)
+                statement("continue;");
+        }
+    }
+    else if (is_break(to))
+        statement("break;");
+    else if (!is_conditional(to))
+        emit_block_chain(get<SPIRBlock>(to));
+}
+
+void CompilerGLSL::branch(uint32_t from, uint32_t cond, uint32_t true_block, uint32_t false_block)
+{
+    // If we branch directly to a selection merge target, we don't really need a code path.
+    bool true_sub = !is_conditional(true_block);
+    bool false_sub = !is_conditional(false_block);
+
+    if (true_sub)
+    {
+        statement("if (", to_expression(cond), ")");
+        begin_scope();
+        branch(from, true_block);
+        end_scope();
+
+        if (false_sub)
+        {
+            statement("else");
+            begin_scope();
+            branch(from, false_block);
+            end_scope();
+        }
+        else if (flush_phi_required(from, false_block))
+        {
+            statement("else");
+            begin_scope();
+            flush_phi(from, false_block);
+            end_scope();
+        }
+    }
+    else if (false_sub && !true_sub)
+    {
+        // Only need false path, use negative conditional.
+ statement("if (!", to_expression(cond), ")"); + begin_scope(); + branch(from, false_block); + end_scope(); + + if (flush_phi_required(from, true_block)) + { + statement("else"); + begin_scope(); + flush_phi(from, true_block); + end_scope(); + } + } +} + +void CompilerGLSL::propagate_loop_dominators(const SPIRBlock &block) +{ + // Propagate down the loop dominator block, so that dominated blocks can back trace. + if (block.merge == SPIRBlock::MergeLoop || block.loop_dominator) + { + uint32_t dominator = block.merge == SPIRBlock::MergeLoop ? block.self : block.loop_dominator; + + auto set_dominator = [this](uint32_t self, uint32_t new_dominator) { + auto &dominated_block = this->get(self); + + // If we already have a loop dominator, we're trying to break out to merge targets + // which should not update the loop dominator. + if (!dominated_block.loop_dominator) + dominated_block.loop_dominator = new_dominator; + }; + + // After merging a loop, we inherit the loop dominator always. + if (block.merge_block) + set_dominator(block.merge_block, block.loop_dominator); + + if (block.true_block) + set_dominator(block.true_block, dominator); + if (block.false_block) + set_dominator(block.false_block, dominator); + if (block.next_block) + set_dominator(block.next_block, dominator); + + for (auto &c : block.cases) + set_dominator(c.block, dominator); + + // In older glslang output continue_block can be == loop header. + if (block.continue_block && block.continue_block != block.self) + set_dominator(block.continue_block, dominator); + } +} + +// FIXME: This currently cannot handle complex continue blocks +// as in do-while. +// This should be seen as a "trivial" continue block. +string CompilerGLSL::emit_continue_block(uint32_t continue_block) +{ + auto *block = &get(continue_block); + + // While emitting the continue block, declare_temporary will check this + // if we have to emit temporaries. + current_continue_block = block; + + vector statements; + + // Capture all statements into our list. + auto *old = redirect_statement; + redirect_statement = &statements; + + // Stamp out all blocks one after each other. + while (loop_blocks.find(block->self) == end(loop_blocks)) + { + propagate_loop_dominators(*block); + // Write out all instructions we have in this block. + for (auto &op : block->ops) + emit_instruction(op); + + // For plain branchless for/while continue blocks. + if (block->next_block) + { + flush_phi(continue_block, block->next_block); + block = &get(block->next_block); + } + // For do while blocks. The last block will be a select block. + else if (block->true_block) + { + flush_phi(continue_block, block->true_block); + block = &get(block->true_block); + } + } + + // Restore old pointer. + redirect_statement = old; + + // Somewhat ugly, strip off the last ';' since we use ',' instead. + // Ideally, we should select this behavior in statement(). + for (auto &s : statements) + { + if (!s.empty() && s.back() == ';') + s.erase(s.size() - 1, 1); + } + + current_continue_block = nullptr; + return merge(statements); +} + +string CompilerGLSL::emit_for_loop_initializers(const SPIRBlock &block) +{ + if (block.loop_variables.empty()) + return ""; + + if (block.loop_variables.size() == 1) + { + return variable_decl(get(block.loop_variables.front())); + } + else + { + auto &var = get(block.loop_variables.front()); + auto &type = get(var.basetype); + + // Don't remap the type here as we have multiple names, + // doesn't make sense to remap types for loop variables anyways. 
+ // It is assumed here that all relevant qualifiers are equal for all loop variables. + string expr = join(to_qualifiers_glsl(var.self), type_to_glsl(type), " "); + + for (auto &loop_var : block.loop_variables) + { + auto &v = get(loop_var); + expr += join(to_name(loop_var), " = ", to_expression(v.static_expression)); + if (&loop_var != &block.loop_variables.back()) + expr += ", "; + } + return expr; + } +} + +bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method method) +{ + SPIRBlock::ContinueBlockType continue_type = continue_block_type(get(block.continue_block)); + + if (method == SPIRBlock::MergeToSelectForLoop) + { + uint32_t current_count = statement_count; + // If we're trying to create a true for loop, + // we need to make sure that all opcodes before branch statement do not actually emit any code. + // We can then take the condition expression and create a for (; cond ; ) { body; } structure instead. + for (auto &op : block.ops) + emit_instruction(op); + + bool condition_is_temporary = forced_temporaries.find(block.condition) == end(forced_temporaries); + + // This can work! We only did trivial things which could be forwarded in block body! + if (current_count == statement_count && condition_is_temporary) + { + switch (continue_type) + { + case SPIRBlock::ForLoop: + statement("for (", emit_for_loop_initializers(block), "; ", to_expression(block.condition), "; ", + emit_continue_block(block.continue_block), ")"); + break; + + case SPIRBlock::WhileLoop: + statement("while (", to_expression(block.condition), ")"); + break; + + default: + SPIRV_CROSS_THROW("For/while loop detected, but need while/for loop semantics."); + } + + begin_scope(); + return true; + } + else + { + block.disable_block_optimization = true; + force_recompile = true; + begin_scope(); // We'll see an end_scope() later. + return false; + } + } + else if (method == SPIRBlock::MergeToDirectForLoop) + { + auto &child = get(block.next_block); + + // This block may be a dominating block, so make sure we flush undeclared variables before building the for loop header. + flush_undeclared_variables(child); + + uint32_t current_count = statement_count; + + // If we're trying to create a true for loop, + // we need to make sure that all opcodes before branch statement do not actually emit any code. + // We can then take the condition expression and create a for (; cond ; ) { body; } structure instead. + for (auto &op : child.ops) + emit_instruction(op); + + bool condition_is_temporary = forced_temporaries.find(child.condition) == end(forced_temporaries); + + if (current_count == statement_count && condition_is_temporary) + { + propagate_loop_dominators(child); + + switch (continue_type) + { + case SPIRBlock::ForLoop: + statement("for (", emit_for_loop_initializers(block), "; ", to_expression(child.condition), "; ", + emit_continue_block(block.continue_block), ")"); + break; + + case SPIRBlock::WhileLoop: + statement("while (", to_expression(child.condition), ")"); + break; + + default: + SPIRV_CROSS_THROW("For/while loop detected, but need while/for loop semantics."); + } + + begin_scope(); + branch(child.self, child.true_block); + return true; + } + else + { + block.disable_block_optimization = true; + force_recompile = true; + begin_scope(); // We'll see an end_scope() later. 
+ return false; + } + } + else + return false; +} + +void CompilerGLSL::flush_undeclared_variables(SPIRBlock &block) +{ + for (auto &v : block.dominated_variables) + { + auto &var = get(v); + if (var.deferred_declaration) + statement(variable_decl(var), ";"); + var.deferred_declaration = false; + } +} + +void CompilerGLSL::emit_block_chain(SPIRBlock &block) +{ + propagate_loop_dominators(block); + + bool select_branch_to_true_block = false; + bool skip_direct_branch = false; + bool emitted_for_loop_header = false; + + // If we need to force temporaries for certain IDs due to continue blocks, do it before starting loop header. + for (auto &tmp : block.declare_temporary) + { + auto flags = meta[tmp.second].decoration.decoration_flags; + auto &type = get(tmp.first); + statement(flags_to_precision_qualifiers_glsl(type, flags), variable_decl(type, to_name(tmp.second)), ";"); + } + + SPIRBlock::ContinueBlockType continue_type = SPIRBlock::ContinueNone; + if (block.continue_block) + continue_type = continue_block_type(get(block.continue_block)); + + // If we have loop variables, stop masking out access to the variable now. + for (auto var : block.loop_variables) + get(var).loop_variable_enable = true; + + // This is the older loop behavior in glslang which branches to loop body directly from the loop header. + if (block_is_loop_candidate(block, SPIRBlock::MergeToSelectForLoop)) + { + flush_undeclared_variables(block); + if (attempt_emit_loop_header(block, SPIRBlock::MergeToSelectForLoop)) + { + // The body of while, is actually just the true block, so always branch there unconditionally. + select_branch_to_true_block = true; + emitted_for_loop_header = true; + } + } + // This is the newer loop behavior in glslang which branches from Loop header directly to + // a new block, which in turn has a OpBranchSelection without a selection merge. + else if (block_is_loop_candidate(block, SPIRBlock::MergeToDirectForLoop)) + { + flush_undeclared_variables(block); + if (attempt_emit_loop_header(block, SPIRBlock::MergeToDirectForLoop)) + { + skip_direct_branch = true; + emitted_for_loop_header = true; + } + } + else if (continue_type == SPIRBlock::DoWhileLoop) + { + statement("do"); + begin_scope(); + for (auto &op : block.ops) + emit_instruction(op); + } + else if (block.merge == SPIRBlock::MergeLoop) + { + flush_undeclared_variables(block); + + // We have a generic loop without any distinguishable pattern like for, while or do while. + get(block.continue_block).complex_continue = true; + continue_type = SPIRBlock::ComplexLoop; + + statement("for (;;)"); + begin_scope(); + for (auto &op : block.ops) + emit_instruction(op); + } + else + { + for (auto &op : block.ops) + emit_instruction(op); + } + + // If we didn't successfully emit a loop header and we had loop variable candidates, we have a problem + // as writes to said loop variables might have been masked out, we need a recompile. + if (!emitted_for_loop_header && !block.loop_variables.empty()) + { + force_recompile = true; + for (auto var : block.loop_variables) + get(var).loop_variable = false; + block.loop_variables.clear(); + } + + flush_undeclared_variables(block); + bool emit_next_block = true; + + // Handle end of block. + switch (block.terminator) + { + case SPIRBlock::Direct: + // True when emitting complex continue block. + if (block.loop_dominator == block.next_block) + { + branch(block.self, block.next_block); + emit_next_block = false; + } + // True if MergeToDirectForLoop succeeded. 
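+        // In that case the branch into the loop body was already folded into the
+        // for/while header, so emitting block.next_block here would duplicate it.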
+ else if (skip_direct_branch) + emit_next_block = false; + else if (is_continue(block.next_block) || is_break(block.next_block) || is_conditional(block.next_block)) + { + branch(block.self, block.next_block); + emit_next_block = false; + } + break; + + case SPIRBlock::Select: + // True if MergeToSelectForLoop succeeded. + if (select_branch_to_true_block) + branch(block.self, block.true_block); + else + branch(block.self, block.condition, block.true_block, block.false_block); + break; + + case SPIRBlock::MultiSelect: + { + auto &type = expression_type(block.condition); + bool uint32_t_case = type.basetype == SPIRType::UInt; + + statement("switch (", to_expression(block.condition), ")"); + begin_scope(); + + for (auto &c : block.cases) + { + auto case_value = + uint32_t_case ? convert_to_string(uint32_t(c.value)) : convert_to_string(int32_t(c.value)); + statement("case ", case_value, ":"); + begin_scope(); + branch(block.self, c.block); + end_scope(); + } + + if (block.default_block != block.next_block) + { + statement("default:"); + begin_scope(); + if (is_break(block.default_block)) + SPIRV_CROSS_THROW("Cannot break; out of a switch statement and out of a loop at the same time ..."); + branch(block.self, block.default_block); + end_scope(); + } + else if (flush_phi_required(block.self, block.next_block)) + { + statement("default:"); + begin_scope(); + flush_phi(block.self, block.next_block); + statement("break;"); + end_scope(); + } + + end_scope(); + break; + } + + case SPIRBlock::Return: + if (processing_entry_point) + emit_fixup(); + + if (block.return_value) + { + // OpReturnValue can return Undef, so don't emit anything for this case. + if (ids.at(block.return_value).get_type() != TypeUndef) + statement("return ", to_expression(block.return_value), ";"); + } + // If this block is the very final block and not called from control flow, + // we do not need an explicit return which looks out of place. Just end the function here. + // In the very weird case of for(;;) { return; } executing return is unconditional, + // but we actually need a return here ... + else if (!block_is_outside_flow_control_from_block(get(current_function->entry_block), block) || + block.loop_dominator != SPIRBlock::NoDominator) + statement("return;"); + break; + + case SPIRBlock::Kill: + statement(backend.discard_literal, ";"); + break; + + default: + SPIRV_CROSS_THROW("Unimplemented block terminator."); + } + + if (block.next_block && emit_next_block) + { + // If we hit this case, we're dealing with an unconditional branch, which means we will output + // that block after this. If we had selection merge, we already flushed phi variables. + if (block.merge != SPIRBlock::MergeSelection) + flush_phi(block.self, block.next_block); + emit_block_chain(get(block.next_block)); + } + + if (block.merge == SPIRBlock::MergeLoop) + { + if (continue_type == SPIRBlock::DoWhileLoop) + { + // Make sure that we run the continue block to get the expressions set, but this + // should become an empty string. + // We have no fallbacks if we cannot forward everything to temporaries ... + auto statements = emit_continue_block(block.continue_block); + if (!statements.empty()) + { + // The DoWhile block has side effects, force ComplexLoop pattern next pass. 
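+                // Illustrative: a continue block which both increments a counter and stores to
+                // memory cannot become the tail of "} while (cond);", so we fall back to the
+                // generic for (;;) pattern on the next pass.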
+                get<SPIRBlock>(block.continue_block).complex_continue = true;
+                force_recompile = true;
+            }
+
+            end_scope_decl(join("while (", to_expression(get<SPIRBlock>(block.continue_block).condition), ")"));
+        }
+        else
+            end_scope();
+
+        flush_phi(block.self, block.merge_block);
+        emit_block_chain(get<SPIRBlock>(block.merge_block));
+    }
+}
+
+void CompilerGLSL::begin_scope()
+{
+    statement("{");
+    indent++;
+}
+
+void CompilerGLSL::end_scope()
+{
+    if (!indent)
+        SPIRV_CROSS_THROW("Popping empty indent stack.");
+    indent--;
+    statement("}");
+}
+
+void CompilerGLSL::end_scope_decl()
+{
+    if (!indent)
+        SPIRV_CROSS_THROW("Popping empty indent stack.");
+    indent--;
+    statement("};");
+}
+
+void CompilerGLSL::end_scope_decl(const string &decl)
+{
+    if (!indent)
+        SPIRV_CROSS_THROW("Popping empty indent stack.");
+    indent--;
+    statement("} ", decl, ";");
+}
+
+void CompilerGLSL::check_function_call_constraints(const uint32_t *args, uint32_t length)
+{
+    // If our variable is remapped, and we rely on type-remapping information as
+    // well, then we cannot pass the variable as a function parameter.
+    // Fixing this is non-trivial without stamping out variants of the same function,
+    // so for now warn about this and suggest workarounds instead.
+    for (uint32_t i = 0; i < length; i++)
+    {
+        auto *var = maybe_get<SPIRVariable>(args[i]);
+        if (!var || !var->remapped_variable)
+            continue;
+
+        auto &type = get<SPIRType>(var->basetype);
+        if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData)
+        {
+            SPIRV_CROSS_THROW("Tried passing a remapped subpassInput variable to a function. "
+                              "This will not work correctly because type-remapping information is lost. "
+                              "To workaround, please consider not passing the subpass input as a function parameter, "
+                              "or use in/out variables instead which do not need type remapping information.");
+        }
+    }
+}
diff --git a/spirv_glsl.hpp b/spirv_glsl.hpp
new file mode 100644
index 0000000000..a8824f6b46
--- /dev/null
+++ b/spirv_glsl.hpp
@@ -0,0 +1,398 @@
+/*
+ * Copyright 2015-2016 ARM Limited
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SPIRV_CROSS_GLSL_HPP
+#define SPIRV_CROSS_GLSL_HPP
+
+#include "spirv_cross.hpp"
+#include <sstream>
+#include <unordered_map>
+#include <unordered_set>
+#include <utility>
+
+namespace spirv_cross
+{
+enum PlsFormat
+{
+    PlsNone = 0,
+
+    PlsR11FG11FB10F,
+    PlsR32F,
+    PlsRG16F,
+    PlsRGB10A2,
+    PlsRGBA8,
+    PlsRG16,
+
+    PlsRGBA8I,
+    PlsRG16I,
+
+    PlsRGB10A2UI,
+    PlsRGBA8UI,
+    PlsRG16UI,
+    PlsR32UI
+};
+
+struct PlsRemap
+{
+    uint32_t id;
+    PlsFormat format;
+};
+
+class CompilerGLSL : public Compiler
+{
+public:
+    struct Options
+    {
+        uint32_t version = 450;
+        bool es = false;
+        bool force_temporary = false;
+
+        // If true, variables will be moved to their appropriate scope through CFG analysis.
+        bool cfg_analysis = true;
+
+        // If true, Vulkan GLSL features are used instead of GL-compatible features.
+        // Mostly useful for debugging SPIR-V files.
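+        // For example, with vulkan_semantics enabled a push constant block stays a
+        // "push_constant" block instead of being lowered to a plain uniform block,
+        // and subpass inputs use subpassLoad() rather than texelFetch() emulation.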
+        bool vulkan_semantics = false;
+
+        enum Precision
+        {
+            DontCare,
+            Lowp,
+            Mediump,
+            Highp
+        };
+
+        struct
+        {
+            // In vertex shaders, rewrite [0, w] depth (Vulkan/D3D style) to [-w, w] depth (GL style).
+            bool fixup_clipspace = true;
+        } vertex;
+
+        struct
+        {
+            // Add precision mediump float in ES targets when emitting GLES source.
+            // Add precision highp int in ES targets when emitting GLES source.
+            Precision default_float_precision = Mediump;
+            Precision default_int_precision = Highp;
+        } fragment;
+    };
+
+    void remap_pixel_local_storage(std::vector<PlsRemap> inputs, std::vector<PlsRemap> outputs)
+    {
+        pls_inputs = std::move(inputs);
+        pls_outputs = std::move(outputs);
+        remap_pls_variables();
+    }
+
+    CompilerGLSL(std::vector<uint32_t> spirv_)
+        : Compiler(move(spirv_))
+    {
+        if (source.known)
+        {
+            options.es = source.es;
+            options.version = source.version;
+        }
+    }
+
+    const Options &get_options() const
+    {
+        return options;
+    }
+    void set_options(Options &opts)
+    {
+        options = opts;
+    }
+
+    std::string compile() override;
+
+    // Returns the current string held in the conversion buffer. Useful for
+    // capturing what has been converted so far when compile() throws an error.
+    std::string get_partial_source();
+
+    // Adds a line to be added right after #version in GLSL backend.
+    // This is useful for enabling custom extensions which are outside the scope of SPIRV-Cross.
+    // This can be combined with variable remapping.
+    // A new-line will be added.
+    //
+    // While add_header_line() is a more generic way of adding arbitrary text to the header
+    // of a GLSL file, require_extension() should be used when adding extensions since it will
+    // avoid creating collisions with SPIRV-Cross generated extensions.
+    //
+    // Code added via add_header_line() is typically backend-specific.
+    void add_header_line(const std::string &str);
+
+    // Adds an extension which is required to run this shader, e.g.
+    // require_extension("GL_KHR_my_extension");
+    void require_extension(const std::string &ext);
+
+protected:
+    void reset();
+    void emit_function(SPIRFunction &func, uint64_t return_flags);
+
+    // Virtualize methods which need to be overridden by subclass targets like C++ and such.
+    virtual void emit_function_prototype(SPIRFunction &func, uint64_t return_flags);
+    virtual void emit_instruction(const Instruction &instr);
+    virtual void emit_glsl_op(uint32_t result_type, uint32_t result_id, uint32_t op, const uint32_t *args,
+                              uint32_t count);
+    virtual void emit_header();
+    virtual void emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id);
+    virtual void emit_texture_op(const Instruction &i);
+    virtual std::string type_to_glsl(const SPIRType &type);
+    virtual std::string builtin_to_glsl(spv::BuiltIn builtin);
+    virtual std::string member_decl(const SPIRType &type, const SPIRType &member_type, uint32_t member);
+    virtual std::string image_type_glsl(const SPIRType &type);
+    virtual std::string constant_expression(const SPIRConstant &c);
+    std::string constant_op_expression(const SPIRConstantOp &cop);
+    virtual std::string constant_expression_vector(const SPIRConstant &c, uint32_t vector);
+    virtual void emit_fixup();
+    virtual std::string variable_decl(const SPIRType &type, const std::string &name);
+    virtual std::string to_func_call_arg(uint32_t id);
+
+    std::unique_ptr<std::ostringstream> buffer;
+
+    template <typename T>
+    inline void statement_inner(T &&t)
+    {
+        (*buffer) << std::forward<T>(t);
+        statement_count++;
+    }
+
+    template <typename T, typename... Ts>
+    inline void statement_inner(T &&t, Ts &&... ts)
+    {
+        (*buffer) << std::forward<T>(t);
+        statement_count++;
+        statement_inner(std::forward<Ts>(ts)...);
+    }
+
+    template <typename... Ts>
+    inline void statement(Ts &&... ts)
+    {
+        if (redirect_statement)
+            redirect_statement->push_back(join(std::forward<Ts>(ts)...));
+        else
+        {
+            for (uint32_t i = 0; i < indent; i++)
+                (*buffer) << "    ";
+
+            statement_inner(std::forward<Ts>(ts)...);
+            (*buffer) << '\n';
+        }
+    }
+
+    template <typename... Ts>
+    inline void statement_no_indent(Ts &&... ts)
+    {
+        auto old_indent = indent;
+        indent = 0;
+        statement(std::forward<Ts>(ts)...);
+        indent = old_indent;
+    }
+
+    // Used for implementing continue blocks where
+    // we want to obtain a list of statements we can merge
+    // on a single line separated by comma.
+    std::vector<std::string> *redirect_statement = nullptr;
+    const SPIRBlock *current_continue_block = nullptr;
+
+    void begin_scope();
+    void end_scope();
+    void end_scope_decl();
+    void end_scope_decl(const std::string &decl);
+
+    Options options;
+
+    std::string type_to_array_glsl(const SPIRType &type);
+    std::string to_array_size(const SPIRType &type, uint32_t index);
+    uint32_t to_array_size_literal(const SPIRType &type, uint32_t index) const;
+    std::string variable_decl(const SPIRVariable &variable);
+
+    void add_local_variable_name(uint32_t id);
+    void add_resource_name(uint32_t id);
+    void add_member_name(SPIRType &type, uint32_t name);
+
+    bool is_non_native_row_major_matrix(uint32_t id);
+    bool member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index);
+    virtual std::string convert_row_major_matrix(std::string exp_str);
+
+    std::unordered_set<std::string> local_variable_names;
+    std::unordered_set<std::string> resource_names;
+
+    bool processing_entry_point = false;
+
+    // Can be overridden by subclass backends for trivial things which
+    // shouldn't need polymorphism.
+    struct BackendVariations
+    {
+        std::string discard_literal = "discard";
+        bool float_literal_suffix = false;
+        bool double_literal_suffix = true;
+        bool uint32_t_literal_suffix = true;
+        bool long_long_literal_suffix = false;
+        const char *basic_int_type = "int";
+        const char *basic_uint_type = "uint";
+        bool swizzle_is_function = false;
+        bool shared_is_implied = false;
+        bool flexible_member_array_supported = true;
+        bool explicit_struct_type = false;
+        bool use_initializer_list = false;
+        bool native_row_major_matrix = true;
+    } backend;
+
+    void emit_struct(SPIRType &type);
+    void emit_resources();
+    void emit_buffer_block(const SPIRVariable &type);
+    void emit_push_constant_block(const SPIRVariable &var);
+    void emit_push_constant_block_vulkan(const SPIRVariable &var);
+    void emit_push_constant_block_glsl(const SPIRVariable &var);
+    void emit_interface_block(const SPIRVariable &type);
+    void emit_block_chain(SPIRBlock &block);
+    void emit_specialization_constant(const SPIRConstant &constant);
+    std::string emit_continue_block(uint32_t continue_block);
+    bool attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method method);
+    void emit_uniform(const SPIRVariable &var);
+    void propagate_loop_dominators(const SPIRBlock &block);
+
+    void branch(uint32_t from, uint32_t to);
+    void branch(uint32_t from, uint32_t cond, uint32_t true_block, uint32_t false_block);
+    void flush_phi(uint32_t from, uint32_t to);
+    bool flush_phi_required(uint32_t from, uint32_t to);
+    void flush_variable_declaration(uint32_t id);
+    void flush_undeclared_variables(SPIRBlock &block);
+
+    bool should_forward(uint32_t id);
+    void emit_mix_op(uint32_t result_type, uint32_t id, uint32_t left, uint32_t right, uint32_t lerp);
+    bool to_trivial_mix_op(const SPIRType &type,
std::string &op, uint32_t left, uint32_t right, uint32_t lerp); + void emit_quaternary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, uint32_t op2, + uint32_t op3, const char *op); + void emit_trinary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, uint32_t op2, + const char *op); + void emit_binary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op); + void emit_binary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op, + SPIRType::BaseType input_type, bool skip_cast_if_equal_type); + void emit_unary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op); + void emit_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op); + void emit_binary_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op, + SPIRType::BaseType input_type, bool skip_cast_if_equal_type); + + SPIRType binary_op_bitcast_helper(std::string &cast_op0, std::string &cast_op1, SPIRType::BaseType &input_type, + uint32_t op0, uint32_t op1, bool skip_cast_if_equal_type); + + void emit_unary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op); + bool expression_is_forwarded(uint32_t id); + SPIRExpression &emit_op(uint32_t result_type, uint32_t result_id, const std::string &rhs, bool forward_rhs, + bool suppress_usage_tracking = false); + std::string access_chain(uint32_t base, const uint32_t *indices, uint32_t count, bool index_is_literal, + bool chain_only = false); + + const char *index_to_swizzle(uint32_t index); + std::string remap_swizzle(uint32_t result_type, uint32_t input_components, uint32_t expr); + std::string declare_temporary(uint32_t type, uint32_t id); + void append_global_func_args(const SPIRFunction &func, uint32_t index, std::vector &arglist); + std::string to_expression(uint32_t id); + std::string to_enclosed_expression(uint32_t id); + void strip_enclosed_expression(std::string &expr); + std::string to_member_name(const SPIRType &type, uint32_t index); + std::string type_to_glsl_constructor(const SPIRType &type); + std::string argument_decl(const SPIRFunction::Parameter &arg); + std::string to_qualifiers_glsl(uint32_t id); + const char *to_precision_qualifiers_glsl(uint32_t id); + const char *flags_to_precision_qualifiers_glsl(const SPIRType &type, uint64_t flags); + const char *format_to_glsl(spv::ImageFormat format); + std::string layout_for_member(const SPIRType &type, uint32_t index); + std::string to_interpolation_qualifiers(uint64_t flags); + uint64_t combined_decoration_for_member(const SPIRType &type, uint32_t index); + std::string layout_for_variable(const SPIRVariable &variable); + std::string to_combined_image_sampler(uint32_t image_id, uint32_t samp_id); + bool skip_argument(uint32_t id) const; + + bool ssbo_is_std430_packing(const SPIRType &type); + uint32_t type_to_std430_base_size(const SPIRType &type); + uint32_t type_to_std430_alignment(const SPIRType &type, uint64_t flags); + uint32_t type_to_std430_array_stride(const SPIRType &type, uint64_t flags); + uint32_t type_to_std430_size(const SPIRType &type, uint64_t flags); + + std::string bitcast_glsl(const SPIRType &result_type, uint32_t arg); + std::string bitcast_glsl_op(const SPIRType &result_type, const SPIRType &argument_type); + std::string build_composite_combiner(const uint32_t *elems, uint32_t length); + bool remove_duplicate_swizzle(std::string &op); + bool 
remove_unity_swizzle(uint32_t base, std::string &op); + + // Can modify flags to remote readonly/writeonly if image type + // and force recompile. + bool check_atomic_image(uint32_t id); + + void replace_illegal_names(); + + void replace_fragment_output(SPIRVariable &var); + void replace_fragment_outputs(); + std::string legacy_tex_op(const std::string &op, const SPIRType &imgtype); + + uint32_t indent = 0; + + std::unordered_set emitted_functions; + + // Usage tracking. If a temporary is used more than once, use the temporary instead to + // avoid AST explosion when SPIRV is generated with pure SSA and doesn't write stuff to variables. + std::unordered_map expression_usage_counts; + std::unordered_set forced_temporaries; + std::unordered_set forwarded_temporaries; + void track_expression_read(uint32_t id); + + std::unordered_set forced_extensions; + std::vector header_lines; + + uint32_t statement_count; + + inline bool is_legacy() const + { + return (options.es && options.version < 300) || (!options.es && options.version < 130); + } + + inline bool is_legacy_es() const + { + return options.es && options.version < 300; + } + + inline bool is_legacy_desktop() const + { + return !options.es && options.version < 130; + } + + bool args_will_forward(uint32_t id, const uint32_t *args, uint32_t num_args, bool pure); + void register_call_out_argument(uint32_t id); + void register_impure_function_call(); + + // GL_EXT_shader_pixel_local_storage support. + std::vector pls_inputs; + std::vector pls_outputs; + std::string pls_decl(const PlsRemap &variable); + const char *to_pls_qualifiers_glsl(const SPIRVariable &variable); + void emit_pls(); + void remap_pls_variables(); + + void add_variable(std::unordered_set &variables, uint32_t id); + void check_function_call_constraints(const uint32_t *args, uint32_t length); + void handle_invalid_expression(uint32_t id); + void find_static_extensions(); + + std::string emit_for_loop_initializers(const SPIRBlock &block); + + bool optimize_read_modify_write(const std::string &lhs, const std::string &rhs); +}; +} + +#endif diff --git a/spirv_msl.cpp b/spirv_msl.cpp new file mode 100644 index 0000000000..8ab5b3999f --- /dev/null +++ b/spirv_msl.cpp @@ -0,0 +1,2019 @@ +/* + * Copyright 2015-2016 The Brenwill Workshop Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "spirv_msl.hpp" +#include "GLSL.std.450.h" +#include +#include + +using namespace spv; +using namespace spirv_cross; +using namespace std; + +CompilerMSL::CompilerMSL(vector spirv_) + : CompilerGLSL(move(spirv_)) +{ + options.vertex.fixup_clipspace = false; +} + +string CompilerMSL::compile(MSLConfiguration &msl_cfg, vector *p_vtx_attrs, + std::vector *p_res_bindings) +{ + pad_type_ids_by_pad_len.clear(); + + msl_config = msl_cfg; + + vtx_attrs_by_location.clear(); + if (p_vtx_attrs) + for (auto &va : *p_vtx_attrs) + vtx_attrs_by_location[va.location] = &va; + + resource_bindings.clear(); + if (p_res_bindings) + { + resource_bindings.reserve(p_res_bindings->size()); + for (auto &rb : *p_res_bindings) + resource_bindings.push_back(&rb); + } + + extract_builtins(); + localize_global_variables(); + add_interface_structs(); + extract_global_variables_from_functions(); + + // Do not deal with ES-isms like precision, older extensions and such. + options.es = false; + options.version = 120; + backend.float_literal_suffix = false; + backend.uint32_t_literal_suffix = true; + backend.basic_int_type = "int"; + backend.basic_uint_type = "uint"; + backend.discard_literal = "discard_fragment()"; + backend.swizzle_is_function = false; + backend.shared_is_implied = false; + backend.native_row_major_matrix = false; + + uint32_t pass_count = 0; + do + { + if (pass_count >= 3) + SPIRV_CROSS_THROW("Over 3 compilation loops detected. Must be a bug!"); + + reset(); + + next_metal_resource_index = MSLResourceBinding(); // Start bindings at zero + + // Move constructor for this type is broken on GCC 4.9 ... + buffer = unique_ptr(new ostringstream()); + + emit_header(); + emit_resources(); + emit_function_declarations(); + emit_function(get(entry_point), 0); + + pass_count++; + } while (force_recompile); + + return buffer->str(); +} + +string CompilerMSL::compile() +{ + MSLConfiguration default_msl_cfg; + return compile(default_msl_cfg, nullptr, nullptr); +} + +// Adds any builtins used by this shader to the builtin_vars collection +void CompilerMSL::extract_builtins() +{ + builtin_vars.clear(); + + for (auto &id : ids) + { + if (id.get_type() == TypeVariable) + { + auto &var = id.get(); + auto &dec = meta[var.self].decoration; + + if (dec.builtin) + builtin_vars[dec.builtin_type] = var.self; + } + } + + auto &execution = get_entry_point(); + if (execution.model == ExecutionModelVertex) + { + if (!(builtin_vars[BuiltInVertexIndex] || builtin_vars[BuiltInVertexId])) + add_builtin(BuiltInVertexIndex); + + if (!(builtin_vars[BuiltInInstanceIndex] || builtin_vars[BuiltInInstanceId])) + add_builtin(BuiltInInstanceIndex); + } +} + +// Adds an appropriate built-in variable for the specified builtin type. +void CompilerMSL::add_builtin(BuiltIn builtin_type) +{ + + // Add a new typed variable for this interface structure. + uint32_t next_id = increase_bound_by(2); + uint32_t ib_type_id = next_id++; + auto &ib_type = set(ib_type_id); + ib_type.basetype = SPIRType::UInt; + ib_type.storage = StorageClassInput; + + uint32_t ib_var_id = next_id++; + set(ib_var_id, ib_type_id, StorageClassInput, 0); + set_decoration(ib_var_id, DecorationBuiltIn, builtin_type); + set_name(ib_var_id, builtin_to_glsl(builtin_type)); + + builtin_vars[builtin_type] = ib_var_id; +} + +// Move the Private global variables to the entry function. +// Non-constant variables cannot have global scope in Metal. 
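+// (In GLSL output such variables could remain at file scope; for Metal they are hoisted
+// into the entry point function as ordinary locals, as done below.)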
+void CompilerMSL::localize_global_variables()
+{
+	auto &entry_func = get<SPIRFunction>(entry_point);
+	auto iter = global_variables.begin();
+	while (iter != global_variables.end())
+	{
+		uint32_t gv_id = *iter;
+		auto &gbl_var = get<SPIRVariable>(gv_id);
+		if (gbl_var.storage == StorageClassPrivate)
+		{
+			entry_func.add_local_variable(gv_id);
+			iter = global_variables.erase(iter);
+		}
+		else
+		{
+			iter++;
+		}
+	}
+}
+
+// For any global variable accessed directly by a function,
+// extract that variable and add it as an argument to that function.
+void CompilerMSL::extract_global_variables_from_functions()
+{
+	// Uniforms
+	std::set<uint32_t> global_var_ids;
+	for (auto &id : ids)
+	{
+		if (id.get_type() == TypeVariable)
+		{
+			auto &var = id.get<SPIRVariable>();
+			if (var.storage == StorageClassInput || var.storage == StorageClassUniform ||
+			    var.storage == StorageClassUniformConstant || var.storage == StorageClassPushConstant)
+				global_var_ids.insert(var.self);
+		}
+	}
+
+	std::set<uint32_t> added_arg_ids;
+	std::set<uint32_t> processed_func_ids;
+	extract_global_variables_from_function(entry_point, added_arg_ids, global_var_ids, processed_func_ids);
+}
+
+// MSL does not support the use of global variables for shader input content.
+// For any global variable accessed directly by the specified function, extract that variable,
+// add it as an argument to that function, and add the arg to the added_arg_ids collection.
+void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std::set<uint32_t> &added_arg_ids,
+                                                         std::set<uint32_t> &global_var_ids,
+                                                         std::set<uint32_t> &processed_func_ids)
+{
+	// Avoid processing a function more than once.
+	if (processed_func_ids.find(func_id) != processed_func_ids.end())
+		return;
+
+	processed_func_ids.insert(func_id);
+
+	auto &func = get<SPIRFunction>(func_id);
+
+	// Recursively establish global args added to functions on which we depend.
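+	// OpLoad/OpAccessChain on a known global marks that variable as a needed argument here,
+	// while OpFunctionCall recurses into the callee and merges the callee's needs into this set.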
+ for (auto block : func.blocks) + { + auto &b = get(block); + for (auto &i : b.ops) + { + auto ops = stream(i); + auto op = static_cast(i.op); + + switch (op) + { + case OpLoad: + case OpAccessChain: + { + uint32_t base_id = ops[2]; + if (global_var_ids.find(base_id) != global_var_ids.end()) + added_arg_ids.insert(base_id); + break; + } + case OpFunctionCall: + { + uint32_t inner_func_id = ops[2]; + std::set inner_func_args; + extract_global_variables_from_function(inner_func_id, inner_func_args, global_var_ids, + processed_func_ids); + added_arg_ids.insert(inner_func_args.begin(), inner_func_args.end()); + break; + } + + default: + break; + } + } + } + + // Add the global variables as arguments to the function + if (func_id != entry_point) + { + uint32_t next_id = increase_bound_by(uint32_t(added_arg_ids.size())); + for (uint32_t arg_id : added_arg_ids) + { + uint32_t type_id = get(arg_id).basetype; + func.add_parameter(type_id, next_id); + set(next_id, type_id, StorageClassFunction); + + // Ensure both the existing and new variables have the same name, and the name is valid + string vld_name = ensure_valid_name(to_name(arg_id), "v"); + set_name(arg_id, vld_name); + set_name(next_id, vld_name); + + meta[next_id].decoration.qualified_alias = meta[arg_id].decoration.qualified_alias; + next_id++; + } + } +} + +// Adds any interface structure variables needed by this shader +void CompilerMSL::add_interface_structs() +{ + auto &execution = get_entry_point(); + + stage_in_var_ids.clear(); + qual_pos_var_name = ""; + + uint32_t var_id; + if (execution.model == ExecutionModelVertex && !vtx_attrs_by_location.empty()) + { + std::set vtx_bindings; + bind_vertex_attributes(vtx_bindings); + for (uint32_t vb : vtx_bindings) + { + var_id = add_interface_struct(StorageClassInput, vb); + if (var_id) + stage_in_var_ids.push_back(var_id); + } + } + else + { + var_id = add_interface_struct(StorageClassInput); + if (var_id) + stage_in_var_ids.push_back(var_id); + } + + stage_out_var_id = add_interface_struct(StorageClassOutput); +} + +// Iterate through the variables and populates each input vertex attribute variable +// from the binding info provided during compiler construction, matching by location. +void CompilerMSL::bind_vertex_attributes(std::set &bindings) +{ + auto &execution = get_entry_point(); + + if (execution.model == ExecutionModelVertex) + { + for (auto &id : ids) + { + if (id.get_type() == TypeVariable) + { + auto &var = id.get(); + auto &type = get(var.basetype); + + if (var.storage == StorageClassInput && interface_variable_exists_in_entry_point(var.self) && + !is_hidden_variable(var) && type.pointer) + { + auto &dec = meta[var.self].decoration; + MSLVertexAttr *p_va = vtx_attrs_by_location[dec.location]; + if (p_va) + { + dec.binding = p_va->msl_buffer; + dec.offset = p_va->msl_offset; + dec.array_stride = p_va->msl_stride; + dec.per_instance = p_va->per_instance; + + // Mark the vertex attributes that were used. + p_va->used_by_shader = true; + bindings.insert(p_va->msl_buffer); + } + } + } + } + } +} + +// Add an the interface structure for the type of storage. For vertex inputs, each +// binding must have its own structure, and a structure is created for vtx_binding. +// For non-vertex input, and all outputs, the vtx_binding argument is ignored. +// Returns the ID of the newly added variable, or zero if no variable was added. 
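+// For a hypothetical vertex shader, the resulting output struct might read:
+//     struct mmain_out
+//     {
+//         float4 gl_Position [[position]];
+//         float2 uv [[user(locn0)]];
+//     };
+// (names and qualifiers illustrative; see member_attribute_qualifier for the actual rules).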
+uint32_t CompilerMSL::add_interface_struct(StorageClass storage, uint32_t vtx_binding) +{ + auto &execution = get_entry_point(); + bool incl_builtins = (storage == StorageClassOutput); + bool match_binding = (execution.model == ExecutionModelVertex) && (storage == StorageClassInput); + + // Accumulate the variables that should appear in the interface struct + vector vars; + for (auto &id : ids) + { + if (id.get_type() == TypeVariable) + { + auto &var = id.get(); + auto &type = get(var.basetype); + auto &dec = meta[var.self].decoration; + + if (var.storage == storage && interface_variable_exists_in_entry_point(var.self) && + !is_hidden_variable(var, incl_builtins) && (!match_binding || (vtx_binding == dec.binding)) && + type.pointer) + { + vars.push_back(&var); + } + } + } + + if (vars.empty()) + { + return 0; + } // Leave if no variables qualify + + // Add a new typed variable for this interface structure. + // The initializer expression is allocated here, but populated when the function + // declaraion is emitted, because it is cleared after each compilation pass. + uint32_t next_id = increase_bound_by(3); + uint32_t ib_type_id = next_id++; + auto &ib_type = set(ib_type_id); + ib_type.basetype = SPIRType::Struct; + ib_type.storage = storage; + set_decoration(ib_type.self, DecorationBlock); + + uint32_t ib_var_id = next_id++; + auto &var = set(ib_var_id, ib_type_id, storage, 0); + var.initializer = next_id++; + + // Set the binding of the variable and mark if packed (used only with vertex inputs) + auto &var_dec = meta[ib_var_id].decoration; + var_dec.binding = vtx_binding; + + // Track whether this is vertex input that is indexed, as opposed to stage_in + bool is_indxd_vtx_input = (execution.model == ExecutionModelVertex && storage == StorageClassInput && + var_dec.binding != msl_config.vtx_attr_stage_in_binding); + + string ib_var_ref; + + if (storage == StorageClassInput) + { + ib_var_ref = stage_in_var_name; + + // Multiple vertex input bindings are available, so qualify each with the Metal buffer index + if (execution.model == ExecutionModelVertex) + ib_var_ref += convert_to_string(vtx_binding); + } + + if (storage == StorageClassOutput) + { + ib_var_ref = stage_out_var_name; + + // Add the output interface struct as a local variable to the entry function, + // and force the entry function to return the output interface struct from + // any blocks that perform a function return. + auto &entry_func = get(entry_point); + entry_func.add_local_variable(ib_var_id); + for (auto &blk_id : entry_func.blocks) + { + auto &blk = get(blk_id); + if (blk.terminator == SPIRBlock::Return) + blk.return_value = ib_var_id; + } + } + + set_name(ib_type_id, get_entry_point_name() + "_" + ib_var_ref); + set_name(ib_var_id, ib_var_ref); + + size_t struct_size = 0; + bool first_elem = true; + for (auto p_var : vars) + { + // For index-accessed vertex attributes, copy the attribute characteristics to the parent + // structure (all components have same vertex attribute characteristics except offset), + // and add a reference to the vertex index builtin to the parent struct variable name. 
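+		// e.g. member references then take the form "in1[gl_VertexIndex].position"
+		// or "in1[gl_InstanceIndex].color" (buffer index and names illustrative).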
+ if (is_indxd_vtx_input && first_elem) + { + auto &elem_dec = meta[p_var->self].decoration; + var_dec.binding = elem_dec.binding; + var_dec.array_stride = elem_dec.array_stride; + var_dec.per_instance = elem_dec.per_instance; + ib_var_ref += "[" + get_vtx_idx_var_name(var_dec.per_instance) + "]"; + first_elem = false; + } + + auto &type = get(p_var->basetype); + if (type.basetype == SPIRType::Struct) + { + // Flatten the struct members into the interface struct + uint32_t i = 0; + for (auto &member : type.member_types) + { + // If needed, add a padding member to the struct to align to the next member's offset. + uint32_t mbr_offset = get_member_decoration(type.self, i, DecorationOffset); + struct_size = + pad_to_offset(ib_type, is_indxd_vtx_input, (var_dec.offset + mbr_offset), uint32_t(struct_size)); + + // Add a reference to the member to the interface struct. + auto &membertype = get(member); + uint32_t ib_mbr_idx = uint32_t(ib_type.member_types.size()); + ib_type.member_types.push_back(membertype.self); + + // Give the member a name, and assign it an offset within the struct. + string mbr_name = ensure_valid_name(to_qualified_member_name(type, i), "m"); + set_member_name(ib_type.self, ib_mbr_idx, mbr_name); + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationOffset, uint32_t(struct_size)); + struct_size = get_declared_struct_size(ib_type); + + // Update the original variable reference to include the structure reference + string qual_var_name = ib_var_ref + "." + mbr_name; + set_member_qualified_name(type.self, i, qual_var_name); + + // Copy the variable location from the original variable to the member + uint32_t locn = get_member_decoration(type.self, i, DecorationLocation); + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); + + // Mark the member as builtin if needed + BuiltIn builtin; + if (is_member_builtin(type, i, &builtin)) + { + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationBuiltIn, builtin); + if (builtin == BuiltInPosition) + qual_pos_var_name = qual_var_name; + } + + i++; + } + } + else + { + // If needed, add a padding member to the struct to align to the next member's offset. + struct_size = pad_to_offset(ib_type, is_indxd_vtx_input, var_dec.offset, uint32_t(struct_size)); + + // Add a reference to the variable type to the interface struct. + uint32_t ib_mbr_idx = uint32_t(ib_type.member_types.size()); + ib_type.member_types.push_back(type.self); + + // Give the member a name, and assign it an offset within the struct. + string mbr_name = ensure_valid_name(to_name(p_var->self), "m"); + set_member_name(ib_type.self, ib_mbr_idx, mbr_name); + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationOffset, uint32_t(struct_size)); + struct_size = get_declared_struct_size(ib_type); + + // Update the original variable reference to include the structure reference + string qual_var_name = ib_var_ref + "." 
+			    mbr_name;
+			meta[p_var->self].decoration.qualified_alias = qual_var_name;
+
+			// Copy the variable location from the original variable to the member.
+			auto &dec = meta[p_var->self].decoration;
+			set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, dec.location);
+
+			// Mark the member as builtin if needed.
+			if (is_builtin_variable(*p_var))
+			{
+				set_member_decoration(ib_type.self, ib_mbr_idx, DecorationBuiltIn, dec.builtin_type);
+				if (dec.builtin_type == BuiltInPosition)
+					qual_pos_var_name = qual_var_name;
+			}
+		}
+	}
+
+	// Sort the members of the interface structure by their offsets.
+	MemberSorter memberSorter(ib_type, meta[ib_type.self], MemberSorter::Offset);
+	memberSorter.sort();
+
+	return ib_var_id;
+}
+
+// Emits the file header info.
+void CompilerMSL::emit_header()
+{
+	for (auto &header : header_lines)
+		statement(header);
+
+	statement("#include <metal_stdlib>");
+	statement("#include <simd/simd.h>");
+	statement("");
+	statement("using namespace metal;");
+	statement("");
+}
+
+void CompilerMSL::emit_resources()
+{
+	// Output all basic struct types which are not Block or BufferBlock, as these are declared in-place
+	// when such variables are instantiated.
+	for (auto &id : ids)
+	{
+		if (id.get_type() == TypeType)
+		{
+			auto &type = id.get<SPIRType>();
+			if (type.basetype == SPIRType::Struct && type.array.empty() && !type.pointer &&
+			    (meta[type.self].decoration.decoration_flags &
+			     ((1ull << DecorationBlock) | (1ull << DecorationBufferBlock))) == 0)
+			{
+				emit_struct(type);
+			}
+		}
+	}
+
+	// Output uniform buffers and constants.
+	for (auto &id : ids)
+	{
+		if (id.get_type() == TypeVariable)
+		{
+			auto &var = id.get<SPIRVariable>();
+			auto &type = get<SPIRType>(var.basetype);
+
+			if (var.storage != StorageClassFunction && type.pointer &&
+			    (type.storage == StorageClassUniform || type.storage == StorageClassUniformConstant ||
+			     type.storage == StorageClassPushConstant) &&
+			    !is_hidden_variable(var) && (meta[type.self].decoration.decoration_flags &
+			                                 ((1ull << DecorationBlock) | (1ull << DecorationBufferBlock))))
+			{
+				emit_struct(type);
+			}
+		}
+	}
+
+	// Output interface blocks.
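+	// One stage-in struct per vertex buffer binding (when indexed attributes are in use),
+	// followed by the single stage-out struct.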
+ for (uint32_t var_id : stage_in_var_ids) + emit_interface_block(var_id); + + emit_interface_block(stage_out_var_id); + + // TODO: Consolidate and output loose uniforms into an input struct +} + +// Override for MSL-specific syntax instructions +void CompilerMSL::emit_instruction(const Instruction &instruction) +{ + +#define BOP(op) emit_binary_op(ops[0], ops[1], ops[2], ops[3], #op) +#define BOP_CAST(op, type) \ + emit_binary_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode)) +#define UOP(op) emit_unary_op(ops[0], ops[1], ops[2], #op) +#define QFOP(op) emit_quaternary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], #op) +#define TFOP(op) emit_trinary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], #op) +#define BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op) +#define BFOP_CAST(op, type) \ + emit_binary_func_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode)) +#define BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op) +#define UFOP(op) emit_unary_func_op(ops[0], ops[1], ops[2], #op) + + auto ops = stream(instruction); + auto opcode = static_cast(instruction.op); + + switch (opcode) + { + + // ALU + case OpFMod: + BFOP(fmod); + break; + + // Comparisons + case OpIEqual: + case OpLogicalEqual: + case OpFOrdEqual: + BOP(==); + break; + + case OpINotEqual: + case OpLogicalNotEqual: + case OpFOrdNotEqual: + BOP(!=); + break; + + case OpUGreaterThan: + case OpSGreaterThan: + case OpFOrdGreaterThan: + BOP(>); + break; + + case OpUGreaterThanEqual: + case OpSGreaterThanEqual: + case OpFOrdGreaterThanEqual: + BOP(>=); + break; + + case OpULessThan: + case OpSLessThan: + case OpFOrdLessThan: + BOP(<); + break; + + case OpULessThanEqual: + case OpSLessThanEqual: + case OpFOrdLessThanEqual: + BOP(<=); + break; + + // Derivatives + case OpDPdx: + UFOP(dfdx); + break; + + case OpDPdy: + UFOP(dfdy); + break; + + case OpImageQuerySize: + { + auto &type = expression_type(ops[2]); + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + if (type.basetype == SPIRType::Image) + { + string img_exp = to_expression(ops[2]); + auto &img_type = type.image; + switch (img_type.dim) + { + case Dim1D: + if (img_type.arrayed) + emit_op(result_type, id, join("uint2(", img_exp, ".get_width(), ", img_exp, ".get_array_size())"), + false); + else + emit_op(result_type, id, join(img_exp, ".get_width()"), true); + break; + + case Dim2D: + case DimCube: + if (img_type.arrayed) + emit_op(result_type, id, join("uint3(", img_exp, ".get_width(), ", img_exp, ".get_height(), ", + img_exp, ".get_array_size())"), + false); + else + emit_op(result_type, id, join("uint2(", img_exp, ".get_width(), ", img_exp, ".get_height())"), + false); + break; + + case Dim3D: + emit_op(result_type, id, + join("uint3(", img_exp, ".get_width(), ", img_exp, ".get_height(), ", img_exp, ".get_depth())"), + false); + break; + + default: + break; + } + } + else + SPIRV_CROSS_THROW("Invalid type for OpImageQuerySize."); + break; + } + + default: + CompilerGLSL::emit_instruction(instruction); + break; + } +} + +// Override for MSL-specific extension syntax instructions +void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t count) +{ + GLSLstd450 op = static_cast(eop); + + switch (op) + { + case GLSLstd450Atan2: + emit_binary_func_op(result_type, id, args[0], args[1], "atan2"); + break; + + default: + CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); + break; + } +} + +// Emit a 
structure declaration for the specified interface variable. +void CompilerMSL::emit_interface_block(uint32_t ib_var_id) +{ + if (ib_var_id) + { + auto &ib_var = get(ib_var_id); + auto &ib_type = get(ib_var.basetype); + emit_struct(ib_type); + } +} + +// Output a declaration statement for each function. +void CompilerMSL::emit_function_declarations() +{ + for (auto &id : ids) + if (id.get_type() == TypeFunction) + { + auto &func = id.get(); + if (func.self != entry_point) + emit_function_prototype(func, true); + } + + statement(""); +} + +void CompilerMSL::emit_function_prototype(SPIRFunction &func, uint64_t) +{ + emit_function_prototype(func, false); +} + +// Emits the declaration signature of the specified function. +// If this is the entry point function, Metal-specific return value and function arguments are added. +void CompilerMSL::emit_function_prototype(SPIRFunction &func, bool is_decl) +{ + local_variable_names = resource_names; + string decl; + + processing_entry_point = (func.self == entry_point); + + auto &type = get(func.return_type); + decl += func_type_decl(type); + decl += " "; + decl += clean_func_name(to_name(func.self)); + + decl += "("; + + if (processing_entry_point) + { + decl += entry_point_args(!func.arguments.empty()); + + // If entry point function has a output interface struct, set its initializer. + // This is done at this late stage because the initialization expression is + // cleared after each compilation pass. + if (stage_out_var_id) + { + auto &so_var = get(stage_out_var_id); + auto &so_type = get(so_var.basetype); + set(so_var.initializer, "{}", so_type.self, true); + } + } + + for (auto &arg : func.arguments) + { + add_local_variable_name(arg.id); + + bool is_uniform_struct = false; + auto *var = maybe_get(arg.id); + if (var) + { + var->parameter = &arg; // Hold a pointer to the parameter so we can invalidate the readonly field if needed. + + // Check if this arg is one of the synthetic uniform args + // created to handle uniform access inside the function + auto &var_type = get(var->basetype); + is_uniform_struct = + ((var_type.basetype == SPIRType::Struct) && + (var_type.storage == StorageClassUniform || var_type.storage == StorageClassUniformConstant || + var_type.storage == StorageClassPushConstant)); + } + + decl += (is_uniform_struct ? "constant " : "thread "); + decl += argument_decl(arg); + + // Manufacture automatic sampler arg for SampledImage texture + auto &arg_type = get(arg.type); + if (arg_type.basetype == SPIRType::SampledImage) + decl += ", thread const sampler& " + to_sampler_expression(arg.id); + + if (&arg != &func.arguments.back()) + decl += ", "; + } + + decl += ")"; + statement(decl, (is_decl ? 
";" : "")); +} + +// Emit a texture operation +void CompilerMSL::emit_texture_op(const Instruction &i) +{ + auto ops = stream(i); + auto op = static_cast(i.op); + uint32_t length = i.length; + + if (i.offset + length > spirv.size()) + SPIRV_CROSS_THROW("Compiler::compile() opcode out of range."); + + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t img = ops[2]; + uint32_t coord = ops[3]; + uint32_t comp = 0; + bool gather = false; + bool fetch = false; + const uint32_t *opt = nullptr; + + switch (op) + { + case OpImageSampleDrefImplicitLod: + case OpImageSampleDrefExplicitLod: + opt = &ops[5]; + length -= 5; + break; + + case OpImageSampleProjDrefImplicitLod: + case OpImageSampleProjDrefExplicitLod: + opt = &ops[5]; + length -= 5; + break; + + case OpImageDrefGather: + opt = &ops[5]; + gather = true; + length -= 5; + break; + + case OpImageGather: + comp = ops[4]; + opt = &ops[5]; + gather = true; + length -= 5; + break; + + case OpImageFetch: + fetch = true; + opt = &ops[4]; + length -= 4; + break; + + case OpImageSampleImplicitLod: + case OpImageSampleExplicitLod: + case OpImageSampleProjImplicitLod: + case OpImageSampleProjExplicitLod: + default: + opt = &ops[4]; + length -= 4; + break; + } + + uint32_t bias = 0; + uint32_t lod = 0; + uint32_t grad_x = 0; + uint32_t grad_y = 0; + uint32_t coffset = 0; + uint32_t offset = 0; + uint32_t coffsets = 0; + uint32_t sample = 0; + uint32_t flags = 0; + + if (length) + { + flags = *opt; + opt++; + length--; + } + + auto test = [&](uint32_t &v, uint32_t flag) { + if (length && (flags & flag)) + { + v = *opt++; + length--; + } + }; + + test(bias, ImageOperandsBiasMask); + test(lod, ImageOperandsLodMask); + test(grad_x, ImageOperandsGradMask); + test(grad_y, ImageOperandsGradMask); + test(coffset, ImageOperandsConstOffsetMask); + test(offset, ImageOperandsOffsetMask); + test(coffsets, ImageOperandsConstOffsetsMask); + test(sample, ImageOperandsSampleMask); + + auto &img_type = expression_type(img).image; + + // Texture reference + string expr = to_expression(img); + + // Texture function and sampler + if (fetch) + { + expr += ".read("; + } + else + { + expr += std::string(".") + (gather ? 
"gather" : "sample") + "(" + to_sampler_expression(img) + ", "; + } + + // Add texture coordinates + bool forward = should_forward(coord); + auto coord_expr = to_enclosed_expression(coord); + string tex_coords = coord_expr; + string array_coord; + + switch (img_type.dim) + { + case spv::DimBuffer: + break; + case Dim1D: + if (img_type.arrayed) + { + tex_coords = coord_expr + ".x"; + array_coord = coord_expr + ".y"; + remove_duplicate_swizzle(tex_coords); + remove_duplicate_swizzle(array_coord); + } + else + { + tex_coords = coord_expr + ".x"; + } + break; + + case Dim2D: + if (msl_config.flip_frag_y) + { + string coord_x = coord_expr + ".x"; + remove_duplicate_swizzle(coord_x); + string coord_y = coord_expr + ".y"; + remove_duplicate_swizzle(coord_y); + tex_coords = "float2(" + coord_x + ", (1.0 - " + coord_y + "))"; + } + else + { + tex_coords = coord_expr + ".xy"; + remove_duplicate_swizzle(tex_coords); + } + + if (img_type.arrayed) + { + array_coord = coord_expr + ".z"; + remove_duplicate_swizzle(array_coord); + } + + break; + + case Dim3D: + case DimCube: + if (msl_config.flip_frag_y) + { + string coord_x = coord_expr + ".x"; + remove_duplicate_swizzle(coord_x); + string coord_y = coord_expr + ".y"; + remove_duplicate_swizzle(coord_y); + string coord_z = coord_expr + ".z"; + remove_duplicate_swizzle(coord_z); + tex_coords = "float3(" + coord_x + ", (1.0 - " + coord_y + "), " + coord_z + ")"; + } + else + { + tex_coords = coord_expr + ".xyz"; + remove_duplicate_swizzle(tex_coords); + } + + if (img_type.arrayed) + { + array_coord = coord_expr + ".w"; + remove_duplicate_swizzle(array_coord); + } + + break; + + default: + break; + } + expr += tex_coords; + + // Add texture array index + if (!array_coord.empty()) + expr += ", " + array_coord; + + // LOD Options + if (bias) + { + forward = forward && should_forward(bias); + expr += ", bias(" + to_expression(bias) + ")"; + } + + if (lod) + { + forward = forward && should_forward(lod); + if (fetch) + { + expr += ", " + to_expression(lod); + } + else + { + expr += ", level(" + to_expression(lod) + ")"; + } + } + + if (grad_x || grad_y) + { + forward = forward && should_forward(grad_x); + forward = forward && should_forward(grad_y); + string grad_opt; + switch (img_type.dim) + { + case Dim2D: + grad_opt = "2d"; + break; + case Dim3D: + grad_opt = "3d"; + break; + case DimCube: + grad_opt = "cube"; + break; + default: + grad_opt = "unsupported_gradient_dimension"; + break; + } + expr += ", gradient" + grad_opt + "(" + to_expression(grad_x) + ", " + to_expression(grad_y) + ")"; + } + + // Add offsets + string offset_expr; + if (coffset) + { + forward = forward && should_forward(coffset); + offset_expr = to_expression(coffset); + } + else if (offset) + { + forward = forward && should_forward(offset); + offset_expr = to_expression(offset); + } + + if (!offset_expr.empty()) + { + switch (img_type.dim) + { + case Dim2D: + if (msl_config.flip_frag_y) + { + string coord_x = offset_expr + ".x"; + remove_duplicate_swizzle(coord_x); + string coord_y = offset_expr + ".y"; + remove_duplicate_swizzle(coord_y); + offset_expr = "float2(" + coord_x + ", (1.0 - " + coord_y + "))"; + } + else + { + offset_expr = offset_expr + ".xy"; + remove_duplicate_swizzle(offset_expr); + } + + expr += ", " + offset_expr; + break; + + case Dim3D: + if (msl_config.flip_frag_y) + { + string coord_x = offset_expr + ".x"; + remove_duplicate_swizzle(coord_x); + string coord_y = offset_expr + ".y"; + remove_duplicate_swizzle(coord_y); + string coord_z = offset_expr + ".z"; + 
remove_duplicate_swizzle(coord_z); + offset_expr = "float3(" + coord_x + ", (1.0 - " + coord_y + "), " + coord_z + ")"; + } + else + { + offset_expr = offset_expr + ".xyz"; + remove_duplicate_swizzle(offset_expr); + } + + expr += ", " + offset_expr; + break; + + default: + break; + } + } + + if (comp) + { + forward = forward && should_forward(comp); + expr += ", " + to_expression(comp); + } + + expr += ")"; + + emit_op(result_type, id, expr, forward); +} + +// Establish sampled image as expression object and assign the sampler to it. +void CompilerMSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id) +{ + set(result_id, to_expression(image_id), result_type, true); + meta[result_id].sampler = samp_id; +} + +// Returns a string representation of the ID, usable as a function arg. +// Manufacture automatic sampler arg for SampledImage texture. +string CompilerMSL::to_func_call_arg(uint32_t id) +{ + string arg_str = CompilerGLSL::to_func_call_arg(id); + + // Manufacture automatic sampler arg if the arg is a SampledImage texture. + Variant &id_v = ids[id]; + if (id_v.get_type() == TypeVariable) + { + auto &var = id_v.get(); + auto &type = get(var.basetype); + if (type.basetype == SPIRType::SampledImage) + arg_str += ", " + to_sampler_expression(id); + } + + return arg_str; +} + +// If the ID represents a sampled image that has been assigned a sampler already, +// generate an expression for the sampler, otherwise generate a fake sampler name +// by appending a suffix to the expression constructed from the ID. +string CompilerMSL::to_sampler_expression(uint32_t id) +{ + uint32_t samp_id = meta[id].sampler; + return samp_id ? to_expression(samp_id) : to_expression(id) + sampler_name_suffix; +} + +// Called automatically at the end of the entry point function +void CompilerMSL::emit_fixup() +{ + auto &execution = get_entry_point(); + + if ((execution.model == ExecutionModelVertex) && stage_out_var_id && !qual_pos_var_name.empty()) + { + if (options.vertex.fixup_clipspace) + { + const char *suffix = backend.float_literal_suffix ? "f" : ""; + statement(qual_pos_var_name, ".z = 2.0", suffix, " * ", qual_pos_var_name, ".z - ", qual_pos_var_name, + ".w;", " // Adjust clip-space for Metal"); + } + + if (msl_config.flip_vert_y) + statement(qual_pos_var_name, ".y = -(", qual_pos_var_name, ".y);", " // Invert Y-axis for Metal"); + } +} + +// Returns a declaration for a structure member. 
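+// e.g. "float4 gl_Position [[position]]" or "float2 uv [[user(locn0)]]" (illustrative).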
+string CompilerMSL::member_decl(const SPIRType &type, const SPIRType &membertype, uint32_t index) +{ + return join(type_to_glsl(membertype), " ", to_member_name(type, index), type_to_array_glsl(membertype), + member_attribute_qualifier(type, index)); +} + +// Return a MSL qualifier for the specified function attribute member +string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t index) +{ + auto &execution = get_entry_point(); + + BuiltIn builtin; + bool is_builtin = is_member_builtin(type, index, &builtin); + + // Vertex function inputs + if (execution.model == ExecutionModelVertex && type.storage == StorageClassInput) + { + if (is_builtin) + { + switch (builtin) + { + case BuiltInVertexId: + case BuiltInVertexIndex: + case BuiltInInstanceId: + case BuiltInInstanceIndex: + return string(" [[") + builtin_qualifier(builtin) + "]]"; + + default: + return ""; + } + } + uint32_t locn = get_ordered_member_location(type.self, index); + return string(" [[attribute(") + convert_to_string(locn) + ")]]"; + } + + // Vertex function outputs + if (execution.model == ExecutionModelVertex && type.storage == StorageClassOutput) + { + if (is_builtin) + { + switch (builtin) + { + case BuiltInClipDistance: + return " /* [[clip_distance]] built-in not yet supported under Metal. */"; + + case BuiltInPointSize: // Must output only if really rendering points + return msl_config.is_rendering_points ? (string(" [[") + builtin_qualifier(builtin) + "]]") : ""; + + case BuiltInPosition: + case BuiltInLayer: + return string(" [[") + builtin_qualifier(builtin) + "]]"; + + default: + return ""; + } + } + uint32_t locn = get_ordered_member_location(type.self, index); + return string(" [[user(locn") + convert_to_string(locn) + ")]]"; + } + + // Fragment function inputs + if (execution.model == ExecutionModelFragment && type.storage == StorageClassInput) + { + if (is_builtin) + { + switch (builtin) + { + case BuiltInFrontFacing: + case BuiltInPointCoord: + case BuiltInFragCoord: + case BuiltInSampleId: + case BuiltInSampleMask: + case BuiltInLayer: + return string(" [[") + builtin_qualifier(builtin) + "]]"; + + default: + return ""; + } + } + uint32_t locn = get_ordered_member_location(type.self, index); + return string(" [[user(locn") + convert_to_string(locn) + ")]]"; + } + + // Fragment function outputs + if (execution.model == ExecutionModelFragment && type.storage == StorageClassOutput) + { + if (is_builtin) + { + switch (builtin) + { + case BuiltInSampleMask: + case BuiltInFragDepth: + return string(" [[") + builtin_qualifier(builtin) + "]]"; + + default: + return ""; + } + } + uint32_t locn = get_ordered_member_location(type.self, index); + return string(" [[color(") + convert_to_string(locn) + ")]]"; + } + + return ""; +} + +// Returns the location decoration of the member with the specified index in the specified type. +// If the location of the member has been explicitly set, that location is used. If not, this +// function assumes the members are ordered in their location order, and simply returns the +// index as the location. +uint32_t CompilerMSL::get_ordered_member_location(uint32_t type_id, uint32_t index) +{ + auto &m = meta.at(type_id); + if (index < m.members.size()) + { + auto &dec = m.members[index]; + if (dec.decoration_flags & (1ull << DecorationLocation)) + return dec.location; + } + + return index; +} + +string CompilerMSL::constant_expression(const SPIRConstant &c) +{ + if (!c.subconstants.empty()) + { + // Handles Arrays and structures. 
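+		// Emitted as a brace-enclosed initializer list, e.g. { 1.0, 2.0, 3.0 } (illustrative).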
+		string res = "{";
+		for (auto &elem : c.subconstants)
+		{
+			res += constant_expression(get<SPIRConstant>(elem));
+			if (&elem != &c.subconstants.back())
+				res += ", ";
+		}
+		res += "}";
+		return res;
+	}
+	else if (c.columns() == 1)
+	{
+		return constant_expression_vector(c, 0);
+	}
+	else
+	{
+		string res = type_to_glsl(get<SPIRType>(c.constant_type)) + "(";
+		for (uint32_t col = 0; col < c.columns(); col++)
+		{
+			res += constant_expression_vector(c, col);
+			if (col + 1 < c.columns())
+				res += ", ";
+		}
+		res += ")";
+		return res;
+	}
+}
+
+// Returns the type declaration for a function, including the
+// entry type if the current function is the entry point function.
+string CompilerMSL::func_type_decl(SPIRType &type)
+{
+	auto &execution = get_entry_point();
+	// The regular function return type. If not processing the entry point function, that's all we need.
+	string return_type = type_to_glsl(type);
+	if (!processing_entry_point)
+		return return_type;
+
+	// If an outgoing interface block has been defined, override the entry point return type.
+	if (stage_out_var_id)
+	{
+		auto &so_var = get<SPIRVariable>(stage_out_var_id);
+		auto &so_type = get<SPIRType>(so_var.basetype);
+		return_type = type_to_glsl(so_type);
+	}
+
+	// Prepend an entry type, based on the execution model.
+	string entry_type;
+	switch (execution.model)
+	{
+	case ExecutionModelVertex:
+		entry_type = "vertex";
+		break;
+	case ExecutionModelFragment:
+		entry_type = (execution.flags & (1ull << ExecutionModeEarlyFragmentTests)) ?
+		                 "fragment [[ early_fragment_tests ]]" :
+		                 "fragment";
+		break;
+	case ExecutionModelGLCompute:
+	case ExecutionModelKernel:
+		entry_type = "kernel";
+		break;
+	default:
+		entry_type = "unknown";
+		break;
+	}
+
+	return entry_type + " " + return_type;
+}
+
+// Ensures the function name is not "main", which is illegal in MSL.
+string CompilerMSL::clean_func_name(string func_name)
+{
+	static std::string _clean_msl_main_func_name = "mmain";
+	return (func_name == "main") ?
_clean_msl_main_func_name : func_name; +} + +// Returns a string containing a comma-delimited list of args for the entry point function +string CompilerMSL::entry_point_args(bool append_comma) +{ + auto &execution = get_entry_point(); + string ep_args; + + // Stage-in structures + for (uint32_t var_id : stage_in_var_ids) + { + auto &var = get(var_id); + auto &type = get(var.basetype); + auto &dec = meta[var.self].decoration; + + bool use_stage_in = + (execution.model != ExecutionModelVertex || dec.binding == msl_config.vtx_attr_stage_in_binding); + + if (!ep_args.empty()) + ep_args += ", "; + if (use_stage_in) + ep_args += type_to_glsl(type) + " " + to_name(var.self) + " [[stage_in]]"; + else + ep_args += "device " + type_to_glsl(type) + "* " + to_name(var.self) + " [[buffer(" + + convert_to_string(dec.binding) + ")]]"; + } + + // Uniforms + for (auto &id : ids) + { + if (id.get_type() == TypeVariable) + { + auto &var = id.get(); + auto &type = get(var.basetype); + + if (is_hidden_variable(var, true)) + continue; + + if (var.storage == StorageClassUniform || var.storage == StorageClassUniformConstant || + var.storage == StorageClassPushConstant) + { + switch (type.basetype) + { + case SPIRType::Struct: + if (!ep_args.empty()) + ep_args += ", "; + ep_args += "constant " + type_to_glsl(type) + "& " + to_name(var.self); + ep_args += " [[buffer(" + convert_to_string(get_metal_resource_index(var, type.basetype)) + ")]]"; + break; + case SPIRType::Sampler: + if (!ep_args.empty()) + ep_args += ", "; + ep_args += type_to_glsl(type) + " " + to_name(var.self); + ep_args += " [[sampler(" + convert_to_string(get_metal_resource_index(var, type.basetype)) + ")]]"; + break; + case SPIRType::Image: + if (!ep_args.empty()) + ep_args += ", "; + ep_args += type_to_glsl(type) + " " + to_name(var.self); + ep_args += " [[texture(" + convert_to_string(get_metal_resource_index(var, type.basetype)) + ")]]"; + break; + case SPIRType::SampledImage: + if (!ep_args.empty()) + ep_args += ", "; + ep_args += type_to_glsl(type) + " " + to_name(var.self); + ep_args += + " [[texture(" + convert_to_string(get_metal_resource_index(var, SPIRType::Image)) + ")]]"; + if (type.image.dim != DimBuffer) + { + ep_args += ", sampler " + to_sampler_expression(var.self); + ep_args += + " [[sampler(" + convert_to_string(get_metal_resource_index(var, SPIRType::Sampler)) + ")]]"; + } + break; + default: + break; + } + } + if (var.storage == StorageClassInput && is_builtin_variable(var)) + { + if (!ep_args.empty()) + ep_args += ", "; + BuiltIn bi_type = meta[var.self].decoration.builtin_type; + ep_args += builtin_type_decl(bi_type) + " " + to_expression(var.self); + ep_args += " [[" + builtin_qualifier(bi_type) + "]]"; + } + } + } + + if (!ep_args.empty() && append_comma) + ep_args += ", "; + + return ep_args; +} + +// Returns the Metal index of the resource of the specified type as used by the specified variable. +uint32_t CompilerMSL::get_metal_resource_index(SPIRVariable &var, SPIRType::BaseType basetype) +{ + auto &execution = get_entry_point(); + auto &var_dec = meta[var.self].decoration; + uint32_t var_desc_set = (var.storage == StorageClassPushConstant) ? kPushConstDescSet : var_dec.set; + uint32_t var_binding = (var.storage == StorageClassPushConstant) ? 
kPushConstBinding : var_dec.binding; + + // If a matching binding has been specified, find and use it + for (auto p_res_bind : resource_bindings) + { + if (p_res_bind->stage == execution.model && p_res_bind->desc_set == var_desc_set && + p_res_bind->binding == var_binding) + { + + p_res_bind->used_by_shader = true; + switch (basetype) + { + case SPIRType::Struct: + return p_res_bind->msl_buffer; + case SPIRType::Image: + return p_res_bind->msl_texture; + case SPIRType::Sampler: + return p_res_bind->msl_sampler; + default: + return 0; + } + } + } + + // If a binding has not been specified, revert to incrementing resource indices + switch (basetype) + { + case SPIRType::Struct: + return next_metal_resource_index.msl_buffer++; + case SPIRType::Image: + return next_metal_resource_index.msl_texture++; + case SPIRType::Sampler: + return next_metal_resource_index.msl_sampler++; + default: + return 0; + } +} + +// Returns the name of the entry point of this shader +string CompilerMSL::get_entry_point_name() +{ + return clean_func_name(to_name(entry_point)); +} + +// Returns the name of either the vertex index or instance index builtin +string CompilerMSL::get_vtx_idx_var_name(bool per_instance) +{ + BuiltIn builtin; + uint32_t var_id; + + // Try modern builtin name first + builtin = per_instance ? BuiltInInstanceIndex : BuiltInVertexIndex; + var_id = builtin_vars[builtin]; + if (var_id) + return to_expression(var_id); + + // Try legacy builtin name second + builtin = per_instance ? BuiltInInstanceId : BuiltInVertexId; + var_id = builtin_vars[builtin]; + if (var_id) + return to_expression(var_id); + + return "missing_vtx_idx_var"; +} + +// If the struct contains indexed vertex input, and the offset is greater than the current +// size of the struct, appends a padding member to the struct, and returns the offset to +// use for the next member, which is the offset provided. Otherwise, no padding is added, +// and the struct size is returned. +uint32_t CompilerMSL::pad_to_offset(SPIRType &struct_type, bool is_indxd_vtx_input, uint32_t offset, + uint32_t struct_size) +{ + if (!(is_indxd_vtx_input && offset > struct_size)) + return struct_size; + + auto &pad_type = get_pad_type(offset - struct_size); + uint32_t mbr_idx = uint32_t(struct_type.member_types.size()); + struct_type.member_types.push_back(pad_type.self); + set_member_name(struct_type.self, mbr_idx, ("pad" + convert_to_string(mbr_idx))); + set_member_decoration(struct_type.self, mbr_idx, DecorationOffset, struct_size); + return offset; +} + +// Returns a char array type suitable for use as a padding member in a packed struct +SPIRType &CompilerMSL::get_pad_type(uint32_t pad_len) +{ + uint32_t pad_type_id = pad_type_ids_by_pad_len[pad_len]; + if (pad_type_id != 0) + return get(pad_type_id); + + pad_type_id = increase_bound_by(1); + auto &ib_type = set(pad_type_id); + ib_type.storage = StorageClassGeneric; + ib_type.basetype = SPIRType::Char; + ib_type.width = 8; + ib_type.array.push_back(pad_len); + ib_type.array_size_literal.push_back(true); + set_decoration(ib_type.self, DecorationArrayStride, pad_len); + + pad_type_ids_by_pad_len[pad_len] = pad_type_id; + return ib_type; +} + +string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg) +{ + auto &type = expression_type(arg.id); + bool constref = !type.pointer || arg.write_count == 0; + + auto &var = get(arg.id); + return join(constref ? 
"const " : "", type_to_glsl(type), "& ", to_name(var.self), type_to_array_glsl(type)); +} + +// If we're currently in the entry point function, and the object +// has a qualified name, use it, otherwise use the standard name. +string CompilerMSL::to_name(uint32_t id, bool allow_alias) +{ + if (current_function && (current_function->self == entry_point)) + { + string qual_name = meta.at(id).decoration.qualified_alias; + if (!qual_name.empty()) + return qual_name; + } + return Compiler::to_name(id, allow_alias); +} + +// Returns a name that combines the name of the struct with the name of the member, except for Builtins +string CompilerMSL::to_qualified_member_name(const SPIRType &type, uint32_t index) +{ + //Start with existing member name + string mbr_name = to_member_name(type, index); + + // Don't qualify Builtin names because they are unique and are treated as such when building expressions + if (is_member_builtin(type, index, nullptr)) + return mbr_name; + + // Strip any underscore prefix from member name + size_t startPos = mbr_name.find_first_not_of("_"); + mbr_name = (startPos != std::string::npos) ? mbr_name.substr(startPos) : ""; + return join(to_name(type.self), "_", mbr_name); +} + +// Ensures that the specified name is permanently usable by prepending a prefix +// if the first chars are _ and a digit, which indicate a transient name. +string CompilerMSL::ensure_valid_name(string name, string pfx) +{ + if (name.size() >= 2 && name[0] == '_' && isdigit(name[1])) + return join(pfx, name); + else + return name; +} + +// Returns an MSL string describing the SPIR-V type +string CompilerMSL::type_to_glsl(const SPIRType &type) +{ + // Ignore the pointer type since GLSL doesn't have pointers. + + switch (type.basetype) + { + case SPIRType::Struct: + // Need OpName lookup here to get a "sensible" name for a struct. + return to_name(type.self); + + case SPIRType::Image: + case SPIRType::SampledImage: + return image_type_glsl(type); + + case SPIRType::Sampler: + // Not really used. + return "sampler"; + + case SPIRType::Void: + return "void"; + + default: + break; + } + + if (is_scalar(type)) // Scalar builtin + { + switch (type.basetype) + { + case SPIRType::Boolean: + return "bool"; + case SPIRType::Char: + return "char"; + case SPIRType::Int: + return (type.width == 16 ? "short" : "int"); + case SPIRType::UInt: + return (type.width == 16 ? "ushort" : "uint"); + case SPIRType::AtomicCounter: + return "atomic_uint"; + case SPIRType::Float: + return (type.width == 16 ? "half" : "float"); + default: + return "unknown_type"; + } + } + else if (is_vector(type)) // Vector builtin + { + switch (type.basetype) + { + case SPIRType::Boolean: + return join("bool", type.vecsize); + case SPIRType::Char: + return join("char", type.vecsize); + ; + case SPIRType::Int: + return join((type.width == 16 ? "short" : "int"), type.vecsize); + case SPIRType::UInt: + return join((type.width == 16 ? "ushort" : "uint"), type.vecsize); + case SPIRType::Float: + return join((type.width == 16 ? "half" : "float"), type.vecsize); + default: + return "unknown_type"; + } + } + else + { + switch (type.basetype) + { + case SPIRType::Boolean: + case SPIRType::Int: + case SPIRType::UInt: + case SPIRType::Float: + return join((type.width == 16 ? 
"half" : "float"), type.columns, "x", type.vecsize); + default: + return "unknown_type"; + } + } +} + +// Returns an MSL string describing the SPIR-V image type +string CompilerMSL::image_type_glsl(const SPIRType &type) +{ + string img_type_name; + + auto &img_type = type.image; + if (img_type.depth) + { + switch (img_type.dim) + { + case spv::Dim2D: + img_type_name += (img_type.ms ? "depth2d_ms" : (img_type.arrayed ? "depth2d_array" : "depth2d")); + break; + case spv::DimCube: + img_type_name += (img_type.arrayed ? "depthcube_array" : "depthcube"); + break; + default: + img_type_name += "unknown_depth_texture_type"; + break; + } + } + else + { + switch (img_type.dim) + { + case spv::Dim1D: + img_type_name += (img_type.arrayed ? "texture1d_array" : "texture1d"); + break; + case spv::DimBuffer: + case spv::Dim2D: + img_type_name += (img_type.ms ? "texture2d_ms" : (img_type.arrayed ? "texture2d_array" : "texture2d")); + break; + case spv::Dim3D: + img_type_name += "texture3d"; + break; + case spv::DimCube: + img_type_name += (img_type.arrayed ? "texturecube_array" : "texturecube"); + break; + default: + img_type_name += "unknown_texture_type"; + break; + } + } + + // Append the pixel type + auto &img_pix_type = get(img_type.type); + img_type_name += "<" + type_to_glsl(img_pix_type) + ">"; + + return img_type_name; +} + +// Returns an MSL string identifying the name of a SPIR-V builtin +string CompilerMSL::builtin_to_glsl(BuiltIn builtin) +{ + switch (builtin) + { + case BuiltInPosition: + return qual_pos_var_name.empty() ? (stage_out_var_name + ".gl_Position") : qual_pos_var_name; + case BuiltInPointSize: + return (stage_out_var_name + ".gl_PointSize"); + case BuiltInVertexId: + return "gl_VertexID"; + case BuiltInInstanceId: + return "gl_InstanceID"; + case BuiltInVertexIndex: + return "gl_VertexIndex"; + case BuiltInInstanceIndex: + return "gl_InstanceIndex"; + case BuiltInPrimitiveId: + return "gl_PrimitiveID"; + case BuiltInInvocationId: + return "gl_InvocationID"; + case BuiltInLayer: + return "gl_Layer"; + case BuiltInTessLevelOuter: + return "gl_TessLevelOuter"; + case BuiltInTessLevelInner: + return "gl_TessLevelInner"; + case BuiltInTessCoord: + return "gl_TessCoord"; + case BuiltInFragCoord: + return "gl_FragCoord"; + case BuiltInPointCoord: + return "gl_PointCoord"; + case BuiltInFrontFacing: + return "gl_FrontFacing"; + case BuiltInFragDepth: + return "gl_FragDepth"; + case BuiltInNumWorkgroups: + return "gl_NumWorkGroups"; + case BuiltInWorkgroupSize: + return "gl_WorkGroupSize"; + case BuiltInWorkgroupId: + return "gl_WorkGroupID"; + case BuiltInLocalInvocationId: + return "gl_LocalInvocationID"; + case BuiltInGlobalInvocationId: + return "gl_GlobalInvocationID"; + case BuiltInLocalInvocationIndex: + return "gl_LocalInvocationIndex"; + default: + return "gl_???"; + } +} + +// Returns an MSL string attribute qualifer for a SPIR-V builtin +string CompilerMSL::builtin_qualifier(BuiltIn builtin) +{ + auto &execution = get_entry_point(); + + switch (builtin) + { + // Vertex function in + case BuiltInVertexId: + return "vertex_id"; + case BuiltInVertexIndex: + return "vertex_id"; + case BuiltInInstanceId: + return "instance_id"; + case BuiltInInstanceIndex: + return "instance_id"; + + // Vertex function out + case BuiltInClipDistance: + return "clip_distance"; + case BuiltInPointSize: + return "point_size"; + case BuiltInPosition: + return "position"; + case BuiltInLayer: + return "render_target_array_index"; + + // Fragment function in + case BuiltInFrontFacing: + return 
"front_facing"; + case BuiltInPointCoord: + return "point_coord"; + case BuiltInFragCoord: + return "position"; + case BuiltInSampleId: + return "sample_id"; + case BuiltInSampleMask: + return "sample_mask"; + + // Fragment function out + case BuiltInFragDepth: + { + if (execution.flags & (1ull << ExecutionModeDepthGreater)) + return "depth(greater)"; + + if (execution.flags & (1ull << ExecutionModeDepthLess)) + return "depth(less)"; + + if (execution.flags & (1ull << ExecutionModeDepthUnchanged)) + return "depth(any)"; + } + + default: + return "unsupported-built-in"; + } +} + +// Returns an MSL string type declaration for a SPIR-V builtin +string CompilerMSL::builtin_type_decl(BuiltIn builtin) +{ + switch (builtin) + { + // Vertex function in + case BuiltInVertexId: + return "uint"; + case BuiltInVertexIndex: + return "uint"; + case BuiltInInstanceId: + return "uint"; + case BuiltInInstanceIndex: + return "uint"; + + // Vertex function out + case BuiltInClipDistance: + return "float"; + case BuiltInPointSize: + return "float"; + case BuiltInPosition: + return "float4"; + + // Fragment function in + case BuiltInFrontFacing: + return "bool"; + case BuiltInPointCoord: + return "float2"; + case BuiltInFragCoord: + return "float4"; + case BuiltInSampleId: + return "uint"; + case BuiltInSampleMask: + return "uint"; + + default: + return "unsupported-built-in-type"; + } +} + +// Returns the effective size of a buffer block struct member. +size_t CompilerMSL::get_declared_struct_member_size(const SPIRType &struct_type, uint32_t index) const +{ + auto &type = get(struct_type.member_types[index]); + auto dec_mask = get_member_decoration_mask(struct_type.self, index); + return get_declared_type_size(type, dec_mask); +} + +// Returns the effective size of a variable type. +size_t CompilerMSL::get_declared_type_size(const SPIRType &type) const +{ + return get_declared_type_size(type, get_decoration_mask(type.self)); +} + +// Returns the effective size of a variable type or member type, +// taking into consideration the specified mask of decorations. +size_t CompilerMSL::get_declared_type_size(const SPIRType &type, uint64_t dec_mask) const +{ + if (type.basetype == SPIRType::Struct) + return get_declared_struct_size(type); + + switch (type.basetype) + { + case SPIRType::Unknown: + case SPIRType::Void: + case SPIRType::AtomicCounter: + case SPIRType::Image: + case SPIRType::SampledImage: + case SPIRType::Sampler: + SPIRV_CROSS_THROW("Querying size of object with opaque size."); + default: + break; + } + + size_t component_size = type.width / 8; + unsigned vecsize = type.vecsize; + unsigned columns = type.columns; + + if (type.array.empty()) + { + // Vectors. + if (columns == 1) + return vecsize * component_size; + else + { + // Per SPIR-V spec, matrices must be tightly packed and aligned up for vec3 accesses. + if ((dec_mask & (1ull << DecorationRowMajor)) && columns == 3) + columns = 4; + else if ((dec_mask & (1ull << DecorationColMajor)) && vecsize == 3) + vecsize = 4; + + return vecsize * columns * component_size; + } + } + else + { + // For arrays, we can use ArrayStride to get an easy check. + // ArrayStride is part of the array type not OpMemberDecorate. 
+ auto &dec = meta[type.self].decoration; + if (dec.decoration_flags & (1ull << DecorationArrayStride)) + return dec.array_stride * to_array_size_literal(type, uint32_t(type.array.size()) - 1); + else + { + SPIRV_CROSS_THROW("Type does not have ArrayStride set."); + } + } +} + +// Sort both type and meta member content based on builtin status (put builtins at end), then by location. +void MemberSorter::sort() +{ + // Create a temporary array of consecutive member indices and sort it base on + // how the members should be reordered, based on builtin and location meta info. + size_t mbr_cnt = type.member_types.size(); + vector mbr_idxs(mbr_cnt); + iota(mbr_idxs.begin(), mbr_idxs.end(), 0); // Fill with consecutive indices + std::sort(mbr_idxs.begin(), mbr_idxs.end(), *this); // Sort member indices based on member locations + + // Move type and meta member info to the order defined by the sorted member indices. + // This is done by creating temporary copies of both member types and meta, and then + // copying back to the original content at the sorted indices. + auto mbr_types_cpy = type.member_types; + auto mbr_meta_cpy = meta.members; + for (uint32_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++) + { + type.member_types[mbr_idx] = mbr_types_cpy[mbr_idxs[mbr_idx]]; + meta.members[mbr_idx] = mbr_meta_cpy[mbr_idxs[mbr_idx]]; + } +} + +// Sort first by builtin status (put builtins at end), then by location. +bool MemberSorter::operator()(uint32_t mbr_idx1, uint32_t mbr_idx2) +{ + auto &mbr_meta1 = meta.members[mbr_idx1]; + auto &mbr_meta2 = meta.members[mbr_idx2]; + if (mbr_meta1.builtin != mbr_meta2.builtin) + return mbr_meta2.builtin; + else + switch (sort_aspect) + { + case Location: + return mbr_meta1.location < mbr_meta2.location; + case Offset: + return mbr_meta1.offset < mbr_meta2.offset; + default: + return false; + } +} diff --git a/spirv_msl.hpp b/spirv_msl.hpp new file mode 100644 index 0000000000..eadaea2fb2 --- /dev/null +++ b/spirv_msl.hpp @@ -0,0 +1,189 @@ +/* + * Copyright 2015-2016 The Brenwill Workshop Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef SPIRV_CROSS_MSL_HPP +#define SPIRV_CROSS_MSL_HPP + +#include "spirv_glsl.hpp" +#include +#include + +namespace spirv_cross +{ + +// Options for compiling to Metal Shading Language +struct MSLConfiguration +{ + uint32_t vtx_attr_stage_in_binding = 0; + bool flip_vert_y = true; + bool flip_frag_y = true; + bool is_rendering_points = false; +}; + +// Defines MSL characteristics of a vertex attribute at a particular location. +// The used_by_shader flag is set to true during compilation of SPIR-V to MSL +// if the shader makes use of this vertex attribute. +struct MSLVertexAttr +{ + uint32_t location = 0; + uint32_t msl_buffer = 0; + uint32_t msl_offset = 0; + uint32_t msl_stride = 0; + bool per_instance = false; + bool used_by_shader = false; +}; + +// Matches the binding index of a MSL resource for a binding within a descriptor set. 
+
+// Matches the binding index of a MSL resource for a binding within a descriptor set.
+// Taken together, the stage, desc_set and binding combine to form a reference to a resource
+// descriptor used in a particular shading stage. Generally, only one of the buffer, texture,
+// or sampler elements will be populated. The used_by_shader flag is set to true during
+// compilation of SPIR-V to MSL if the shader makes use of this resource binding.
+struct MSLResourceBinding
+{
+	spv::ExecutionModel stage;
+	uint32_t desc_set = 0;
+	uint32_t binding = 0;
+
+	uint32_t msl_buffer = 0;
+	uint32_t msl_texture = 0;
+	uint32_t msl_sampler = 0;
+
+	bool used_by_shader = false;
+};
+
+// Special constant used in a MSLResourceBinding desc_set
+// element to indicate the bindings for the push constants.
+static const uint32_t kPushConstDescSet = UINT32_MAX;
+
+// Special constant used in a MSLResourceBinding binding
+// element to indicate the bindings for the push constants.
+static const uint32_t kPushConstBinding = 0;
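[Editor's note: for instance, a caller could route a push-constant block to a Metal buffer slot using the two constants above. A sketch; the helper name and the chosen buffer index are illustrative:]

#include "spirv_msl.hpp"

// Expose the push-constant block as buffer(0) in the fragment function.
spirv_cross::MSLResourceBinding make_push_constant_binding()
{
	spirv_cross::MSLResourceBinding b;
	b.stage = spv::ExecutionModelFragment;
	b.desc_set = spirv_cross::kPushConstDescSet;
	b.binding = spirv_cross::kPushConstBinding;
	b.msl_buffer = 0;
	return b;
}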
+
+// Decompiles SPIR-V to Metal Shading Language
+class CompilerMSL : public CompilerGLSL
+{
+public:
+	// Constructs an instance to compile the SPIR-V code into Metal Shading Language.
+	CompilerMSL(std::vector<uint32_t> spirv);
+
+	// Compiles the SPIR-V code into Metal Shading Language using the specified configuration parameters.
+	// - msl_cfg indicates some general configuration for directing the compilation.
+	// - p_vtx_attrs is an optional list of vertex attribute bindings used to match
+	//   vertex content locations to MSL attributes. If vertex attributes are provided,
+	//   the compiler will set the used_by_shader flag to true in any vertex attribute
+	//   actually used by the MSL code.
+	// - p_res_bindings is a list of resource bindings to indicate the MSL buffer,
+	//   texture or sampler index to use for a particular SPIR-V descriptor set
+	//   and binding. If resource bindings are provided, the compiler will set the
+	//   used_by_shader flag to true in any resource binding actually used by the MSL code.
+	std::string compile(MSLConfiguration &msl_cfg, std::vector<MSLVertexAttr> *p_vtx_attrs = nullptr,
+	                    std::vector<MSLResourceBinding> *p_res_bindings = nullptr);
+
+	// Compiles the SPIR-V code into Metal Shading Language using default configuration parameters.
+	std::string compile() override;
+
+protected:
+	void emit_instruction(const Instruction &instr) override;
+	void emit_glsl_op(uint32_t result_type, uint32_t result_id, uint32_t op, const uint32_t *args,
+	                  uint32_t count) override;
+	void emit_header() override;
+	void emit_function_prototype(SPIRFunction &func, uint64_t return_flags) override;
+	void emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id) override;
+	void emit_texture_op(const Instruction &i) override;
+	void emit_fixup() override;
+	std::string type_to_glsl(const SPIRType &type) override;
+	std::string image_type_glsl(const SPIRType &type) override;
+	std::string builtin_to_glsl(spv::BuiltIn builtin) override;
+	std::string member_decl(const SPIRType &type, const SPIRType &member_type, uint32_t member) override;
+	std::string constant_expression(const SPIRConstant &c) override;
+	size_t get_declared_struct_member_size(const SPIRType &struct_type, uint32_t index) const override;
+	std::string to_func_call_arg(uint32_t id) override;
+	std::string to_name(uint32_t id, bool allow_alias = true) override;
+
+	void extract_builtins();
+	void add_builtin(spv::BuiltIn builtin_type);
+	void localize_global_variables();
+	void extract_global_variables_from_functions();
+	void extract_global_variables_from_function(uint32_t func_id, std::set<uint32_t> &added_arg_ids,
+	                                            std::set<uint32_t> &global_var_ids,
+	                                            std::set<uint32_t> &processed_func_ids);
+	void add_interface_structs();
+	void bind_vertex_attributes(std::set<uint32_t> &bindings);
+	uint32_t add_interface_struct(spv::StorageClass storage, uint32_t vtx_binding = 0);
+	void emit_resources();
+	void emit_interface_block(uint32_t ib_var_id);
+	void emit_function_prototype(SPIRFunction &func, bool is_decl);
+	void emit_function_declarations();
+
+	std::string func_type_decl(SPIRType &type);
+	std::string clean_func_name(std::string func_name);
+	std::string entry_point_args(bool append_comma);
+	std::string get_entry_point_name();
+	std::string to_qualified_member_name(const SPIRType &type, uint32_t index);
+	std::string ensure_valid_name(std::string name, std::string pfx);
+	std::string to_sampler_expression(uint32_t id);
+	std::string builtin_qualifier(spv::BuiltIn builtin);
+	std::string builtin_type_decl(spv::BuiltIn builtin);
+	std::string member_attribute_qualifier(const SPIRType &type, uint32_t index);
+	std::string argument_decl(const SPIRFunction::Parameter &arg);
+	std::string get_vtx_idx_var_name(bool per_instance);
+	uint32_t get_metal_resource_index(SPIRVariable &var, SPIRType::BaseType basetype);
+	uint32_t get_ordered_member_location(uint32_t type_id, uint32_t index);
+	uint32_t pad_to_offset(SPIRType &struct_type, bool is_indxd_vtx_input, uint32_t offset, uint32_t struct_size);
+	SPIRType &get_pad_type(uint32_t pad_len);
+	size_t get_declared_type_size(const SPIRType &type) const;
+	size_t get_declared_type_size(const SPIRType &type, uint64_t dec_mask) const;
+
+	MSLConfiguration msl_config;
+	std::unordered_map<uint32_t, MSLVertexAttr *> vtx_attrs_by_location;
+	std::vector<MSLResourceBinding *> resource_bindings;
+	std::unordered_map<uint32_t, uint32_t> builtin_vars;
+	MSLResourceBinding next_metal_resource_index;
+	std::unordered_map<uint32_t, uint32_t> pad_type_ids_by_pad_len;
+	std::vector<uint32_t> stage_in_var_ids;
+	uint32_t stage_out_var_id = 0;
+	std::string qual_pos_var_name;
+	std::string stage_in_var_name = "in";
+	std::string stage_out_var_name = "out";
+	std::string sampler_name_suffix = "Smplr";
+};
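[Editor's note: putting the pieces together, a caller might drive the compiler like this. A minimal sketch assuming the template arguments reconstructed above; the function name to_msl and the binding values are invented for illustration:]

#include "spirv_msl.hpp"
#include <cstdint>
#include <string>
#include <utility>
#include <vector>

// Translate a SPIR-V module to MSL, mapping descriptor set 0 / binding 0
// of the vertex stage to Metal buffer(1).
std::string to_msl(std::vector<uint32_t> spirv_words)
{
	spirv_cross::CompilerMSL msl(std::move(spirv_words));

	spirv_cross::MSLConfiguration cfg;
	cfg.flip_vert_y = true; // match Metal's clip-space convention

	spirv_cross::MSLResourceBinding ubo;
	ubo.stage = spv::ExecutionModelVertex;
	ubo.desc_set = 0;
	ubo.binding = 0;
	ubo.msl_buffer = 1;
	std::vector<spirv_cross::MSLResourceBinding> bindings = { ubo };

	return msl.compile(cfg, nullptr, &bindings);
}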
+
+// Sorts the members of a SPIRType and associated Meta info based on a settable sorting
+// aspect, which defines which aspect of the struct members will be used to sort them.
+// Regardless of the sorting aspect, built-in members always appear at the end of the struct.
+struct MemberSorter
+{
+	enum SortAspect
+	{
+		Location,
+		Offset,
+	};
+
+	void sort();
+	bool operator()(uint32_t mbr_idx1, uint32_t mbr_idx2);
+	MemberSorter(SPIRType &t, Meta &m, SortAspect sa)
+	    : type(t)
+	    , meta(m)
+	    , sort_aspect(sa)
+	{
+	}
+	SPIRType &type;
+	Meta &meta;
+	SortAspect sort_aspect;
+};
+}
+
+#endif
diff --git a/test_shaders.py b/test_shaders.py
new file mode 100755
index 0000000000..daac82a660
--- /dev/null
+++ b/test_shaders.py
@@ -0,0 +1,245 @@
+#!/usr/bin/env python3
+
+import sys
+import os
+import subprocess
+import tempfile
+import re
+import itertools
+import hashlib
+import shutil
+import argparse
+
+def parse_stats(stats):
+    m = re.search(r'([0-9]+) work registers', stats)
+    registers = int(m.group(1)) if m else 0
+
+    m = re.search(r'([0-9]+) uniform registers', stats)
+    uniform_regs = int(m.group(1)) if m else 0
+
+    m_list = re.findall(r'(-?[0-9]+)\s+(-?[0-9]+)\s+(-?[0-9]+)', stats)
+    alu_short = float(m_list[1][0]) if m_list else 0
+    ls_short = float(m_list[1][1]) if m_list else 0
+    tex_short = float(m_list[1][2]) if m_list else 0
+    alu_long = float(m_list[2][0]) if m_list else 0
+    ls_long = float(m_list[2][1]) if m_list else 0
+    tex_long = float(m_list[2][2]) if m_list else 0
+
+    return (registers, uniform_regs, alu_short, ls_short, tex_short, alu_long, ls_long, tex_long)
+
+def get_shader_type(shader):
+    _, ext = os.path.splitext(shader)
+    if ext == '.vert':
+        return '--vertex'
+    elif ext == '.frag':
+        return '--fragment'
+    elif ext == '.comp':
+        return '--compute'
+    elif ext == '.tesc':
+        return '--tessellation_control'
+    elif ext == '.tese':
+        return '--tessellation_evaluation'
+    elif ext == '.geom':
+        return '--geometry'
+    else:
+        return ''
+
+def get_shader_stats(shader):
+    f, path = tempfile.mkstemp()
+
+    os.close(f)
+    p = subprocess.Popen(['malisc', get_shader_type(shader), '--core', 'Mali-T760', '-V', shader], stdout = subprocess.PIPE, stderr = subprocess.PIPE)
+    stdout, stderr = p.communicate()
+    os.remove(path)
+
+    if p.returncode != 0:
+        print(stderr.decode('utf-8'))
+        raise OSError('malisc failed')
+
+    returned = stdout.decode('utf-8')
+    return parse_stats(returned)
+
+def validate_shader(shader, vulkan):
+    if vulkan:
+        subprocess.check_call(['glslangValidator', '-V', shader])
+    else:
+        subprocess.check_call(['glslangValidator', shader])
+
+def cross_compile(shader, vulkan, spirv, eliminate, invalid_spirv):
+    spirv_f, spirv_path = tempfile.mkstemp()
+    glsl_f, glsl_path = tempfile.mkstemp(suffix = os.path.basename(shader))
+    os.close(spirv_f)
+    os.close(glsl_f)
+
+    if vulkan or spirv:
+        vulkan_glsl_f, vulkan_glsl_path = tempfile.mkstemp(suffix = os.path.basename(shader))
+        os.close(vulkan_glsl_f)
+
+    if spirv:
+        subprocess.check_call(['spirv-as', '-o', spirv_path, shader])
+    else:
+        subprocess.check_call(['glslangValidator', '-V', '-o', spirv_path, shader])
+
+    if not invalid_spirv:
+        subprocess.check_call(['spirv-val', spirv_path])
+
+    spirv_cross_path = './spirv-cross'
+    if eliminate:
+        subprocess.check_call([spirv_cross_path, '--remove-unused-variables', '--entry', 'main', '--output', glsl_path, spirv_path])
+    else:
+        subprocess.check_call([spirv_cross_path, '--entry', 'main', '--output', glsl_path, spirv_path])
+
+    # A shader might not be possible to make valid GLSL from, so skip validation for this case.
+    if ('nocompat' not in glsl_path) and (not spirv):
+        validate_shader(glsl_path, False)
+
+    if vulkan or spirv:
+        if eliminate:
+            subprocess.check_call([spirv_cross_path, '--remove-unused-variables', '--entry', 'main', '--vulkan-semantics', '--output', vulkan_glsl_path, spirv_path])
+        else:
+            subprocess.check_call([spirv_cross_path, '--entry', 'main', '--vulkan-semantics', '--output', vulkan_glsl_path, spirv_path])
+        validate_shader(vulkan_glsl_path, vulkan)
+
+    return (spirv_path, glsl_path, vulkan_glsl_path if vulkan else None)
+
+def md5_for_file(path):
+    md5 = hashlib.md5()
+    with open(path, 'rb') as f:
+        for chunk in iter(lambda: f.read(8192), b''):
+            md5.update(chunk)
+    return md5.digest()
+
+def make_reference_dir(path):
+    base = os.path.dirname(path)
+    if not os.path.exists(base):
+        os.makedirs(base)
+
+def reference_path(directory, relpath):
+    split_paths = os.path.split(directory)
+    reference_dir = os.path.join(split_paths[0], 'reference/')
+    reference_dir = os.path.join(reference_dir, split_paths[1])
+    return os.path.join(reference_dir, relpath)
+
+def regression_check(shader, glsl, update, keep):
+    reference = reference_path(shader[0], shader[1])
+    joined_path = os.path.join(shader[0], shader[1])
+    print('Reference shader path:', reference)
+
+    if os.path.exists(reference):
+        if md5_for_file(glsl) != md5_for_file(reference):
+            if update:
+                print('Generated GLSL has changed for {}!'.format(reference))
+                # If we expect changes, update the reference file.
+                os.remove(reference)
+                make_reference_dir(reference)
+                shutil.move(glsl, reference)
+            else:
+                print('Generated GLSL in {} does not match reference {}!'.format(glsl, reference))
+                with open(glsl, 'r') as f:
+                    print('')
+                    print('Generated:')
+                    print('======================')
+                    print(f.read())
+                    print('======================')
+                    print('')
+
+                # Otherwise, fail the test. Keep the shader file around so we can inspect it.
+                if not keep:
+                    os.remove(glsl)
+                sys.exit(1)
+        else:
+            os.remove(glsl)
+    else:
+        print('Found new shader {}. Placing GLSL in {}'.format(joined_path, reference))
+        make_reference_dir(reference)
+        shutil.move(glsl, reference)
+
+def shader_is_vulkan(shader):
+    return '.vk.' in shader
+
+def shader_is_desktop(shader):
+    return '.desktop.' in shader
+
+def shader_is_eliminate_dead_variables(shader):
+    return '.noeliminate.' not in shader
+
+def shader_is_spirv(shader):
+    return '.asm.' in shader
+
+def shader_is_invalid_spirv(shader):
+    return '.invalid.' in shader
+
+def test_shader(stats, shader, update, keep):
+    joined_path = os.path.join(shader[0], shader[1])
+    vulkan = shader_is_vulkan(shader[1])
+    desktop = shader_is_desktop(shader[1])
+    eliminate = shader_is_eliminate_dead_variables(shader[1])
+    is_spirv = shader_is_spirv(shader[1])
+    invalid_spirv = shader_is_invalid_spirv(shader[1])
+
+    print('Testing shader:', joined_path)
+    spirv, glsl, vulkan_glsl = cross_compile(joined_path, vulkan, is_spirv, eliminate, invalid_spirv)
+
+    # Only test GLSL stats if we have a shader following GL semantics.
+    if stats and (not vulkan) and (not is_spirv) and (not desktop):
+        cross_stats = get_shader_stats(glsl)
+
+    regression_check(shader, glsl, update, keep)
+    if vulkan_glsl:
+        regression_check((shader[0], shader[1] + '.vk'), vulkan_glsl, update, keep)
+    os.remove(spirv)
+
+    if stats and (not vulkan) and (not is_spirv) and (not desktop):
+        pristine_stats = get_shader_stats(joined_path)
+
+        a = []
+        a.append(shader[1])
+        for i in pristine_stats:
+            a.append(str(i))
+        for i in cross_stats:
+            a.append(str(i))
+        print(','.join(a), file = stats)
+
+def test_shaders_helper(stats, shader_dir, update, malisc, keep):
+    for root, dirs, files in os.walk(shader_dir):
+        for i in files:
+            path = os.path.join(root, i)
+            relpath = os.path.relpath(path, shader_dir)
+            test_shader(stats, (shader_dir, relpath), update, keep)
+
+def test_shaders(shader_dir, update, malisc, keep):
+    if malisc:
+        with open('stats.csv', 'w') as stats:
+            print('Shader,OrigRegs,OrigUniRegs,OrigALUShort,OrigLSShort,OrigTEXShort,OrigALULong,OrigLSLong,OrigTEXLong,CrossRegs,CrossUniRegs,CrossALUShort,CrossLSShort,CrossTEXShort,CrossALULong,CrossLSLong,CrossTEXLong', file = stats)
+            test_shaders_helper(stats, shader_dir, update, malisc, keep)
+    else:
+        test_shaders_helper(None, shader_dir, update, malisc, keep)
+
+def main():
+    parser = argparse.ArgumentParser(description = 'Script for regression testing.')
+    parser.add_argument('folder',
+            help = 'Folder containing shader files to test.')
+    parser.add_argument('--update',
+            action = 'store_true',
+            help = 'Updates reference files if there is a mismatch. Use when legitimate changes in output are found.')
+    parser.add_argument('--keep',
+            action = 'store_true',
+            help = 'Leave GLSL shaders on disk if they fail regression. Useful for debugging.')
+    parser.add_argument('--malisc',
+            action = 'store_true',
+            help = 'Use the malisc offline compiler to determine static cycle counts before and after spirv-cross.')
+    args = parser.parse_args()
+
+    if not args.folder:
+        sys.stderr.write('Need shader folder.\n')
+        sys.exit(1)
+
+    test_shaders(args.folder, args.update, args.malisc, args.keep)
+    if args.malisc:
+        print('Stats in stats.csv!')
+    print('Tests completed!')
+
+if __name__ == '__main__':
+    main()